Commit b6038942 authored by Shakeel Butt, committed by Linus Torvalds

mm: memcg: add swapcache stat for memcg v2

This patch adds swapcache stat for the cgroup v2.  The swapcache
represents the memory that is accounted against both the memory and the
swap limit of the cgroup.  The main motivation behind exposing the
swapcache stat is for enabling users to gracefully migrate from cgroup
v1's memsw counter to cgroup v2's memory and swap counters.

Cgroup v1's memsw limit allows users to limit the memory+swap usage of a
workload but without control on the exact proportion of memory and swap.
Cgroup v2 provides separate limits for memory and swap which enables more
control on the exact usage of memory and swap individually for the
workload.

With some little subtleties, the v1's memsw limit can be switched with the
sum of the v2's memory and swap limits.  However the alternative for memsw
usage is not yet available in cgroup v2.  Exposing per-cgroup swapcache
stat enables that alternative.  Adding the memory usage and swap usage and
subtracting the swapcache will approximate the memsw usage.  This will
help in the transparent migration of the workloads depending on memsw
usage and limit to v2's memory and swap counters.

The reasons these applications are still interested in this approximate
memsw usage are: (1) these applications are not really interested in two
separate memory and swap usage metrics.  A single usage metric is simpler
for them to use and reason about.

(2) The memsw usage metric hides the underlying system's swap setup from
the applications.  Applications with multiple instances running in a
datacenter with heterogeneous systems (some have swap and some don't) will
keep seeing a consistent view of their usage.

[akpm@linux-foundation.org: fix CONFIG_SWAP=n build]

Link: https://lkml.kernel.org/r/20210108155813.2914586-3-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Roman Gushchin <guro@fb.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent f9b1038e
...@@ -1299,6 +1299,10 @@ PAGE_SIZE multiple when read back. ...@@ -1299,6 +1299,10 @@ PAGE_SIZE multiple when read back.
Amount of cached filesystem data that was modified and Amount of cached filesystem data that was modified and
is currently being written back to disk is currently being written back to disk
swapcached
Amount of swap cached in memory. The swapcache is accounted
against both memory and swap usage.
anon_thp anon_thp
Amount of memory used in anonymous mappings backed by Amount of memory used in anonymous mappings backed by
transparent hugepages transparent hugepages
......
...@@ -372,14 +372,19 @@ static ssize_t node_read_meminfo(struct device *dev, ...@@ -372,14 +372,19 @@ static ssize_t node_read_meminfo(struct device *dev,
struct pglist_data *pgdat = NODE_DATA(nid); struct pglist_data *pgdat = NODE_DATA(nid);
struct sysinfo i; struct sysinfo i;
unsigned long sreclaimable, sunreclaimable; unsigned long sreclaimable, sunreclaimable;
unsigned long swapcached = 0;
si_meminfo_node(&i, nid); si_meminfo_node(&i, nid);
sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B); sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B);
sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B); sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B);
#ifdef CONFIG_SWAP
swapcached = node_page_state_pages(pgdat, NR_SWAPCACHE);
#endif
len = sysfs_emit_at(buf, len, len = sysfs_emit_at(buf, len,
"Node %d MemTotal: %8lu kB\n" "Node %d MemTotal: %8lu kB\n"
"Node %d MemFree: %8lu kB\n" "Node %d MemFree: %8lu kB\n"
"Node %d MemUsed: %8lu kB\n" "Node %d MemUsed: %8lu kB\n"
"Node %d SwapCached: %8lu kB\n"
"Node %d Active: %8lu kB\n" "Node %d Active: %8lu kB\n"
"Node %d Inactive: %8lu kB\n" "Node %d Inactive: %8lu kB\n"
"Node %d Active(anon): %8lu kB\n" "Node %d Active(anon): %8lu kB\n"
...@@ -391,6 +396,7 @@ static ssize_t node_read_meminfo(struct device *dev, ...@@ -391,6 +396,7 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(i.totalram), nid, K(i.totalram),
nid, K(i.freeram), nid, K(i.freeram),
nid, K(i.totalram - i.freeram), nid, K(i.totalram - i.freeram),
nid, K(swapcached),
nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) + nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
node_page_state(pgdat, NR_ACTIVE_FILE)), node_page_state(pgdat, NR_ACTIVE_FILE)),
nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) + nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) +
......
...@@ -206,6 +206,9 @@ enum node_stat_item { ...@@ -206,6 +206,9 @@ enum node_stat_item {
NR_KERNEL_SCS_KB, /* measured in KiB */ NR_KERNEL_SCS_KB, /* measured in KiB */
#endif #endif
NR_PAGETABLE, /* used for pagetables */ NR_PAGETABLE, /* used for pagetables */
#ifdef CONFIG_SWAP
NR_SWAPCACHE,
#endif
NR_VM_NODE_STAT_ITEMS NR_VM_NODE_STAT_ITEMS
}; };
......
...@@ -408,7 +408,11 @@ extern struct address_space *swapper_spaces[]; ...@@ -408,7 +408,11 @@ extern struct address_space *swapper_spaces[];
#define swap_address_space(entry) \ #define swap_address_space(entry) \
(&swapper_spaces[swp_type(entry)][swp_offset(entry) \ (&swapper_spaces[swp_type(entry)][swp_offset(entry) \
>> SWAP_ADDRESS_SPACE_SHIFT]) >> SWAP_ADDRESS_SPACE_SHIFT])
extern unsigned long total_swapcache_pages(void); static inline unsigned long total_swapcache_pages(void)
{
return global_node_page_state(NR_SWAPCACHE);
}
extern void show_swap_cache_info(void); extern void show_swap_cache_info(void);
extern int add_to_swap(struct page *page); extern int add_to_swap(struct page *page);
extern void *get_shadow_from_swap_cache(swp_entry_t entry); extern void *get_shadow_from_swap_cache(swp_entry_t entry);
......
...@@ -1521,6 +1521,9 @@ static const struct memory_stat memory_stats[] = { ...@@ -1521,6 +1521,9 @@ static const struct memory_stat memory_stats[] = {
{ "file_mapped", NR_FILE_MAPPED }, { "file_mapped", NR_FILE_MAPPED },
{ "file_dirty", NR_FILE_DIRTY }, { "file_dirty", NR_FILE_DIRTY },
{ "file_writeback", NR_WRITEBACK }, { "file_writeback", NR_WRITEBACK },
#ifdef CONFIG_SWAP
{ "swapcached", NR_SWAPCACHE },
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
{ "anon_thp", NR_ANON_THPS }, { "anon_thp", NR_ANON_THPS },
{ "file_thp", NR_FILE_THPS }, { "file_thp", NR_FILE_THPS },
......
...@@ -500,6 +500,12 @@ int migrate_page_move_mapping(struct address_space *mapping, ...@@ -500,6 +500,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
__mod_lruvec_state(old_lruvec, NR_SHMEM, -nr); __mod_lruvec_state(old_lruvec, NR_SHMEM, -nr);
__mod_lruvec_state(new_lruvec, NR_SHMEM, nr); __mod_lruvec_state(new_lruvec, NR_SHMEM, nr);
} }
#ifdef CONFIG_SWAP
if (PageSwapCache(page)) {
__mod_lruvec_state(old_lruvec, NR_SWAPCACHE, -nr);
__mod_lruvec_state(new_lruvec, NR_SWAPCACHE, nr);
}
#endif
if (dirty && mapping_can_writeback(mapping)) { if (dirty && mapping_can_writeback(mapping)) {
__mod_lruvec_state(old_lruvec, NR_FILE_DIRTY, -nr); __mod_lruvec_state(old_lruvec, NR_FILE_DIRTY, -nr);
__mod_zone_page_state(oldzone, NR_ZONE_WRITE_PENDING, -nr); __mod_zone_page_state(oldzone, NR_ZONE_WRITE_PENDING, -nr);
......
...@@ -68,32 +68,6 @@ static struct { ...@@ -68,32 +68,6 @@ static struct {
unsigned long find_total; unsigned long find_total;
} swap_cache_info; } swap_cache_info;
/*
 * total_swapcache_pages - count the pages held in all swap address spaces.
 *
 * Walks every swap type index in [0, MAX_SWAPFILES). For each index that
 * has swap info and whose device can be pinned with get_swap_device(), it
 * adds up the nrpages field of each of the nr_swapper_spaces[i] entries in
 * swapper_spaces[i], then drops the device with put_swap_device().
 *
 * Returns the accumulated nrpages total; 0 if no index could be counted.
 */
unsigned long total_swapcache_pages(void)
{
unsigned int i, j, nr;
unsigned long ret = 0;
struct address_space *spaces;
struct swap_info_struct *si;
for (i = 0; i < MAX_SWAPFILES; i++) {
/* Probe entry (offset 1) used only to look up this swap type */
swp_entry_t entry = swp_entry(i, 1);
/* Avoid get_swap_device() to warn for bad swap entry */
if (!swp_swap_info(entry))
continue;
/* Prevent swapoff to free swapper_spaces */
si = get_swap_device(entry);
if (!si)
continue;
nr = nr_swapper_spaces[i];
spaces = swapper_spaces[i];
for (j = 0; j < nr; j++)
ret += spaces[j].nrpages;
put_swap_device(si);
}
return ret;
}
static atomic_t swapin_readahead_hits = ATOMIC_INIT(4); static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);
void show_swap_cache_info(void) void show_swap_cache_info(void)
...@@ -163,6 +137,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, ...@@ -163,6 +137,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry,
address_space->nrexceptional -= nr_shadows; address_space->nrexceptional -= nr_shadows;
address_space->nrpages += nr; address_space->nrpages += nr;
__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr); __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
__mod_lruvec_page_state(page, NR_SWAPCACHE, nr);
ADD_CACHE_INFO(add_total, nr); ADD_CACHE_INFO(add_total, nr);
unlock: unlock:
xas_unlock_irq(&xas); xas_unlock_irq(&xas);
...@@ -203,6 +178,7 @@ void __delete_from_swap_cache(struct page *page, ...@@ -203,6 +178,7 @@ void __delete_from_swap_cache(struct page *page,
address_space->nrexceptional += nr; address_space->nrexceptional += nr;
address_space->nrpages -= nr; address_space->nrpages -= nr;
__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr); __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
__mod_lruvec_page_state(page, NR_SWAPCACHE, -nr);
ADD_CACHE_INFO(del_total, nr); ADD_CACHE_INFO(del_total, nr);
} }
......
...@@ -1215,6 +1215,9 @@ const char * const vmstat_text[] = { ...@@ -1215,6 +1215,9 @@ const char * const vmstat_text[] = {
"nr_shadow_call_stack", "nr_shadow_call_stack",
#endif #endif
"nr_page_table_pages", "nr_page_table_pages",
#ifdef CONFIG_SWAP
"nr_swapcached",
#endif
/* enum writeback_stat_item counters */ /* enum writeback_stat_item counters */
"nr_dirty_threshold", "nr_dirty_threshold",
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment