Commit c4159a75 authored by Vladimir Davydov, committed by Linus Torvalds

mm: memcontrol: only mark charged pages with PageKmemcg

To distinguish non-slab pages charged to kmemcg we mark them PageKmemcg,
which sets page->_mapcount to -512.  Currently, we set/clear PageKmemcg
in __alloc_pages_nodemask()/free_pages_prepare() for any page allocated
with __GFP_ACCOUNT, including those that aren't actually charged to any
cgroup, i.e. allocated from the root cgroup context.  To avoid overhead
in case cgroups are not used, we only do that if memcg_kmem_enabled() is
true.  The latter is set iff there are kmem-enabled memory cgroups
(online or offline).  The root cgroup is not considered kmem-enabled.
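
As an aside for readers unfamiliar with the flag: PageKmemcg is not a
regular page flag bit but a marker value stored in page->_mapcount,
which is unused (held at -1) for pages that have no userspace mappings.
The standalone sketch below models that encoding; the struct and helper
bodies are simplified stand-ins rather than the kernel's definitions,
and only the -512 marker value and the helper names come from the real
code:

  #include <assert.h>
  #include <stdio.h>

  /* Simplified stand-in for struct page: while a page has no userspace
   * mappings, _mapcount stays at -1 and is free to hold a type marker. */
  struct page {
          int _mapcount;
  };

  #define PAGE_KMEMCG_MAPCOUNT_VALUE (-512)

  static int PageKmemcg(struct page *page)
  {
          return page->_mapcount == PAGE_KMEMCG_MAPCOUNT_VALUE;
  }

  static void __SetPageKmemcg(struct page *page)
  {
          assert(page->_mapcount == -1);  /* only valid on unmapped pages */
          page->_mapcount = PAGE_KMEMCG_MAPCOUNT_VALUE;
  }

  static void __ClearPageKmemcg(struct page *page)
  {
          assert(PageKmemcg(page));
          page->_mapcount = -1;
  }

  int main(void)
  {
          struct page page = { ._mapcount = -1 };

          __SetPageKmemcg(&page);
          /* Page dumps print _mapcount + 1, hence the "mapcount:-511"
           * in the bug report below. */
          printf("mapcount:%d\n", page._mapcount + 1);
          __ClearPageKmemcg(&page);
          return 0;
  }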

As a result, if a page is allocated with __GFP_ACCOUNT for the root
cgroup while kmem-enabled memory cgroups exist and is freed only after
all of them have been removed, e.g.

  # no memory cgroup has been created yet, create one
  mkdir /sys/fs/cgroup/memory/test
  # run something allocating pages with __GFP_ACCOUNT, e.g.
  # a program using pipe
  dmesg | tail
  # remove the memory cgroup
  rmdir /sys/fs/cgroup/memory/test

we'll get a bad page state bug complaining about page->_mapcount != -1:

  BUG: Bad page state in process swapper/0  pfn:1fd945c
  page:ffffea007f651700 count:0 mapcount:-511 mapping:          (null) index:0x0
  flags: 0x1000000000000000()
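
The asymmetry is that the page was marked while memcg_kmem_enabled()
was true, but is freed after the static key has flipped back to false,
so free_pages_prepare() never calls __ClearPageKmemcg(). The following
standalone simulation of the pre-patch logic makes the sequence
concrete; the function names mirror the kernel paths in the diff below,
but the bodies are condensed mock-ups, illustrative only:

  #include <stdbool.h>
  #include <stdio.h>

  #define PAGE_KMEMCG_MAPCOUNT_VALUE (-512)

  struct page { int _mapcount; };

  /* Stand-in for the memcg_kmem_enabled() static key: true while any
   * kmem-enabled memory cgroup exists (online or offline). */
  static bool kmem_enabled;

  /* Pre-patch __alloc_pages_nodemask(): every __GFP_ACCOUNT page is
   * marked while the key is on, even when memcg_kmem_charge() charged
   * nothing because the caller runs in the root cgroup. */
  static void alloc_accounted_page(struct page *page)
  {
          page->_mapcount = -1;
          if (kmem_enabled)
                  page->_mapcount = PAGE_KMEMCG_MAPCOUNT_VALUE;
  }

  /* Pre-patch free_pages_prepare(): clearing is gated on the same key. */
  static void free_accounted_page(struct page *page)
  {
          if (kmem_enabled && page->_mapcount == PAGE_KMEMCG_MAPCOUNT_VALUE)
                  page->_mapcount = -1;   /* __ClearPageKmemcg() */
          if (page->_mapcount != -1)
                  printf("BUG: Bad page state, mapcount:%d\n",
                         page->_mapcount + 1);
  }

  int main(void)
  {
          struct page page;

          kmem_enabled = true;            /* mkdir .../memory/test */
          alloc_accounted_page(&page);    /* dmesg | tail */
          kmem_enabled = false;           /* rmdir; memcg fully freed */
          free_accounted_page(&page);     /* -> BUG, mapcount:-511 */
          return 0;
  }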

To avoid that, let's mark with PageKmemcg only those pages that are
actually charged to, and hence pin, a non-root memory cgroup.

Fixes: 4949148a ("mm: charge/uncharge kmemcg from generic page allocator paths")
Reported-and-tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Vladimir Davydov <vdavydov@virtuozzo.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 81abf252
fs/pipe.c
@@ -144,10 +144,8 @@ static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
 	struct page *page = buf->page;
 
 	if (page_count(page) == 1) {
-		if (memcg_kmem_enabled()) {
+		if (memcg_kmem_enabled())
 			memcg_kmem_uncharge(page, 0);
-			__ClearPageKmemcg(page);
-		}
 		__SetPageLocked(page);
 		return 0;
 	}
mm/memcontrol.c
@@ -2337,8 +2337,11 @@ int memcg_kmem_charge(struct page *page, gfp_t gfp, int order)
 		return 0;
 
 	memcg = get_mem_cgroup_from_mm(current->mm);
-	if (!mem_cgroup_is_root(memcg))
+	if (!mem_cgroup_is_root(memcg)) {
 		ret = memcg_kmem_charge_memcg(page, gfp, order, memcg);
+		if (!ret)
+			__SetPageKmemcg(page);
+	}
 	css_put(&memcg->css);
 	return ret;
 }
@@ -2365,6 +2368,11 @@ void memcg_kmem_uncharge(struct page *page, int order)
 		page_counter_uncharge(&memcg->memsw, nr_pages);
 
 	page->mem_cgroup = NULL;
+
+	/* slab pages do not have PageKmemcg flag set */
+	if (PageKmemcg(page))
+		__ClearPageKmemcg(page);
+
 	css_put_many(&memcg->css, nr_pages);
 }
 #endif /* !CONFIG_SLOB */
@@ -5537,8 +5545,10 @@ static void uncharge_list(struct list_head *page_list)
 			else
 				nr_file += nr_pages;
 			pgpgout++;
-		} else
+		} else {
 			nr_kmem += 1 << compound_order(page);
+			__ClearPageKmemcg(page);
+		}
 
 		page->mem_cgroup = NULL;
 	} while (next != page_list);
mm/page_alloc.c
@@ -1008,10 +1008,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
 	}
 	if (PageMappingFlags(page))
 		page->mapping = NULL;
-	if (memcg_kmem_enabled() && PageKmemcg(page)) {
+	if (memcg_kmem_enabled() && PageKmemcg(page))
 		memcg_kmem_uncharge(page, order);
-		__ClearPageKmemcg(page);
-	}
 	if (check_free)
 		bad += free_pages_check(page);
 	if (bad)
@@ -3756,12 +3754,10 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
 	}
 
 out:
-	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page) {
-		if (unlikely(memcg_kmem_charge(page, gfp_mask, order))) {
-			__free_pages(page, order);
-			page = NULL;
-		} else
-			__SetPageKmemcg(page);
-	}
+	if (memcg_kmem_enabled() && (gfp_mask & __GFP_ACCOUNT) && page &&
+	    unlikely(memcg_kmem_charge(page, gfp_mask, order) != 0)) {
+		__free_pages(page, order);
+		page = NULL;
+	}
 
 	if (kmemcheck_enabled && page)