Commit 9855609b authored by Roman Gushchin, committed by Linus Torvalds

mm: memcg/slab: use a single set of kmem_caches for all accounted allocations

This is a fairly big but mostly red patch, which makes all accounted slab
allocations use a single set of kmem_caches instead of creating a separate
set for each memory cgroup.

Because the number of non-root kmem_caches is now capped by the number of
root kmem_caches, there is no need to shrink or destroy them prematurely.
They can be perfectly destroyed together with their root counterparts.
This allows us to dramatically simplify the management of non-root
kmem_caches and delete a ton of code.

This patch performs the following changes:
1) introduces memcg_params.memcg_cache pointer to represent the
   kmem_cache which will be used for all non-root allocations
2) reuses the existing memcg kmem_cache creation mechanism
   to create memcg kmem_cache on the first allocation attempt
3) memcg kmem_caches are named <kmemcache_name>-memcg,
   e.g. dentry-memcg
4) simplifies memcg_kmem_get_cache() to just return memcg kmem_cache
   or schedule its creation and return the root cache
5) removes almost all non-root kmem_cache management code
   (separate refcounter, reparenting, shrinking, etc)
6) makes slab debugfs display root_mem_cgroup css id and never
   show :dead and :deact flags in the memcg_slabinfo attribute.

Following patches in the series will simplify the kmem_cache creation.
Signed-off-by: Roman Gushchin <guro@fb.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Tejun Heo <tj@kernel.org>
Link: http://lkml.kernel.org/r/20200623174037.3951353-13-guro@fb.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 0f876e4d
...@@ -317,7 +317,6 @@ struct mem_cgroup { ...@@ -317,7 +317,6 @@ struct mem_cgroup {
/* Index in the kmem_cache->memcg_params.memcg_caches array */ /* Index in the kmem_cache->memcg_params.memcg_caches array */
int kmemcg_id; int kmemcg_id;
enum memcg_kmem_state kmem_state; enum memcg_kmem_state kmem_state;
struct list_head kmem_caches;
struct obj_cgroup __rcu *objcg; struct obj_cgroup __rcu *objcg;
struct list_head objcg_list; /* list of inherited objcgs */ struct list_head objcg_list; /* list of inherited objcgs */
#endif #endif
...@@ -1404,9 +1403,7 @@ static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg, ...@@ -1404,9 +1403,7 @@ static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
} }
#endif #endif
struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep, struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep);
struct obj_cgroup **objcgp);
void memcg_kmem_put_cache(struct kmem_cache *cachep);
#ifdef CONFIG_MEMCG_KMEM #ifdef CONFIG_MEMCG_KMEM
int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp, int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
......
...@@ -155,8 +155,7 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name, ...@@ -155,8 +155,7 @@ struct kmem_cache *kmem_cache_create_usercopy(const char *name,
void kmem_cache_destroy(struct kmem_cache *); void kmem_cache_destroy(struct kmem_cache *);
int kmem_cache_shrink(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *);
void memcg_create_kmem_cache(struct mem_cgroup *, struct kmem_cache *); void memcg_create_kmem_cache(struct kmem_cache *cachep);
void memcg_deactivate_kmem_caches(struct mem_cgroup *, struct mem_cgroup *);
/* /*
* Please use this macro to create slab caches. Simply specify the * Please use this macro to create slab caches. Simply specify the
...@@ -580,8 +579,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) ...@@ -580,8 +579,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
return __kmalloc_node(size, flags, node); return __kmalloc_node(size, flags, node);
} }
int memcg_update_all_caches(int num_memcgs);
/** /**
* kmalloc_array - allocate memory for an array. * kmalloc_array - allocate memory for an array.
* @n: number of elements. * @n: number of elements.
......
...@@ -350,7 +350,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg, ...@@ -350,7 +350,7 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg,
} }
/* /*
* This will be the memcg's index in each cache's ->memcg_params.memcg_caches. * This will be used as a shrinker list's index.
* The main reason for not using cgroup id for this: * The main reason for not using cgroup id for this:
* this works better in sparse environments, where we have a lot of memcgs, * this works better in sparse environments, where we have a lot of memcgs,
* but only a few kmem-limited. Or also, if we have, for instance, 200 * but only a few kmem-limited. Or also, if we have, for instance, 200
...@@ -569,20 +569,16 @@ ino_t page_cgroup_ino(struct page *page) ...@@ -569,20 +569,16 @@ ino_t page_cgroup_ino(struct page *page)
unsigned long ino = 0; unsigned long ino = 0;
rcu_read_lock(); rcu_read_lock();
if (PageSlab(page) && !PageTail(page)) { memcg = page->mem_cgroup;
memcg = memcg_from_slab_page(page);
} else {
memcg = page->mem_cgroup;
/* /*
* The lowest bit set means that memcg isn't a valid * The lowest bit set means that memcg isn't a valid
* memcg pointer, but a obj_cgroups pointer. * memcg pointer, but a obj_cgroups pointer.
* In this case the page is shared and doesn't belong * In this case the page is shared and doesn't belong
* to any specific memory cgroup. * to any specific memory cgroup.
*/ */
if ((unsigned long) memcg & 0x1UL) if ((unsigned long) memcg & 0x1UL)
memcg = NULL; memcg = NULL;
}
while (memcg && !(memcg->css.flags & CSS_ONLINE)) while (memcg && !(memcg->css.flags & CSS_ONLINE))
memcg = parent_mem_cgroup(memcg); memcg = parent_mem_cgroup(memcg);
...@@ -2822,12 +2818,18 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p) ...@@ -2822,12 +2818,18 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p)
page = virt_to_head_page(p); page = virt_to_head_page(p);
/* /*
* Slab pages don't have page->mem_cgroup set because corresponding * Slab objects are accounted individually, not per-page.
* kmem caches can be reparented during the lifetime. That's why * Memcg membership data for each individual object is saved in
* memcg_from_slab_page() should be used instead. * the page->obj_cgroups.
*/ */
if (PageSlab(page)) if (page_has_obj_cgroups(page)) {
return memcg_from_slab_page(page); struct obj_cgroup *objcg;
unsigned int off;
off = obj_to_index(page->slab_cache, page, p);
objcg = page_obj_cgroups(page)[off];
return obj_cgroup_memcg(objcg);
}
/* All other pages use page->mem_cgroup */ /* All other pages use page->mem_cgroup */
return page->mem_cgroup; return page->mem_cgroup;
...@@ -2882,9 +2884,7 @@ static int memcg_alloc_cache_id(void) ...@@ -2882,9 +2884,7 @@ static int memcg_alloc_cache_id(void)
else if (size > MEMCG_CACHES_MAX_SIZE) else if (size > MEMCG_CACHES_MAX_SIZE)
size = MEMCG_CACHES_MAX_SIZE; size = MEMCG_CACHES_MAX_SIZE;
err = memcg_update_all_caches(size); err = memcg_update_all_list_lrus(size);
if (!err)
err = memcg_update_all_list_lrus(size);
if (!err) if (!err)
memcg_nr_cache_ids = size; memcg_nr_cache_ids = size;
...@@ -2903,7 +2903,6 @@ static void memcg_free_cache_id(int id) ...@@ -2903,7 +2903,6 @@ static void memcg_free_cache_id(int id)
} }
struct memcg_kmem_cache_create_work { struct memcg_kmem_cache_create_work {
struct mem_cgroup *memcg;
struct kmem_cache *cachep; struct kmem_cache *cachep;
struct work_struct work; struct work_struct work;
}; };
...@@ -2912,33 +2911,24 @@ static void memcg_kmem_cache_create_func(struct work_struct *w) ...@@ -2912,33 +2911,24 @@ static void memcg_kmem_cache_create_func(struct work_struct *w)
{ {
struct memcg_kmem_cache_create_work *cw = struct memcg_kmem_cache_create_work *cw =
container_of(w, struct memcg_kmem_cache_create_work, work); container_of(w, struct memcg_kmem_cache_create_work, work);
struct mem_cgroup *memcg = cw->memcg;
struct kmem_cache *cachep = cw->cachep; struct kmem_cache *cachep = cw->cachep;
memcg_create_kmem_cache(memcg, cachep); memcg_create_kmem_cache(cachep);
css_put(&memcg->css);
kfree(cw); kfree(cw);
} }
/* /*
* Enqueue the creation of a per-memcg kmem_cache. * Enqueue the creation of a per-memcg kmem_cache.
*/ */
static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, static void memcg_schedule_kmem_cache_create(struct kmem_cache *cachep)
struct kmem_cache *cachep)
{ {
struct memcg_kmem_cache_create_work *cw; struct memcg_kmem_cache_create_work *cw;
if (!css_tryget_online(&memcg->css))
return;
cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN); cw = kmalloc(sizeof(*cw), GFP_NOWAIT | __GFP_NOWARN);
if (!cw) { if (!cw)
css_put(&memcg->css);
return; return;
}
cw->memcg = memcg;
cw->cachep = cachep; cw->cachep = cachep;
INIT_WORK(&cw->work, memcg_kmem_cache_create_func); INIT_WORK(&cw->work, memcg_kmem_cache_create_func);
...@@ -2946,102 +2936,26 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg, ...@@ -2946,102 +2936,26 @@ static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
} }
/** /**
* memcg_kmem_get_cache: select the correct per-memcg cache for allocation * memcg_kmem_get_cache: select memcg or root cache for allocation
* @cachep: the original global kmem cache * @cachep: the original global kmem cache
* *
* Return the kmem_cache we're supposed to use for a slab allocation. * Return the kmem_cache we're supposed to use for a slab allocation.
* We try to use the current memcg's version of the cache.
* *
* If the cache does not exist yet, if we are the first user of it, we * If the cache does not exist yet, if we are the first user of it, we
* create it asynchronously in a workqueue and let the current allocation * create it asynchronously in a workqueue and let the current allocation
* go through with the original cache. * go through with the original cache.
*
* This function takes a reference to the cache it returns to assure it
* won't get destroyed while we are working with it. Once the caller is
* done with it, memcg_kmem_put_cache() must be called to release the
* reference.
*/ */
struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep, struct kmem_cache *memcg_kmem_get_cache(struct kmem_cache *cachep)
struct obj_cgroup **objcgp)
{ {
struct mem_cgroup *memcg;
struct kmem_cache *memcg_cachep; struct kmem_cache *memcg_cachep;
struct memcg_cache_array *arr;
int kmemcg_id;
VM_BUG_ON(!is_root_cache(cachep)); memcg_cachep = READ_ONCE(cachep->memcg_params.memcg_cache);
if (unlikely(!memcg_cachep)) {
if (memcg_kmem_bypass()) memcg_schedule_kmem_cache_create(cachep);
return cachep; return cachep;
rcu_read_lock();
if (unlikely(current->active_memcg))
memcg = current->active_memcg;
else
memcg = mem_cgroup_from_task(current);
if (!memcg || memcg == root_mem_cgroup)
goto out_unlock;
kmemcg_id = READ_ONCE(memcg->kmemcg_id);
if (kmemcg_id < 0)
goto out_unlock;
arr = rcu_dereference(cachep->memcg_params.memcg_caches);
/*
* Make sure we will access the up-to-date value. The code updating
* memcg_caches issues a write barrier to match the data dependency
* barrier inside READ_ONCE() (see memcg_create_kmem_cache()).
*/
memcg_cachep = READ_ONCE(arr->entries[kmemcg_id]);
/*
* If we are in a safe context (can wait, and not in interrupt
* context), we could be be predictable and return right away.
* This would guarantee that the allocation being performed
* already belongs in the new cache.
*
* However, there are some clashes that can arrive from locking.
* For instance, because we acquire the slab_mutex while doing
* memcg_create_kmem_cache, this means no further allocation
* could happen with the slab_mutex held. So it's better to
* defer everything.
*
* If the memcg is dying or memcg_cache is about to be released,
* don't bother creating new kmem_caches. Because memcg_cachep
* is ZEROed as the fist step of kmem offlining, we don't need
* percpu_ref_tryget_live() here. css_tryget_online() check in
* memcg_schedule_kmem_cache_create() will prevent us from
* creation of a new kmem_cache.
*/
if (unlikely(!memcg_cachep))
memcg_schedule_kmem_cache_create(memcg, cachep);
else if (percpu_ref_tryget(&memcg_cachep->memcg_params.refcnt)) {
struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
if (!objcg || !obj_cgroup_tryget(objcg)) {
percpu_ref_put(&memcg_cachep->memcg_params.refcnt);
goto out_unlock;
}
*objcgp = objcg;
cachep = memcg_cachep;
} }
out_unlock:
rcu_read_unlock();
return cachep;
}
/** return memcg_cachep;
* memcg_kmem_put_cache: drop reference taken by memcg_kmem_get_cache
* @cachep: the cache returned by memcg_kmem_get_cache
*/
void memcg_kmem_put_cache(struct kmem_cache *cachep)
{
if (!is_root_cache(cachep))
percpu_ref_put(&cachep->memcg_params.refcnt);
} }
/** /**
...@@ -3731,7 +3645,6 @@ static int memcg_online_kmem(struct mem_cgroup *memcg) ...@@ -3731,7 +3645,6 @@ static int memcg_online_kmem(struct mem_cgroup *memcg)
*/ */
memcg->kmemcg_id = memcg_id; memcg->kmemcg_id = memcg_id;
memcg->kmem_state = KMEM_ONLINE; memcg->kmem_state = KMEM_ONLINE;
INIT_LIST_HEAD(&memcg->kmem_caches);
return 0; return 0;
} }
...@@ -3744,22 +3657,13 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg) ...@@ -3744,22 +3657,13 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
if (memcg->kmem_state != KMEM_ONLINE) if (memcg->kmem_state != KMEM_ONLINE)
return; return;
/*
* Clear the online state before clearing memcg_caches array
* entries. The slab_mutex in memcg_deactivate_kmem_caches()
* guarantees that no cache will be created for this cgroup
* after we are done (see memcg_create_kmem_cache()).
*/
memcg->kmem_state = KMEM_ALLOCATED; memcg->kmem_state = KMEM_ALLOCATED;
parent = parent_mem_cgroup(memcg); parent = parent_mem_cgroup(memcg);
if (!parent) if (!parent)
parent = root_mem_cgroup; parent = root_mem_cgroup;
/*
* Deactivate and reparent kmem_caches and objcgs.
*/
memcg_deactivate_kmem_caches(memcg, parent);
memcg_reparent_objcgs(memcg, parent); memcg_reparent_objcgs(memcg, parent);
kmemcg_id = memcg->kmemcg_id; kmemcg_id = memcg->kmemcg_id;
...@@ -5384,9 +5288,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) ...@@ -5384,9 +5288,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
/* The following stuff does not apply to the root */ /* The following stuff does not apply to the root */
if (!parent) { if (!parent) {
#ifdef CONFIG_MEMCG_KMEM
INIT_LIST_HEAD(&memcg->kmem_caches);
#endif
root_mem_cgroup = memcg; root_mem_cgroup = memcg;
return &memcg->css; return &memcg->css;
} }
......
...@@ -1249,7 +1249,7 @@ void __init kmem_cache_init(void) ...@@ -1249,7 +1249,7 @@ void __init kmem_cache_init(void)
nr_node_ids * sizeof(struct kmem_cache_node *), nr_node_ids * sizeof(struct kmem_cache_node *),
SLAB_HWCACHE_ALIGN, 0, 0); SLAB_HWCACHE_ALIGN, 0, 0);
list_add(&kmem_cache->list, &slab_caches); list_add(&kmem_cache->list, &slab_caches);
memcg_link_cache(kmem_cache, NULL); memcg_link_cache(kmem_cache);
slab_state = PARTIAL; slab_state = PARTIAL;
/* /*
...@@ -2253,17 +2253,6 @@ int __kmem_cache_shrink(struct kmem_cache *cachep) ...@@ -2253,17 +2253,6 @@ int __kmem_cache_shrink(struct kmem_cache *cachep)
return (ret ? 1 : 0); return (ret ? 1 : 0);
} }
#ifdef CONFIG_MEMCG
void __kmemcg_cache_deactivate(struct kmem_cache *cachep)
{
__kmem_cache_shrink(cachep);
}
void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
{
}
#endif
int __kmem_cache_shutdown(struct kmem_cache *cachep) int __kmem_cache_shutdown(struct kmem_cache *cachep)
{ {
return __kmem_cache_shrink(cachep); return __kmem_cache_shrink(cachep);
...@@ -3872,7 +3861,8 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, ...@@ -3872,7 +3861,8 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
return ret; return ret;
lockdep_assert_held(&slab_mutex); lockdep_assert_held(&slab_mutex);
for_each_memcg_cache(c, cachep) { c = memcg_cache(cachep);
if (c) {
/* return value determined by the root cache only */ /* return value determined by the root cache only */
__do_tune_cpucache(c, limit, batchcount, shared, gfp); __do_tune_cpucache(c, limit, batchcount, shared, gfp);
} }
......
...@@ -32,66 +32,25 @@ struct kmem_cache { ...@@ -32,66 +32,25 @@ struct kmem_cache {
#else /* !CONFIG_SLOB */ #else /* !CONFIG_SLOB */
struct memcg_cache_array {
struct rcu_head rcu;
struct kmem_cache *entries[0];
};
/* /*
* This is the main placeholder for memcg-related information in kmem caches. * This is the main placeholder for memcg-related information in kmem caches.
* Both the root cache and the child caches will have it. For the root cache, * Both the root cache and the child cache will have it. Some fields are used
* this will hold a dynamically allocated array large enough to hold * in both cases, other are specific to root caches.
* information about the currently limited memcgs in the system. To allow the
* array to be accessed without taking any locks, on relocation we free the old
* version only after a grace period.
*
* Root and child caches hold different metadata.
* *
* @root_cache: Common to root and child caches. NULL for root, pointer to * @root_cache: Common to root and child caches. NULL for root, pointer to
* the root cache for children. * the root cache for children.
* *
* The following fields are specific to root caches. * The following fields are specific to root caches.
* *
* @memcg_caches: kmemcg ID indexed table of child caches. This table is * @memcg_cache: pointer to memcg kmem cache, used by all non-root memory
* used to index child cachces during allocation and cleared * cgroups.
* early during shutdown. * @root_caches_node: list node for slab_root_caches list.
*
* @root_caches_node: List node for slab_root_caches list.
*
* @children: List of all child caches. While the child caches are also
* reachable through @memcg_caches, a child cache remains on
* this list until it is actually destroyed.
*
* The following fields are specific to child caches.
*
* @memcg: Pointer to the memcg this cache belongs to.
*
* @children_node: List node for @root_cache->children list.
*
* @kmem_caches_node: List node for @memcg->kmem_caches list.
*/ */
struct memcg_cache_params { struct memcg_cache_params {
struct kmem_cache *root_cache; struct kmem_cache *root_cache;
union {
struct { struct kmem_cache *memcg_cache;
struct memcg_cache_array __rcu *memcg_caches; struct list_head __root_caches_node;
struct list_head __root_caches_node;
struct list_head children;
bool dying;
};
struct {
struct mem_cgroup *memcg;
struct list_head children_node;
struct list_head kmem_caches_node;
struct percpu_ref refcnt;
void (*work_fn)(struct kmem_cache *);
union {
struct rcu_head rcu_head;
struct work_struct work;
};
};
};
}; };
#endif /* CONFIG_SLOB */ #endif /* CONFIG_SLOB */
...@@ -236,8 +195,6 @@ bool __kmem_cache_empty(struct kmem_cache *); ...@@ -236,8 +195,6 @@ bool __kmem_cache_empty(struct kmem_cache *);
int __kmem_cache_shutdown(struct kmem_cache *); int __kmem_cache_shutdown(struct kmem_cache *);
void __kmem_cache_release(struct kmem_cache *); void __kmem_cache_release(struct kmem_cache *);
int __kmem_cache_shrink(struct kmem_cache *); int __kmem_cache_shrink(struct kmem_cache *);
void __kmemcg_cache_deactivate(struct kmem_cache *s);
void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s);
void slab_kmem_cache_release(struct kmem_cache *); void slab_kmem_cache_release(struct kmem_cache *);
void kmem_cache_shrink_all(struct kmem_cache *s); void kmem_cache_shrink_all(struct kmem_cache *s);
...@@ -311,14 +268,6 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla ...@@ -311,14 +268,6 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla
extern struct list_head slab_root_caches; extern struct list_head slab_root_caches;
#define root_caches_node memcg_params.__root_caches_node #define root_caches_node memcg_params.__root_caches_node
/*
* Iterate over all memcg caches of the given root cache. The caller must hold
* slab_mutex.
*/
#define for_each_memcg_cache(iter, root) \
list_for_each_entry(iter, &(root)->memcg_params.children, \
memcg_params.children_node)
static inline bool is_root_cache(struct kmem_cache *s) static inline bool is_root_cache(struct kmem_cache *s)
{ {
return !s->memcg_params.root_cache; return !s->memcg_params.root_cache;
...@@ -349,6 +298,13 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s) ...@@ -349,6 +298,13 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
return s->memcg_params.root_cache; return s->memcg_params.root_cache;
} }
static inline struct kmem_cache *memcg_cache(struct kmem_cache *s)
{
if (is_root_cache(s))
return s->memcg_params.memcg_cache;
return NULL;
}
static inline struct obj_cgroup **page_obj_cgroups(struct page *page) static inline struct obj_cgroup **page_obj_cgroups(struct page *page)
{ {
/* /*
...@@ -361,25 +317,9 @@ static inline struct obj_cgroup **page_obj_cgroups(struct page *page) ...@@ -361,25 +317,9 @@ static inline struct obj_cgroup **page_obj_cgroups(struct page *page)
((unsigned long)page->obj_cgroups & ~0x1UL); ((unsigned long)page->obj_cgroups & ~0x1UL);
} }
/* static inline bool page_has_obj_cgroups(struct page *page)
* Expects a pointer to a slab page. Please note, that PageSlab() check
* isn't sufficient, as it returns true also for tail compound slab pages,
* which do not have slab_cache pointer set.
* So this function assumes that the page can pass PageSlab() && !PageTail()
* check.
*
* The kmem_cache can be reparented asynchronously. The caller must ensure
* the memcg lifetime, e.g. by taking rcu_read_lock() or cgroup_mutex.
*/
static inline struct mem_cgroup *memcg_from_slab_page(struct page *page)
{ {
struct kmem_cache *s; return ((unsigned long)page->obj_cgroups & 0x1UL);
s = READ_ONCE(page->slab_cache);
if (s && !is_root_cache(s))
return READ_ONCE(s->memcg_params.memcg);
return NULL;
} }
static inline int memcg_alloc_page_obj_cgroups(struct page *page, static inline int memcg_alloc_page_obj_cgroups(struct page *page,
...@@ -418,17 +358,25 @@ static inline struct kmem_cache *memcg_slab_pre_alloc_hook(struct kmem_cache *s, ...@@ -418,17 +358,25 @@ static inline struct kmem_cache *memcg_slab_pre_alloc_hook(struct kmem_cache *s,
size_t objects, gfp_t flags) size_t objects, gfp_t flags)
{ {
struct kmem_cache *cachep; struct kmem_cache *cachep;
struct obj_cgroup *objcg;
if (memcg_kmem_bypass())
return s;
cachep = memcg_kmem_get_cache(s, objcgp); cachep = memcg_kmem_get_cache(s);
if (is_root_cache(cachep)) if (is_root_cache(cachep))
return s; return s;
if (obj_cgroup_charge(*objcgp, flags, objects * obj_full_size(s))) { objcg = get_obj_cgroup_from_current();
obj_cgroup_put(*objcgp); if (!objcg)
memcg_kmem_put_cache(cachep); return s;
if (obj_cgroup_charge(objcg, flags, objects * obj_full_size(s))) {
obj_cgroup_put(objcg);
cachep = NULL; cachep = NULL;
} }
*objcgp = objcg;
return cachep; return cachep;
} }
...@@ -467,7 +415,6 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s, ...@@ -467,7 +415,6 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
} }
} }
obj_cgroup_put(objcg); obj_cgroup_put(objcg);
memcg_kmem_put_cache(s);
} }
static inline void memcg_slab_free_hook(struct kmem_cache *s, struct page *page, static inline void memcg_slab_free_hook(struct kmem_cache *s, struct page *page,
...@@ -491,7 +438,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s, struct page *page, ...@@ -491,7 +438,7 @@ static inline void memcg_slab_free_hook(struct kmem_cache *s, struct page *page,
} }
extern void slab_init_memcg_params(struct kmem_cache *); extern void slab_init_memcg_params(struct kmem_cache *);
extern void memcg_link_cache(struct kmem_cache *s, struct mem_cgroup *memcg); extern void memcg_link_cache(struct kmem_cache *s);
#else /* CONFIG_MEMCG_KMEM */ #else /* CONFIG_MEMCG_KMEM */
...@@ -499,9 +446,6 @@ extern void memcg_link_cache(struct kmem_cache *s, struct mem_cgroup *memcg); ...@@ -499,9 +446,6 @@ extern void memcg_link_cache(struct kmem_cache *s, struct mem_cgroup *memcg);
#define slab_root_caches slab_caches #define slab_root_caches slab_caches
#define root_caches_node list #define root_caches_node list
#define for_each_memcg_cache(iter, root) \
for ((void)(iter), (void)(root); 0; )
static inline bool is_root_cache(struct kmem_cache *s) static inline bool is_root_cache(struct kmem_cache *s)
{ {
return true; return true;
...@@ -523,7 +467,17 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s) ...@@ -523,7 +467,17 @@ static inline struct kmem_cache *memcg_root_cache(struct kmem_cache *s)
return s; return s;
} }
static inline struct mem_cgroup *memcg_from_slab_page(struct page *page) static inline struct kmem_cache *memcg_cache(struct kmem_cache *s)
{
return NULL;
}
static inline bool page_has_obj_cgroups(struct page *page)
{
return false;
}
static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
{ {
return NULL; return NULL;
} }
...@@ -560,8 +514,7 @@ static inline void slab_init_memcg_params(struct kmem_cache *s) ...@@ -560,8 +514,7 @@ static inline void slab_init_memcg_params(struct kmem_cache *s)
{ {
} }
static inline void memcg_link_cache(struct kmem_cache *s, static inline void memcg_link_cache(struct kmem_cache *s)
struct mem_cgroup *memcg)
{ {
} }
...@@ -582,17 +535,14 @@ static __always_inline int charge_slab_page(struct page *page, ...@@ -582,17 +535,14 @@ static __always_inline int charge_slab_page(struct page *page,
gfp_t gfp, int order, gfp_t gfp, int order,
struct kmem_cache *s) struct kmem_cache *s)
{ {
#ifdef CONFIG_MEMCG_KMEM
if (memcg_kmem_enabled() && !is_root_cache(s)) { if (memcg_kmem_enabled() && !is_root_cache(s)) {
int ret; int ret;
ret = memcg_alloc_page_obj_cgroups(page, s, gfp); ret = memcg_alloc_page_obj_cgroups(page, s, gfp);
if (ret) if (ret)
return ret; return ret;
percpu_ref_get_many(&s->memcg_params.refcnt, 1 << order);
} }
#endif
mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
PAGE_SIZE << order); PAGE_SIZE << order);
return 0; return 0;
...@@ -601,12 +551,9 @@ static __always_inline int charge_slab_page(struct page *page, ...@@ -601,12 +551,9 @@ static __always_inline int charge_slab_page(struct page *page,
static __always_inline void uncharge_slab_page(struct page *page, int order, static __always_inline void uncharge_slab_page(struct page *page, int order,
struct kmem_cache *s) struct kmem_cache *s)
{ {
#ifdef CONFIG_MEMCG_KMEM if (memcg_kmem_enabled() && !is_root_cache(s))
if (memcg_kmem_enabled() && !is_root_cache(s)) {
memcg_free_page_obj_cgroups(page); memcg_free_page_obj_cgroups(page);
percpu_ref_put_many(&s->memcg_params.refcnt, 1 << order);
}
#endif
mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s), mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
-(PAGE_SIZE << order)); -(PAGE_SIZE << order));
} }
...@@ -749,9 +696,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) ...@@ -749,9 +696,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
void *slab_start(struct seq_file *m, loff_t *pos); void *slab_start(struct seq_file *m, loff_t *pos);
void *slab_next(struct seq_file *m, void *p, loff_t *pos); void *slab_next(struct seq_file *m, void *p, loff_t *pos);
void slab_stop(struct seq_file *m, void *p); void slab_stop(struct seq_file *m, void *p);
void *memcg_slab_start(struct seq_file *m, loff_t *pos);
void *memcg_slab_next(struct seq_file *m, void *p, loff_t *pos);
void memcg_slab_stop(struct seq_file *m, void *p);
int memcg_slab_show(struct seq_file *m, void *p); int memcg_slab_show(struct seq_file *m, void *p);
#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG) #if defined(CONFIG_SLAB) || defined(CONFIG_SLUB_DEBUG)
......
This diff is collapsed.
...@@ -4204,36 +4204,6 @@ int __kmem_cache_shrink(struct kmem_cache *s) ...@@ -4204,36 +4204,6 @@ int __kmem_cache_shrink(struct kmem_cache *s)
return ret; return ret;
} }
#ifdef CONFIG_MEMCG
void __kmemcg_cache_deactivate_after_rcu(struct kmem_cache *s)
{
/*
* Called with all the locks held after a sched RCU grace period.
* Even if @s becomes empty after shrinking, we can't know that @s
* doesn't have allocations already in-flight and thus can't
* destroy @s until the associated memcg is released.
*
* However, let's remove the sysfs files for empty caches here.
* Each cache has a lot of interface files which aren't
* particularly useful for empty draining caches; otherwise, we can
* easily end up with millions of unnecessary sysfs files on
* systems which have a lot of memory and transient cgroups.
*/
if (!__kmem_cache_shrink(s))
sysfs_slab_remove(s);
}
void __kmemcg_cache_deactivate(struct kmem_cache *s)
{
/*
* Disable empty slabs caching. Used to avoid pinning offline
* memory cgroups by kmem pages that can be freed.
*/
slub_set_cpu_partial(s, 0);
s->min_partial = 0;
}
#endif /* CONFIG_MEMCG */
static int slab_mem_going_offline_callback(void *arg) static int slab_mem_going_offline_callback(void *arg)
{ {
struct kmem_cache *s; struct kmem_cache *s;
...@@ -4390,7 +4360,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache) ...@@ -4390,7 +4360,7 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
} }
slab_init_memcg_params(s); slab_init_memcg_params(s);
list_add(&s->list, &slab_caches); list_add(&s->list, &slab_caches);
memcg_link_cache(s, NULL); memcg_link_cache(s);
return s; return s;
} }
...@@ -4458,7 +4428,8 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align, ...@@ -4458,7 +4428,8 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
s->object_size = max(s->object_size, size); s->object_size = max(s->object_size, size);
s->inuse = max(s->inuse, ALIGN(size, sizeof(void *))); s->inuse = max(s->inuse, ALIGN(size, sizeof(void *)));
for_each_memcg_cache(c, s) { c = memcg_cache(s);
if (c) {
c->object_size = s->object_size; c->object_size = s->object_size;
c->inuse = max(c->inuse, ALIGN(size, sizeof(void *))); c->inuse = max(c->inuse, ALIGN(size, sizeof(void *)));
} }
...@@ -5591,7 +5562,8 @@ static ssize_t slab_attr_store(struct kobject *kobj, ...@@ -5591,7 +5562,8 @@ static ssize_t slab_attr_store(struct kobject *kobj,
* directly either failed or succeeded, in which case we loop * directly either failed or succeeded, in which case we loop
* through the descendants with best-effort propagation. * through the descendants with best-effort propagation.
*/ */
for_each_memcg_cache(c, s) c = memcg_cache(s);
if (c)
attribute->store(c, buf, len); attribute->store(c, buf, len);
mutex_unlock(&slab_mutex); mutex_unlock(&slab_mutex);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment