Commit cb731d6c authored by Vladimir Davydov, committed by Linus Torvalds

vmscan: per memory cgroup slab shrinkers

This patch adds the SHRINKER_MEMCG_AWARE flag.  If a shrinker has this flag
set, it is called once per memory cgroup.  The memory cgroup to scan
objects from is passed in shrink_control->memcg.  If the memory cgroup
is NULL, a memcg-aware shrinker is supposed to scan objects from the
global list.  Shrinkers without the flag are only called on global
pressure, with memcg=NULL.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 4101b624
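For illustration only (not part of this commit): a minimal sketch of how a cache might register a memcg-aware shrinker against the interface added here. The my_cache_* names are hypothetical placeholders standing in for a cache's own per-memcg, per-node bookkeeping helpers.

#include <linux/shrinker.h>
#include <linux/memcontrol.h>

/*
 * Illustrative sketch, not from this patch: a cache opting in to
 * per-memcg shrinking.  my_cache_count_objects() and
 * my_cache_free_objects() are hypothetical helpers.
 */
static unsigned long my_cache_count(struct shrinker *shrink,
				    struct shrink_control *sc)
{
	/* sc->memcg == NULL means global pressure: count the global list */
	return my_cache_count_objects(sc->memcg, sc->nid);
}

static unsigned long my_cache_scan(struct shrinker *shrink,
				   struct shrink_control *sc)
{
	/* free up to sc->nr_to_scan objects from the requested memcg/node */
	return my_cache_free_objects(sc->memcg, sc->nid, sc->nr_to_scan);
}

static struct shrinker my_cache_shrinker = {
	.count_objects	= my_cache_count,
	.scan_objects	= my_cache_scan,
	.seeks		= DEFAULT_SEEKS,
	.flags		= SHRINKER_NUMA_AWARE | SHRINKER_MEMCG_AWARE,
};

static int __init my_cache_init(void)
{
	/* without SHRINKER_MEMCG_AWARE this would only ever see memcg == NULL */
	return register_shrinker(&my_cache_shrinker);
}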
@@ -37,20 +37,6 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 	iput(toput_inode);
 }
 
-static void drop_slab(void)
-{
-	int nr_objects;
-
-	do {
-		int nid;
-
-		nr_objects = 0;
-		for_each_online_node(nid)
-			nr_objects += shrink_node_slabs(GFP_KERNEL, nid,
-							1000, 1000);
-	} while (nr_objects > 10);
-}
-
 int drop_caches_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
...
@@ -413,6 +413,8 @@ static inline bool memcg_kmem_enabled(void)
 	return static_key_false(&memcg_kmem_enabled_key);
 }
 
+bool memcg_kmem_is_active(struct mem_cgroup *memcg);
+
 /*
  * In general, we'll do everything in our power to not incur in any overhead
  * for non-memcg users for the kmem functions. Not even a function call, if we
@@ -542,6 +544,11 @@ static inline bool memcg_kmem_enabled(void)
 	return false;
 }
 
+static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg)
+{
+	return false;
+}
+
 static inline bool
 memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order)
 {
...
@@ -2168,9 +2168,8 @@ int drop_caches_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 #endif
 
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-				unsigned long nr_scanned,
-				unsigned long nr_eligible);
+void drop_slab(void);
+void drop_slab_node(int nid);
 
 #ifndef CONFIG_MMU
 #define randomize_va_space 0
...
@@ -20,6 +20,9 @@ struct shrink_control {
 
 	/* current node being shrunk (for NUMA aware shrinkers) */
 	int nid;
+
+	/* current memcg being shrunk (for memcg aware shrinkers) */
+	struct mem_cgroup *memcg;
 };
 
 #define SHRINK_STOP (~0UL)
@@ -61,7 +64,8 @@ struct shrinker {
 #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
 
 /* Flags */
 #define SHRINKER_NUMA_AWARE (1 << 0)
+#define SHRINKER_MEMCG_AWARE (1 << 1)
 
 extern int register_shrinker(struct shrinker *);
 extern void unregister_shrinker(struct shrinker *);
...
@@ -352,7 +352,7 @@ struct mem_cgroup {
 };
 
 #ifdef CONFIG_MEMCG_KMEM
-static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
+bool memcg_kmem_is_active(struct mem_cgroup *memcg)
 {
 	return memcg->kmemcg_id >= 0;
 }
...
@@ -242,15 +242,8 @@ void shake_page(struct page *p, int access)
 	 * Only call shrink_node_slabs here (which would also shrink
 	 * other caches) if access is not potentially fatal.
 	 */
-	if (access) {
-		int nr;
-		int nid = page_to_nid(p);
-		do {
-			nr = shrink_node_slabs(GFP_KERNEL, nid, 1000, 1000);
-			if (page_count(p) == 1)
-				break;
-		} while (nr > 10);
-	}
+	if (access)
+		drop_slab_node(page_to_nid(p));
 }
 EXPORT_SYMBOL_GPL(shake_page);
...
@@ -232,10 +232,10 @@ EXPORT_SYMBOL(unregister_shrinker);
 
 #define SHRINK_BATCH 128
 
-static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
-				  struct shrinker *shrinker,
-				  unsigned long nr_scanned,
-				  unsigned long nr_eligible)
+static unsigned long do_shrink_slab(struct shrink_control *shrinkctl,
+				    struct shrinker *shrinker,
+				    unsigned long nr_scanned,
+				    unsigned long nr_eligible)
 {
 	unsigned long freed = 0;
 	unsigned long long delta;
@@ -344,9 +344,10 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
 }
 
 /**
- * shrink_node_slabs - shrink slab caches of a given node
+ * shrink_slab - shrink slab caches
  * @gfp_mask: allocation context
  * @nid: node whose slab caches to target
+ * @memcg: memory cgroup whose slab caches to target
  * @nr_scanned: pressure numerator
  * @nr_eligible: pressure denominator
  *
@@ -355,6 +356,12 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
  * @nid is passed along to shrinkers with SHRINKER_NUMA_AWARE set,
  * unaware shrinkers will receive a node id of 0 instead.
  *
+ * @memcg specifies the memory cgroup to target. If it is not NULL,
+ * only shrinkers with SHRINKER_MEMCG_AWARE set will be called to scan
+ * objects from the memory cgroup specified. Otherwise all shrinkers
+ * are called, and memcg aware shrinkers are supposed to scan the
+ * global list then.
+ *
  * @nr_scanned and @nr_eligible form a ratio that indicate how much of
  * the available objects should be scanned. Page reclaim for example
  * passes the number of pages scanned and the number of pages on the
@@ -365,13 +372,17 @@ static unsigned long shrink_slabs(struct shrink_control *shrinkctl,
 *
 * Returns the number of reclaimed slab objects.
 */
-unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
-				unsigned long nr_scanned,
-				unsigned long nr_eligible)
+static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
+				 struct mem_cgroup *memcg,
+				 unsigned long nr_scanned,
+				 unsigned long nr_eligible)
 {
 	struct shrinker *shrinker;
 	unsigned long freed = 0;
 
+	if (memcg && !memcg_kmem_is_active(memcg))
+		return 0;
+
 	if (nr_scanned == 0)
 		nr_scanned = SWAP_CLUSTER_MAX;
@@ -390,12 +401,16 @@ unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
 		struct shrink_control sc = {
 			.gfp_mask = gfp_mask,
 			.nid = nid,
+			.memcg = memcg,
 		};
 
+		if (memcg && !(shrinker->flags & SHRINKER_MEMCG_AWARE))
+			continue;
+
 		if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
 			sc.nid = 0;
 
-		freed += shrink_slabs(&sc, shrinker, nr_scanned, nr_eligible);
+		freed += do_shrink_slab(&sc, shrinker, nr_scanned, nr_eligible);
 	}
 
 	up_read(&shrinker_rwsem);
@@ -404,6 +419,29 @@ unsigned long shrink_node_slabs(gfp_t gfp_mask, int nid,
 	return freed;
 }
 
+void drop_slab_node(int nid)
+{
+	unsigned long freed;
+
+	do {
+		struct mem_cgroup *memcg = NULL;
+
+		freed = 0;
+		do {
+			freed += shrink_slab(GFP_KERNEL, nid, memcg,
+					     1000, 1000);
+		} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
+	} while (freed > 10);
+}
+
+void drop_slab(void)
+{
+	int nid;
+
+	for_each_online_node(nid)
+		drop_slab_node(nid);
+}
+
 static inline int is_page_cache_freeable(struct page *page)
 {
 	/*
@@ -2276,6 +2314,7 @@ static inline bool should_continue_reclaim(struct zone *zone,
 static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 			bool is_classzone)
 {
+	struct reclaim_state *reclaim_state = current->reclaim_state;
 	unsigned long nr_reclaimed, nr_scanned;
 	bool reclaimable = false;
@@ -2294,6 +2333,7 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 		memcg = mem_cgroup_iter(root, NULL, &reclaim);
 		do {
 			unsigned long lru_pages;
+			unsigned long scanned;
 			struct lruvec *lruvec;
 			int swappiness;
@@ -2305,10 +2345,16 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 			lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 			swappiness = mem_cgroup_swappiness(memcg);
+			scanned = sc->nr_scanned;
 
 			shrink_lruvec(lruvec, swappiness, sc, &lru_pages);
 			zone_lru_pages += lru_pages;
 
+			if (memcg && is_classzone)
+				shrink_slab(sc->gfp_mask, zone_to_nid(zone),
+					    memcg, sc->nr_scanned - scanned,
+					    lru_pages);
+
 			/*
 			 * Direct reclaim and kswapd have to scan all memory
 			 * cgroups to fulfill the overall scan target for the
@@ -2330,19 +2376,14 @@ static bool shrink_zone(struct zone *zone, struct scan_control *sc,
 		 * Shrink the slab caches in the same proportion that
 		 * the eligible LRU pages were scanned.
 		 */
-		if (global_reclaim(sc) && is_classzone) {
-			struct reclaim_state *reclaim_state;
-
-			shrink_node_slabs(sc->gfp_mask, zone_to_nid(zone),
-					  sc->nr_scanned - nr_scanned,
-					  zone_lru_pages);
-
-			reclaim_state = current->reclaim_state;
-			if (reclaim_state) {
-				sc->nr_reclaimed +=
-					reclaim_state->reclaimed_slab;
-				reclaim_state->reclaimed_slab = 0;
-			}
-		}
+		if (global_reclaim(sc) && is_classzone)
+			shrink_slab(sc->gfp_mask, zone_to_nid(zone), NULL,
+				    sc->nr_scanned - nr_scanned,
+				    zone_lru_pages);
+
+		if (reclaim_state) {
+			sc->nr_reclaimed += reclaim_state->reclaimed_slab;
+			reclaim_state->reclaimed_slab = 0;
+		}
 
 		vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
...