Commit a3b2d692, authored by KAMEZAWA Hiroyuki, committed by Linus Torvalds

cgroups: use css id in swap cgroup for saving memory v5

Try to use the CSS ID for records in swap_cgroup.  With this, on a 64-bit
machine, the size of a swap_cgroup record goes down from 8 bytes to 2 bytes.

This means, when 2GB of swap is equipped, (assume the page size is 4096bytes)

	From size of swap_cgroup = 2G/4k * 8 = 4Mbytes.
	To   size of swap_cgroup = 2G/4k * 2 = 1Mbytes.

Reduction is large.  Of course, there are trade-offs.  This CSS ID will
add overhead to swap-in/swap-out/swap-free.

But in general,
  - swap is a resource which users tend to avoid using.
  - If swap is never used, the swap_cgroup area is not used.
  - According to traditional manuals, the size of swap should be proportional
    to the size of memory, and the memory size of machines keeps increasing.

I think reducing size of swap_cgroup makes sense.

Note:
  - The ID->CSS lookup routine takes no locks; it runs under the RCU read side.
  - A memcg can become obsolete at rmdir(), but it is not freed while a refcnt
    from swap_cgroup is still held.

Changelog v4->v5:
 - reworked on to memcg-charge-swapcache-to-proper-memcg.patch
Changelog ->v4:
 - fixed not configured case.
 - deleted unnecessary comments.
 - fixed NULL pointer bug.
 - fixed message in dmesg.

[nishimura@mxp.nes.nec.co.jp: css_tryget can be called twice in !PageCgroupUsed case]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Balbir Singh <balbir@in.ibm.com>
Cc: Paul Menage <menage@google.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 3c776e64
...@@ -91,24 +91,23 @@ static inline void page_cgroup_init(void) ...@@ -91,24 +91,23 @@ static inline void page_cgroup_init(void)
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
#include <linux/swap.h> #include <linux/swap.h>
extern struct mem_cgroup * extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id);
swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem); extern unsigned short lookup_swap_cgroup(swp_entry_t ent);
extern struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent);
extern int swap_cgroup_swapon(int type, unsigned long max_pages); extern int swap_cgroup_swapon(int type, unsigned long max_pages);
extern void swap_cgroup_swapoff(int type); extern void swap_cgroup_swapoff(int type);
#else #else
#include <linux/swap.h> #include <linux/swap.h>
static inline static inline
struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{ {
return NULL; return 0;
} }
static inline static inline
struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) unsigned short lookup_swap_cgroup(swp_entry_t ent)
{ {
return NULL; return 0;
} }
static inline int static inline int
......
...@@ -991,10 +991,31 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, ...@@ -991,10 +991,31 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
return -ENOMEM; return -ENOMEM;
} }
/*
 * A helper function to get the mem_cgroup for a CSS ID.
 *
 * Must be called under rcu_read_lock(). The caller must check
 * css_is_removed() (or similar) itself if that matters to it: dropping a
 * refcnt held by swap can be called against an already removed memcg.
 *
 * Returns NULL when @id is 0 or when css_lookup() finds no css for @id;
 * otherwise returns the mem_cgroup that contains the css found.
 */
static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
{
struct cgroup_subsys_state *css;
/* ID 0 is an unused ID */
if (!id)
return NULL;
css = css_lookup(&mem_cgroup_subsys, id);
if (!css)
return NULL;
/* css is the "css" member of struct mem_cgroup; get the enclosing object */
return container_of(css, struct mem_cgroup, css);
}
static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page) static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
{ {
struct mem_cgroup *mem; struct mem_cgroup *mem;
struct page_cgroup *pc; struct page_cgroup *pc;
unsigned short id;
swp_entry_t ent; swp_entry_t ent;
VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(!PageLocked(page));
...@@ -1006,16 +1027,19 @@ static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page) ...@@ -1006,16 +1027,19 @@ static struct mem_cgroup *try_get_mem_cgroup_from_swapcache(struct page *page)
/* /*
* Used bit of swapcache is solid under page lock. * Used bit of swapcache is solid under page lock.
*/ */
if (PageCgroupUsed(pc)) if (PageCgroupUsed(pc)) {
mem = pc->mem_cgroup; mem = pc->mem_cgroup;
else { if (mem && !css_tryget(&mem->css))
mem = NULL;
} else {
ent.val = page_private(page); ent.val = page_private(page);
mem = lookup_swap_cgroup(ent); id = lookup_swap_cgroup(ent);
rcu_read_lock();
mem = mem_cgroup_lookup(id);
if (mem && !css_tryget(&mem->css))
mem = NULL;
rcu_read_unlock();
} }
if (!mem)
return NULL;
if (!css_tryget(&mem->css))
return NULL;
return mem; return mem;
} }
...@@ -1276,12 +1300,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, ...@@ -1276,12 +1300,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
if (do_swap_account && !ret && PageSwapCache(page)) { if (do_swap_account && !ret && PageSwapCache(page)) {
swp_entry_t ent = {.val = page_private(page)}; swp_entry_t ent = {.val = page_private(page)};
unsigned short id;
/* avoid double counting */ /* avoid double counting */
mem = swap_cgroup_record(ent, NULL); id = swap_cgroup_record(ent, 0);
rcu_read_lock();
mem = mem_cgroup_lookup(id);
if (mem) { if (mem) {
/*
* We did swap-in. Then, this entry is doubly counted
* both in mem and memsw. We uncharge it, here.
* Recorded ID can be obsolete. We avoid calling
* css_tryget()
*/
res_counter_uncharge(&mem->memsw, PAGE_SIZE); res_counter_uncharge(&mem->memsw, PAGE_SIZE);
mem_cgroup_put(mem); mem_cgroup_put(mem);
} }
rcu_read_unlock();
} }
return ret; return ret;
} }
...@@ -1346,13 +1380,21 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr) ...@@ -1346,13 +1380,21 @@ void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr)
*/ */
if (do_swap_account && PageSwapCache(page)) { if (do_swap_account && PageSwapCache(page)) {
swp_entry_t ent = {.val = page_private(page)}; swp_entry_t ent = {.val = page_private(page)};
unsigned short id;
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
memcg = swap_cgroup_record(ent, NULL);
id = swap_cgroup_record(ent, 0);
rcu_read_lock();
memcg = mem_cgroup_lookup(id);
if (memcg) { if (memcg) {
/*
* This recorded memcg can be obsolete one. So, avoid
* calling css_tryget
*/
res_counter_uncharge(&memcg->memsw, PAGE_SIZE); res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_put(memcg); mem_cgroup_put(memcg);
} }
rcu_read_unlock();
} }
/* add this page(page_cgroup) to the LRU we want. */ /* add this page(page_cgroup) to the LRU we want. */
...@@ -1473,7 +1515,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) ...@@ -1473,7 +1515,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
MEM_CGROUP_CHARGE_TYPE_SWAPOUT); MEM_CGROUP_CHARGE_TYPE_SWAPOUT);
/* record memcg information */ /* record memcg information */
if (do_swap_account && memcg) { if (do_swap_account && memcg) {
swap_cgroup_record(ent, memcg); swap_cgroup_record(ent, css_id(&memcg->css));
mem_cgroup_get(memcg); mem_cgroup_get(memcg);
} }
if (memcg) if (memcg)
...@@ -1488,15 +1530,23 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) ...@@ -1488,15 +1530,23 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
void mem_cgroup_uncharge_swap(swp_entry_t ent) void mem_cgroup_uncharge_swap(swp_entry_t ent)
{ {
struct mem_cgroup *memcg; struct mem_cgroup *memcg;
unsigned short id;
if (!do_swap_account) if (!do_swap_account)
return; return;
memcg = swap_cgroup_record(ent, NULL); id = swap_cgroup_record(ent, 0);
rcu_read_lock();
memcg = mem_cgroup_lookup(id);
if (memcg) { if (memcg) {
/*
* We uncharge this because swap is freed.
* This memcg can be obsolete one. We avoid calling css_tryget
*/
res_counter_uncharge(&memcg->memsw, PAGE_SIZE); res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
mem_cgroup_put(memcg); mem_cgroup_put(memcg);
} }
rcu_read_unlock();
} }
#endif #endif
......
...@@ -285,12 +285,8 @@ struct swap_cgroup_ctrl { ...@@ -285,12 +285,8 @@ struct swap_cgroup_ctrl {
struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES]; struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
/*
* This 8bytes seems big..maybe we can reduce this when we can use "id" for
* cgroup rather than pointer.
*/
struct swap_cgroup { struct swap_cgroup {
struct mem_cgroup *val; unsigned short id;
}; };
#define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup)) #define SC_PER_PAGE (PAGE_SIZE/sizeof(struct swap_cgroup))
#define SC_POS_MASK (SC_PER_PAGE - 1) #define SC_POS_MASK (SC_PER_PAGE - 1)
...@@ -342,10 +338,10 @@ static int swap_cgroup_prepare(int type) ...@@ -342,10 +338,10 @@ static int swap_cgroup_prepare(int type)
* @ent: swap entry to be recorded into * @ent: swap entry to be recorded into
* @mem: mem_cgroup to be recorded * @mem: mem_cgroup to be recorded
* *
* Returns old value at success, NULL at failure. * Returns old value at success, 0 at failure.
* (Of course, old value can be NULL.) * (Of course, old value can be 0.)
*/ */
struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{ {
int type = swp_type(ent); int type = swp_type(ent);
unsigned long offset = swp_offset(ent); unsigned long offset = swp_offset(ent);
...@@ -354,18 +350,18 @@ struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) ...@@ -354,18 +350,18 @@ struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
struct swap_cgroup_ctrl *ctrl; struct swap_cgroup_ctrl *ctrl;
struct page *mappage; struct page *mappage;
struct swap_cgroup *sc; struct swap_cgroup *sc;
struct mem_cgroup *old; unsigned short old;
if (!do_swap_account) if (!do_swap_account)
return NULL; return 0;
ctrl = &swap_cgroup_ctrl[type]; ctrl = &swap_cgroup_ctrl[type];
mappage = ctrl->map[idx]; mappage = ctrl->map[idx];
sc = page_address(mappage); sc = page_address(mappage);
sc += pos; sc += pos;
old = sc->val; old = sc->id;
sc->val = mem; sc->id = id;
return old; return old;
} }
...@@ -374,9 +370,9 @@ struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem) ...@@ -374,9 +370,9 @@ struct mem_cgroup *swap_cgroup_record(swp_entry_t ent, struct mem_cgroup *mem)
* lookup_swap_cgroup - lookup mem_cgroup tied to swap entry * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
* @ent: swap entry to be looked up. * @ent: swap entry to be looked up.
* *
* Returns pointer to mem_cgroup at success. NULL at failure. * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
*/ */
struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) unsigned short lookup_swap_cgroup(swp_entry_t ent)
{ {
int type = swp_type(ent); int type = swp_type(ent);
unsigned long offset = swp_offset(ent); unsigned long offset = swp_offset(ent);
...@@ -385,16 +381,16 @@ struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent) ...@@ -385,16 +381,16 @@ struct mem_cgroup *lookup_swap_cgroup(swp_entry_t ent)
struct swap_cgroup_ctrl *ctrl; struct swap_cgroup_ctrl *ctrl;
struct page *mappage; struct page *mappage;
struct swap_cgroup *sc; struct swap_cgroup *sc;
struct mem_cgroup *ret; unsigned short ret;
if (!do_swap_account) if (!do_swap_account)
return NULL; return 0;
ctrl = &swap_cgroup_ctrl[type]; ctrl = &swap_cgroup_ctrl[type];
mappage = ctrl->map[idx]; mappage = ctrl->map[idx];
sc = page_address(mappage); sc = page_address(mappage);
sc += pos; sc += pos;
ret = sc->val; ret = sc->id;
return ret; return ret;
} }
...@@ -432,7 +428,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages) ...@@ -432,7 +428,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages)
printk(KERN_INFO printk(KERN_INFO
"swap_cgroup: uses %ld bytes of vmalloc for pointer array space" "swap_cgroup: uses %ld bytes of vmalloc for pointer array space"
" and %ld bytes to hold mem_cgroup pointers on swap\n", " and %ld bytes to hold mem_cgroup information per swap ents\n",
array_size, length * PAGE_SIZE); array_size, length * PAGE_SIZE);
printk(KERN_INFO printk(KERN_INFO
"swap_cgroup can be disabled by noswapaccount boot option.\n"); "swap_cgroup can be disabled by noswapaccount boot option.\n");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment