Commit 82369553 authored by Hugh Dickins's avatar Hugh Dickins Committed by Linus Torvalds

memcgroup: fix hang with shmem/tmpfs

The memcgroup regime relies upon a cgroup reclaiming pages from itself within
add_to_page_cache: which may involve some waiting.  Whereas shmem and tmpfs
rely upon using add_to_page_cache while holding a spinlock: when it cannot
wait.  The consequence is that when a cgroup reaches its limit, shmem_getpage
just hangs - unless there is outside memory pressure too, neither kswapd nor
radix_tree_preload get it out of the retry loop.

In most cases we can mem_cgroup_cache_charge the page waitably first, to
attach the page_cgroup in advance, so add_to_page_cache will do no more than
increment a count; then mem_cgroup_uncharge_page after (in both success and
failure cases) to balance the books again.

And where there used to be a congestion_wait for kswapd (recently made
redundant by radix_tree_preload), use mem_cgroup_cache_charge with NULL page
to go through a cycle of allocation and freeing, without accounting to any
particular page, and without updating the statistics vector.  This brings the
cgroup below its limit so the next try usually succeeds.
Signed-off-by: default avatarHugh Dickins <hugh@veritas.com>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 3be91277
...@@ -329,10 +329,12 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, ...@@ -329,10 +329,12 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
* with it * with it
*/ */
retry: retry:
if (page) {
lock_page_cgroup(page); lock_page_cgroup(page);
pc = page_get_page_cgroup(page); pc = page_get_page_cgroup(page);
/* /*
* The page_cgroup exists and the page has already been accounted * The page_cgroup exists and
* the page has already been accounted.
*/ */
if (pc) { if (pc) {
if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) { if (unlikely(!atomic_inc_not_zero(&pc->ref_cnt))) {
...@@ -346,6 +348,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, ...@@ -346,6 +348,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
} }
} }
unlock_page_cgroup(page); unlock_page_cgroup(page);
}
pc = kzalloc(sizeof(struct page_cgroup), gfp_mask); pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
if (pc == NULL) if (pc == NULL)
...@@ -404,7 +407,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, ...@@ -404,7 +407,7 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
pc->flags |= PAGE_CGROUP_FLAG_CACHE; pc->flags |= PAGE_CGROUP_FLAG_CACHE;
if (page_cgroup_assign_new_page_cgroup(page, pc)) { if (!page || page_cgroup_assign_new_page_cgroup(page, pc)) {
/* /*
* Another charge has been added to this page already. * Another charge has been added to this page already.
* We take lock_page_cgroup(page) again and read * We take lock_page_cgroup(page) again and read
...@@ -413,6 +416,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, ...@@ -413,6 +416,8 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm,
res_counter_uncharge(&mem->res, PAGE_SIZE); res_counter_uncharge(&mem->res, PAGE_SIZE);
css_put(&mem->css); css_put(&mem->css);
kfree(pc); kfree(pc);
if (!page)
goto done;
goto retry; goto retry;
} }
......
...@@ -912,9 +912,13 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s ...@@ -912,9 +912,13 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
error = 1; error = 1;
if (!inode) if (!inode)
goto out; goto out;
error = radix_tree_preload(GFP_KERNEL); /* Precharge page while we can wait, compensate afterwards */
error = mem_cgroup_cache_charge(page, current->mm, GFP_KERNEL);
if (error) if (error)
goto out; goto out;
error = radix_tree_preload(GFP_KERNEL);
if (error)
goto uncharge;
error = 1; error = 1;
spin_lock(&info->lock); spin_lock(&info->lock);
...@@ -947,6 +951,8 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s ...@@ -947,6 +951,8 @@ static int shmem_unuse_inode(struct shmem_inode_info *info, swp_entry_t entry, s
shmem_swp_unmap(ptr); shmem_swp_unmap(ptr);
spin_unlock(&info->lock); spin_unlock(&info->lock);
radix_tree_preload_end(); radix_tree_preload_end();
uncharge:
mem_cgroup_uncharge_page(page);
out: out:
unlock_page(page); unlock_page(page);
page_cache_release(page); page_cache_release(page);
...@@ -1308,6 +1314,13 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, ...@@ -1308,6 +1314,13 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
spin_unlock(&info->lock); spin_unlock(&info->lock);
unlock_page(swappage); unlock_page(swappage);
page_cache_release(swappage); page_cache_release(swappage);
if (error == -ENOMEM) {
/* allow reclaim from this memory cgroup */
error = mem_cgroup_cache_charge(NULL,
current->mm, gfp & ~__GFP_HIGHMEM);
if (error)
goto failed;
}
goto repeat; goto repeat;
} }
} else if (sgp == SGP_READ && !filepage) { } else if (sgp == SGP_READ && !filepage) {
...@@ -1353,6 +1366,17 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, ...@@ -1353,6 +1366,17 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
goto failed; goto failed;
} }
/* Precharge page while we can wait, compensate after */
error = mem_cgroup_cache_charge(filepage, current->mm,
gfp & ~__GFP_HIGHMEM);
if (error) {
page_cache_release(filepage);
shmem_unacct_blocks(info->flags, 1);
shmem_free_blocks(inode, 1);
filepage = NULL;
goto failed;
}
spin_lock(&info->lock); spin_lock(&info->lock);
entry = shmem_swp_alloc(info, idx, sgp); entry = shmem_swp_alloc(info, idx, sgp);
if (IS_ERR(entry)) if (IS_ERR(entry))
...@@ -1364,6 +1388,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, ...@@ -1364,6 +1388,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
if (error || swap.val || 0 != add_to_page_cache_lru( if (error || swap.val || 0 != add_to_page_cache_lru(
filepage, mapping, idx, GFP_NOWAIT)) { filepage, mapping, idx, GFP_NOWAIT)) {
spin_unlock(&info->lock); spin_unlock(&info->lock);
mem_cgroup_uncharge_page(filepage);
page_cache_release(filepage); page_cache_release(filepage);
shmem_unacct_blocks(info->flags, 1); shmem_unacct_blocks(info->flags, 1);
shmem_free_blocks(inode, 1); shmem_free_blocks(inode, 1);
...@@ -1372,6 +1397,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx, ...@@ -1372,6 +1397,7 @@ static int shmem_getpage(struct inode *inode, unsigned long idx,
goto failed; goto failed;
goto repeat; goto repeat;
} }
mem_cgroup_uncharge_page(filepage);
info->flags |= SHMEM_PAGEIN; info->flags |= SHMEM_PAGEIN;
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment