[PATCH] shrink_slab: improved handling of GFP_NOFS allocations

Currently, shrink_slab() will decide that it needs to scan a certain number of dentries, will call shrink_dcache_memory() requesting that this be done, and shrink_dcache_memory() will simply bale out without doing anything because the caller did not have __GFP_FS. This has the potential to disrupt our lovely pagecache-vs-slab balancing act. So change things so that shrinker callouts can return -1, indicating that they baled out. This way, shrink_slab can remember that this slab was owed a certain number of scannings and these will be correctly performed next time a __GFP_FS caller comes by.

[PATCH] shrink_slab: improved handling of GFP_NOFS allocations
Currently, shrink_slab() will decide that it needs to scan a certain number of dentries, will call shrink_dcache_memory() requesting that this be done, and shrink_dcache_memory() will simply bale out without doing anything because the caller did not have __GFP_FS. This has the potential to disrupt our lovely pagecache-vs-slab balancing act. So change things so that shrinker callouts can return -1, indicating that they baled out. This way, shrink_slab can remember that this slab was owed a certain number of scannings and these will be correctly performed next time a __GFP_FS caller comes by.
edb41998 · Andrew Morton · Linus Torvalds · b528cea7 · edb41998 · edb41998
Commit edb41998 authored May 09, 2004 by Andrew Morton Committed by Linus Torvalds May 09, 2004
Hide whitespace changes
Inline Side-by-side

Showing with 30 additions and 28 deletions

fs/dcache.c fs/dcache.c +13 -14

mm/vmscan.c mm/vmscan.c +17 -14

No files found.
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -643,24 +643,23 @@ void shrink_dcache_anon(struct hlist_head *head)
 }

 /*
- * This is called from kswapd when we think we need some more memory.
+ * Scan `nr' dentries and return the number which remain.
+ *
+ * We need to avoid reentering the filesystem if the caller is performing a
+ * GFP_NOFS allocation attempt.  One example deadlock is:
+ *
+ * ext2_new_block->getblk->GFP->shrink_dcache_memory->prune_dcache->
+ * prune_one_dentry->dput->dentry_iput->iput->inode->i_sb->s_op->put_inode->
+ * ext2_discard_prealloc->ext2_free_blocks->lock_super->DEADLOCK.
+ *
+ * In this case we return -1 to tell the caller that we baled.
 */
 static int shrink_dcache_memory(int nr, unsigned int gfp_mask)
 {
 	if (nr) {
-		/*
-		 * Nasty deadlock avoidance.
-		 *
-	 	 * ext2_new_block->getblk->GFP->shrink_dcache_memory->
-		 * prune_dcache->prune_one_dentry->dput->dentry_iput->iput->
-		 * inode->i_sb->s_op->put_inode->ext2_discard_prealloc->
-		 * ext2_free_blocks->lock_super->DEADLOCK.
-	 	 *
-	 	 * We should make sure we don't hold the superblock lock over
-	 	 * block allocations, but for now:
-		 */
-		if (gfp_mask & __GFP_FS)
-			prune_dcache(nr);
+		if (!(gfp_mask & __GFP_FS))
+			return -1;
+		prune_dcache(nr);
 	}
 	return dentry_stat.nr_unused;
 }

--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -153,20 +153,23 @@ static int shrink_slab(unsigned long scanned, unsigned int gfp_mask)
 		delta *= (*shrinker->shrinker)(0, gfp_mask);
 		do_div(delta, pages + 1);
 		shrinker->nr += delta;
-		if (shrinker->nr > SHRINK_BATCH) {
-			long nr_to_scan = shrinker->nr;
-
-			shrinker->nr = 0;
-			mod_page_state(slabs_scanned, nr_to_scan);
-			while (nr_to_scan) {
-				long this_scan = nr_to_scan;
-
-				if (this_scan > 128)
-					this_scan = 128;
-				(*shrinker->shrinker)(this_scan, gfp_mask);
-				nr_to_scan -= this_scan;
-				cond_resched();
-			}
+		if (shrinker->nr < 0)
+			shrinker->nr = LONG_MAX;	/* It wrapped! */
+
+		if (shrinker->nr <= SHRINK_BATCH)
+			continue;
+		while (shrinker->nr) {
+			long this_scan = shrinker->nr;
+			int shrink_ret;
+
+			if (this_scan > 128)
+				this_scan = 128;
+			shrink_ret = (*shrinker->shrinker)(this_scan, gfp_mask);
+			mod_page_state(slabs_scanned, this_scan);
+			shrinker->nr -= this_scan;
+			if (shrink_ret == -1)
+				break;
+			cond_resched();
 		}
 	}
 	up(&shrinker_sem);