[PATCH] reduce the dirty threshold when there's a lot of mapped memory

Dirty memory thresholds are currently set by /proc/sys/vm/dirty_ratio. Background writeout levels are controlled by /proc/sys/vm/dirty_background_ratio. The problem is that these levels are hard to get right - they are too static. If there is a lot of mapped memory around then the 40% clamping level causes too much dirty data. We do lots of scanning in page reclaim, and the VM generally starts getting into distress: extra swapping, extra page unmapping. It would be much better to simply tell the caller of write(2) to slow down - to write out their dirty data sooner, to make those written pages trivially reclaimable. Penalise the offender, not the innocent page allocators. This patch changes the writer throttling code so that we clamp down much harder on writers if there is a lot of mapped memory in the machine. We only permit memory dirtiers to dirty up to 50% of unmapped memory before forcing them to clean their own pagecache.

[PATCH] reduce the dirty threshold when there's a lot of mapped memory
Dirty memory thresholds are currently set by /proc/sys/vm/dirty_ratio. Background writeout levels are controlled by /proc/sys/vm/dirty_background_ratio. The problem is that these levels are hard to get right - they are too static. If there is a lot of mapped memory around then the 40% clamping level causes too much dirty data. We do lots of scanning in page reclaim, and the VM generally starts getting into distress: extra swapping, extra page unmapping. It would be much better to simply tell the caller of write(2) to slow down - to write out their dirty data sooner, to make those written pages trivially reclaimable. Penalise the offender, not the innocent page allocators. This patch changes the writer throttling code so that we clamp down much harder on writers if there is a lot of mapped memory in the machine. We only permit memory dirtiers to dirty up to 50% of unmapped memory before forcing them to clean their own pagecache.
3f0a2357 · Andrew Morton · Linus Torvalds · bf175bc4 · 3f0a2357
Commit 3f0a2357 authored Oct 12, 2002 by Andrew Morton Committed by Linus Torvalds Oct 12, 2002
Hide whitespace changes
Inline Side-by-side

Showing with 48 additions and 10 deletions

mm/page-writeback.c mm/page-writeback.c +48 -10

No files found.
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -83,6 +83,49 @@ int dirty_expire_centisecs = 30 * 100;

 static void background_writeout(unsigned long _min_pages);

+/*
+ * Work out the current dirty-memory clamping and background writeout
+ * thresholds, each as a percentage of total_pages.  Refreshes *ps via
+ * get_page_state() and stores the results in *background and *dirty.
+ *
+ * The main aim here is to lower them aggressively if there is a lot of mapped
+ * memory around, to avoid stressing page reclaim with lots of unreclaimable
+ * pages.  It is better to clamp down on writers than to start swapping and
+ * performing lots of scanning.
+ *
+ * We only allow 1/2 of the currently-unmapped memory to be dirtied, but we
+ * don't permit the clamping level to fall below 5% - that is getting rather
+ * excessive.
+ *
+ * If the background writeout level is not below the adjusted clamping level
+ * it is set to half of that level.
+ */
+static void
+get_dirty_limits(struct page_state *ps, long *background, long *dirty)
+{
+	int background_ratio;		/* Percentages */
+	int dirty_ratio;
+	int unmapped_ratio;
+
+	get_page_state(ps);
+	/* Share of total_pages that is not mapped, in percent */
+	unmapped_ratio = 100 - (ps->nr_mapped * 100) / total_pages;
+	/* Start from vm_dirty_ratio, capped at half of the unmapped share */
+	dirty_ratio = vm_dirty_ratio;
+	if (dirty_ratio > unmapped_ratio / 2)
+		dirty_ratio = unmapped_ratio / 2;
+	/* ...but never clamp below 5% */
+	if (dirty_ratio < 5)
+		dirty_ratio = 5;
+	/* Keep the background level strictly below the clamping level */
+	background_ratio = dirty_background_ratio;
+	if (background_ratio >= dirty_ratio)
+		background_ratio = dirty_ratio / 2;
+	/* Scale both percentages by total_pages for the callers */
+	*background = (background_ratio * total_pages) / 100;
+	*dirty = (dirty_ratio * total_pages) / 100;
+}
+
 /*
 * balance_dirty_pages() must be called by processes which are generating dirty
 * data.  It looks at the number of dirty pages in the machine and will force
@@ -97,11 +140,7 @@ void balance_dirty_pages(struct address_space *mapping)
 	long dirty_thresh;
 	struct backing_dev_info *bdi = mapping->backing_dev_info;

-	background_thresh = (dirty_background_ratio * total_pages) / 100;
-	dirty_thresh = (vm_dirty_ratio * total_pages) / 100;
-
-	get_page_state(&ps);
-
+	get_dirty_limits(&ps, &background_thresh, &dirty_thresh);
 	while (ps.nr_dirty + ps.nr_writeback > dirty_thresh) {
 		struct writeback_control wbc = {
 			.bdi		= bdi,
@@ -115,7 +154,7 @@ void balance_dirty_pages(struct address_space *mapping)
 		if (ps.nr_dirty)
 			writeback_inodes(&wbc);

-		get_page_state(&ps);
+		get_dirty_limits(&ps, &background_thresh, &dirty_thresh);
 		if (ps.nr_dirty + ps.nr_writeback <= dirty_thresh)
 			break;
 		blk_congestion_wait(WRITE, HZ/10);
@@ -170,7 +209,6 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
 static void background_writeout(unsigned long _min_pages)
 {
 	long min_pages = _min_pages;
-	long background_thresh;
 	struct writeback_control wbc = {
 		.bdi		= NULL,
 		.sync_mode	= WB_SYNC_NONE,
@@ -180,12 +218,12 @@ static void background_writeout(unsigned long _min_pages)
 	};

 	CHECK_EMERGENCY_SYNC
-
-	background_thresh = (dirty_background_ratio * total_pages) / 100;
 	for ( ; ; ) {
 		struct page_state ps;
+		long background_thresh;
+		long dirty_thresh;

-		get_page_state(&ps);
+		get_dirty_limits(&ps, &background_thresh, &dirty_thresh);
 		if (ps.nr_dirty < background_thresh && min_pages <= 0)
 			break;
 		wbc.encountered_congestion = 0;