Commit 3f0a2357 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] reduce the dirty threshold when there's a lot of mapped

Dirty memory thresholds are currently set by /proc/sys/vm/dirty_ratio.

Background writeout levels are controlled by
/proc/sys/vm/dirty_background_ratio.

Problem is that these levels are hard to get right - they are too
static.  If there is a lot of mapped memory around then the 40%
clamping level causes too much dirty data.  We do lots of scanning in
page reclaim, and the VM generally starts getting into distress.  Extra
swapping, extra page unmapping.

It would be much better to simply tell the caller of write(2) to slow
down - to write out their dirty data sooner, to make those written
pages trivially reclaimable.  Penalise the offender, not the innocent
page allocators.

This patch changes the writer throttling code so that we clamp down
much harder on writers if there is a lot of mapped memory in the
machine.  We only permit memory dirtiers to dirty up to 50% of unmapped
memory before forcing them to clean their own pagecache.
parent bf175bc4
......@@ -83,6 +83,49 @@ int dirty_expire_centisecs = 30 * 100;
static void background_writeout(unsigned long _min_pages);
/*
* Work out the current dirty-memory clamping and background writeout
* thresholds.
*
* The main aim here is to lower them aggressively if there is a lot of mapped
* memory around. To avoid stressing page reclaim with lots of unreclaimable
* pages. It is better to clamp down on writers than to start swapping, and
* performing lots of scanning.
*
* We only allow 1/2 of the currently-unmapped memory to be dirtied.
*
* We don't permit the clamping level to fall below 5% - that is getting rather
* excessive.
*
* We make sure that the background writeout level is below the adjusted
* clamping level.
*/
static void
get_dirty_limits(struct page_state *ps, long *background, long *dirty)
{
int background_ratio; /* Percentages */
int dirty_ratio;
int unmapped_ratio;
get_page_state(ps);
unmapped_ratio = 100 - (ps->nr_mapped * 100) / total_pages;
dirty_ratio = vm_dirty_ratio;
if (dirty_ratio > unmapped_ratio / 2)
dirty_ratio = unmapped_ratio / 2;
if (dirty_ratio < 5)
dirty_ratio = 5;
background_ratio = dirty_background_ratio;
if (background_ratio >= dirty_ratio)
background_ratio = dirty_ratio / 2;
*background = (background_ratio * total_pages) / 100;
*dirty = (dirty_ratio * total_pages) / 100;
}
/*
* balance_dirty_pages() must be called by processes which are generating dirty
* data. It looks at the number of dirty pages in the machine and will force
......@@ -97,11 +140,7 @@ void balance_dirty_pages(struct address_space *mapping)
long dirty_thresh;
struct backing_dev_info *bdi = mapping->backing_dev_info;
background_thresh = (dirty_background_ratio * total_pages) / 100;
dirty_thresh = (vm_dirty_ratio * total_pages) / 100;
get_page_state(&ps);
get_dirty_limits(&ps, &background_thresh, &dirty_thresh);
while (ps.nr_dirty + ps.nr_writeback > dirty_thresh) {
struct writeback_control wbc = {
.bdi = bdi,
......@@ -115,7 +154,7 @@ void balance_dirty_pages(struct address_space *mapping)
if (ps.nr_dirty)
writeback_inodes(&wbc);
get_page_state(&ps);
get_dirty_limits(&ps, &background_thresh, &dirty_thresh);
if (ps.nr_dirty + ps.nr_writeback <= dirty_thresh)
break;
blk_congestion_wait(WRITE, HZ/10);
......@@ -170,7 +209,6 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
static void background_writeout(unsigned long _min_pages)
{
long min_pages = _min_pages;
long background_thresh;
struct writeback_control wbc = {
.bdi = NULL,
.sync_mode = WB_SYNC_NONE,
......@@ -180,12 +218,12 @@ static void background_writeout(unsigned long _min_pages)
};
CHECK_EMERGENCY_SYNC
background_thresh = (dirty_background_ratio * total_pages) / 100;
for ( ; ; ) {
struct page_state ps;
long background_thresh;
long dirty_thresh;
get_page_state(&ps);
get_dirty_limits(&ps, &background_thresh, &dirty_thresh);
if (ps.nr_dirty < background_thresh && min_pages <= 0)
break;
wbc.encountered_congestion = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment