Commit 9ae30597 authored by Andrew Morton's avatar Andrew Morton Committed by Linus Torvalds

[PATCH] VM dirty page balancing

- The balance_dirty_pages() logic is simply wrong.  It goes:

	if (value > threshold)
		go_and_write(value - threshold);

  which is just fine for a single process writing data.  But
  for many processes, they *all* go and bring things back into
  balance, and too much data gets written out.

- The

	go_and_write(this much)

  logic is inoperative, because I turned off the ->writeback_mapping()
  function in ext2.  So a call to writeback_unlocked_inodes(this_much)
  doesn't actually decrement and test *this_much.  It will walk every
  inode, all the time.  Silly.

After quickly fixing the above things, the amount of dirty+writeback
memory in the machine nicely stabilises at 500 megabytes across
the run.
parent afae6f7c
......@@ -591,6 +591,8 @@ struct address_space_operations ext2_aops = {
commit_write: generic_commit_write,
bmap: ext2_bmap,
direct_IO: ext2_direct_IO,
writeback_mapping: generic_writeback_mapping,
vm_writeback: generic_vm_writeback,
};
/*
......
......@@ -159,7 +159,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
"SwapTotal: %8lu kB\n"
"SwapFree: %8lu kB\n"
"Dirty: %8lu kB\n"
"Locked: %8lu kB\n",
"Writeback: %8lu kB\n",
K(i.totalram),
K(i.freeram),
K(i.sharedram),
......@@ -175,7 +175,7 @@ static int meminfo_read_proc(char *page, char **start, off_t off,
K(i.totalswap),
K(i.freeswap),
K(ps.nr_dirty),
K(ps.nr_locked)
K(ps.nr_writeback)
);
return proc_calc_metrics(page, start, off, count, eof, len);
......
......@@ -13,7 +13,7 @@
*
* The PG_private bitflag is set if page->private contains a valid value.
*
* During disk I/O, PG_locked_dontuse is used. This bit is set before I/O and
* During disk I/O, PG_locked is used. This bit is set before I/O and
* reset when I/O completes. page_waitqueue(page) is a wait queue of all tasks
* waiting for the I/O on this page to complete.
*
......@@ -28,7 +28,7 @@
*
* Note that the referenced bit, the page->lru list_head and the active,
* inactive_dirty and inactive_clean lists are protected by the
* pagemap_lru_lock, and *NOT* by the usual PG_locked_dontuse bit!
* pagemap_lru_lock, and *NOT* by the usual PG_locked bit!
*
* PG_error is set to indicate that an I/O error occurred on this page.
*
......@@ -47,7 +47,7 @@
* locked- and dirty-page accounting. The top eight bits of page->flags are
* used for page->zone, so putting flag bits there doesn't work.
*/
#define PG_locked_dontuse 0 /* Page is locked. Don't touch. */
#define PG_locked 0 /* Page is locked. Don't touch. */
#define PG_error 1
#define PG_referenced 2
#define PG_uptodate 3
......@@ -71,7 +71,7 @@
*/
extern struct page_state {
unsigned long nr_dirty;
unsigned long nr_locked;
unsigned long nr_writeback;
unsigned long nr_pagecache;
} ____cacheline_aligned_in_smp page_states[NR_CPUS];
......@@ -91,37 +91,16 @@ extern void get_page_state(struct page_state *ret);
/*
* Manipulation of page state flags
*/
#define PageLocked(page) test_bit(PG_locked_dontuse, &(page)->flags)
#define PageLocked(page) \
test_bit(PG_locked, &(page)->flags)
#define SetPageLocked(page) \
do { \
if (!test_and_set_bit(PG_locked_dontuse, \
&(page)->flags)) \
inc_page_state(nr_locked); \
} while (0)
set_bit(PG_locked, &(page)->flags)
#define TestSetPageLocked(page) \
({ \
int ret; \
ret = test_and_set_bit(PG_locked_dontuse, \
&(page)->flags); \
if (!ret) \
inc_page_state(nr_locked); \
ret; \
})
test_and_set_bit(PG_locked, &(page)->flags)
#define ClearPageLocked(page) \
do { \
if (test_and_clear_bit(PG_locked_dontuse, \
&(page)->flags)) \
dec_page_state(nr_locked); \
} while (0)
clear_bit(PG_locked, &(page)->flags)
#define TestClearPageLocked(page) \
({ \
int ret; \
ret = test_and_clear_bit(PG_locked_dontuse, \
&(page)->flags); \
if (ret) \
dec_page_state(nr_locked); \
ret; \
})
test_and_clear_bit(PG_locked, &(page)->flags)
#define PageError(page) test_bit(PG_error, &(page)->flags)
#define SetPageError(page) set_bit(PG_error, &(page)->flags)
......@@ -201,12 +180,36 @@ extern void get_page_state(struct page_state *ret);
#define PagePrivate(page) test_bit(PG_private, &(page)->flags)
#define PageWriteback(page) test_bit(PG_writeback, &(page)->flags)
#define SetPageWriteback(page) set_bit(PG_writeback, &(page)->flags)
#define ClearPageWriteback(page) clear_bit(PG_writeback, &(page)->flags)
#define SetPageWriteback(page) \
do { \
if (!test_and_set_bit(PG_writeback, \
&(page)->flags)) \
inc_page_state(nr_writeback); \
} while (0)
#define TestSetPageWriteback(page) \
test_and_set_bit(PG_writeback, &(page)->flags)
({ \
int ret; \
ret = test_and_set_bit(PG_writeback, \
&(page)->flags); \
if (!ret) \
inc_page_state(nr_writeback); \
ret; \
})
#define ClearPageWriteback(page) \
do { \
if (test_and_clear_bit(PG_writeback, \
&(page)->flags)) \
dec_page_state(nr_writeback); \
} while (0)
#define TestClearPageWriteback(page) \
test_and_clear_bit(PG_writeback, &(page)->flags)
({ \
int ret; \
ret = test_and_clear_bit(PG_writeback, \
&(page)->flags); \
if (ret) \
dec_page_state(nr_writeback); \
ret; \
})
/*
* The PageSwapCache predicate doesn't use a PG_flag at this time,
......
......@@ -628,7 +628,7 @@ static void wait_on_page_bit(struct page *page, int bit_nr)
*/
void ___wait_on_page_locked(struct page *page)
{
wait_on_page_bit(page, PG_locked_dontuse);
wait_on_page_bit(page, PG_locked);
}
EXPORT_SYMBOL(___wait_on_page_locked);
......
......@@ -29,12 +29,12 @@
/*
* Start background writeback (via pdflush) at this level
*/
static int dirty_background_ratio = 30;
static int dirty_background_ratio = 40;
/*
* The generator of dirty data starts async writeback at this level
*/
static int dirty_async_ratio = 45;
static int dirty_async_ratio = 50;
/*
* The generator of dirty data performs sync writeout at this level
......@@ -62,25 +62,28 @@ void balance_dirty_pages(struct address_space *mapping)
int async_thresh;
int sync_thresh;
int wake_pdflush = 0;
unsigned long dirty_and_locked;
unsigned long dirty_and_writeback;
get_page_state(&ps);
dirty_and_locked = ps.nr_dirty + ps.nr_locked;
dirty_and_writeback = ps.nr_dirty + ps.nr_writeback;
background_thresh = (dirty_background_ratio * tot) / 100;
async_thresh = (dirty_async_ratio * tot) / 100;
sync_thresh = (dirty_sync_ratio * tot) / 100;
if (dirty_and_locked > sync_thresh) {
int nr_to_write = dirty_and_locked - async_thresh;
if (dirty_and_writeback > sync_thresh) {
int nr_to_write = 1500;
printk("sync thresh\n");
writeback_unlocked_inodes(&nr_to_write, WB_SYNC_LAST, NULL);
get_page_state(&ps);
dirty_and_writeback = ps.nr_dirty + ps.nr_writeback;
wake_pdflush = 1;
} else if (dirty_and_locked > async_thresh) {
int nr_to_write = dirty_and_locked - async_thresh;
} else if (dirty_and_writeback > async_thresh) {
int nr_to_write = 1500;
writeback_unlocked_inodes(&nr_to_write, WB_SYNC_NONE, NULL);
} else if (dirty_and_locked > background_thresh) {
} else if (dirty_and_writeback > background_thresh) {
wake_pdflush = 1;
}
......@@ -88,9 +91,8 @@ void balance_dirty_pages(struct address_space *mapping)
/*
* There is no flush thread against this device. Start one now.
*/
get_page_state(&ps);
if (ps.nr_dirty > 0) {
pdflush_flush(ps.nr_dirty);
if (dirty_and_writeback > async_thresh) {
pdflush_flush(dirty_and_writeback - async_thresh);
yield();
}
}
......@@ -109,7 +111,7 @@ void balance_dirty_pages_ratelimited(struct address_space *mapping)
preempt_disable();
cpu = smp_processor_id();
if (ratelimits[cpu].count++ >= 32) {
if (ratelimits[cpu].count++ >= 1000) {
ratelimits[cpu].count = 0;
preempt_enable();
balance_dirty_pages(mapping);
......
......@@ -584,7 +584,7 @@ void get_page_state(struct page_state *ret)
int pcpu;
ret->nr_dirty = 0;
ret->nr_locked = 0;
ret->nr_writeback = 0;
ret->nr_pagecache = 0;
for (pcpu = 0; pcpu < smp_num_cpus; pcpu++) {
......@@ -592,7 +592,7 @@ void get_page_state(struct page_state *ret)
ps = &page_states[cpu_logical_map(pcpu)];
ret->nr_dirty += ps->nr_dirty;
ret->nr_locked += ps->nr_locked;
ret->nr_writeback += ps->nr_writeback;
ret->nr_pagecache += ps->nr_pagecache;
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment