Commit bd134f27, authored by Andrew Morton, committed by Linus Torvalds

[PATCH] don't allow background writes to hide dirty buffers

If pdflush hits a locked-and-clean buffer in __block_write_full_page() it
will just pass over the buffer.  Typically the buffer is an ext3 data=ordered
buffer which is being written by kjournald, but a similar thing can happen
with blockdev buffers and ll_rw_block().

This is bad because the buffer is still under I/O and a subsequent fsync's
fdatawait() needs to know about it.

It is not practical to tag the page for writeback - only the submitter of the
I/O can do that, because the submitter has control of the end_io handler.

So instead, redirty the page so that a subsequent fsync's fdatawrite() will
wait on the I/O which is already under way.

There is a risk that pdflush::background_writeout() will lock up, repeatedly
trying and failing to write the same page.  This is prevented by ensuring
that background_writeout() always throttles when it made no progress.
parent d3eb546e
......@@ -1802,14 +1802,18 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
get_bh(bh);
if (!buffer_mapped(bh))
continue;
if (wbc->sync_mode != WB_SYNC_NONE) {
/*
* If it's a fully non-blocking write attempt and we cannot
* lock the buffer then redirty the page. Note that this can
* potentially cause a busy-wait loop from pdflush and kswapd
* activity, but those code paths have their own higher-level
* throttling.
*/
if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
lock_buffer(bh);
} else {
if (test_set_buffer_locked(bh)) {
if (buffer_dirty(bh))
__set_page_dirty_nobuffers(page);
continue;
}
} else if (test_set_buffer_locked(bh)) {
__set_page_dirty_nobuffers(page);
continue;
}
if (test_clear_buffer_dirty(bh)) {
if (!buffer_uptodate(bh))
......@@ -1857,6 +1861,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
if (uptodate)
SetPageUptodate(page);
end_page_writeback(page);
wbc->pages_skipped++; /* We didn't write this page */
}
return err;
......
......@@ -279,6 +279,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
struct inode, i_list);
struct address_space *mapping = inode->i_mapping;
struct backing_dev_info *bdi = mapping->backing_dev_info;
long pages_skipped;
if (bdi->memory_backed) {
if (sb == blockdev_superblock) {
......@@ -326,6 +327,7 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
BUG_ON(inode->i_state & I_FREEING);
__iget(inode);
pages_skipped = wbc->pages_skipped;
__writeback_single_inode(inode, wbc);
if (wbc->sync_mode == WB_SYNC_HOLD) {
inode->dirtied_when = jiffies;
......@@ -333,6 +335,13 @@ sync_sb_inodes(struct super_block *sb, struct writeback_control *wbc)
}
if (current_is_pdflush())
writeback_release(bdi);
if (wbc->pages_skipped != pages_skipped) {
/*
* writeback is not making progress due to locked
* buffers. Skip this inode for now.
*/
list_move(&inode->i_list, &sb->s_dirty);
}
spin_unlock(&inode_lock);
iput(inode);
spin_lock(&inode_lock);
......
......@@ -39,6 +39,7 @@ struct writeback_control {
older than this */
long nr_to_write; /* Write this many pages, and decrement
this for each page written */
long pages_skipped; /* Pages which were not written */
int nonblocking; /* Don't get stuck on request queues */
int encountered_congestion; /* An output: a queue is full */
int for_kupdate; /* A kupdate writeback */
......
......@@ -261,13 +261,13 @@ static void background_writeout(unsigned long _min_pages)
break;
wbc.encountered_congestion = 0;
wbc.nr_to_write = MAX_WRITEBACK_PAGES;
wbc.pages_skipped = 0;
writeback_inodes(&wbc);
min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write;
if (wbc.nr_to_write > 0) {
if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) {
/* Wrote less than expected */
if (wbc.encountered_congestion)
blk_congestion_wait(WRITE, HZ/10);
else
blk_congestion_wait(WRITE, HZ/10);
if (!wbc.encountered_congestion)
break;
}
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment