Commit 343893e6 authored by Andrew Morton, committed by Russell King

[PATCH] use buffer_boundary() for writeback scheduling hints

This is the replacement for write_mapping_buffers().

Whenever the mpage code sees that it has just written a block which had
buffer_boundary() set, it assumes that the next block is dirty
filesystem metadata.  (This is a good assumption - that's what
buffer_boundary is for).

So we do a lookup in the blockdev mapping for the next block and, if it
is present and dirty, schedule it for IO.

So the indirect blocks in the blockdev mapping get merged with the data
blocks in the file mapping.
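
Concretely (block numbers hypothetical): if ext2 has laid a file's first
twelve data blocks at disk blocks 1000-1011 with the indirect block at
1012, the buffer for block 1011 carries the boundary bit, and the probe
after writing it looks like this.  This is a minimal sketch of the step
the patch adds - the real code is write_boundary_block() in the diff
below:

	#include <linux/fs.h>
	#include <linux/buffer_head.h>

	/*
	 * Worked example, hypothetical layout: data at disk blocks
	 * 1000..1011, indirect block at 1012, boundary bit set on the
	 * buffer for 1011.  __find_get_block() is a cache-only lookup -
	 * it never touches the disk - so probing for an absent or clean
	 * indirect block costs almost nothing.
	 */
	static void probe_after_boundary(struct block_device *bdev,
					unsigned blocksize)
	{
		sector_t bblock = 1011;	/* the buffer_boundary() block */
		struct buffer_head *bh;

		bh = __find_get_block(bdev, bblock + 1, blocksize); /* 1012 */
		if (bh) {
			if (buffer_dirty(bh))
				ll_rw_block(WRITE, 1, &bh); /* behind the data */
			put_bh(bh);
		}
	}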

This is a bit more general than the write_mapping_buffers() approach.
write_mapping_buffers() required the fs to carefully maintain the
correct buffers on mapping->private_list and to remember to call
write_mapping_buffers() itself, and the implementation was generally
rather yuk.  The old contract is sketched below.
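
For contrast, a rough reconstruction of that old contract, built from the
helpers that survive in this commit (mark_buffer_dirty_inode() and
sync_mapping_buffers() both appear in the diff); it is not the exact
removed code:

	#include <linux/fs.h>
	#include <linux/buffer_head.h>

	/*
	 * Reconstruction, not the removed code.  The fs had to do two
	 * things by hand to get indirects written near their data.
	 */
	static void old_dirty_indirect(struct inode *inode,
					struct buffer_head *bh)
	{
		/* 1) file each indirect block's buffer on the inode
		 * mapping's private_list */
		mark_buffer_dirty_inode(bh, inode);
	}

	static int old_flush_metadata(struct address_space *mapping)
	{
		/*
		 * 2) remember to flush that list around data writeback.
		 * The removed helper was write_mapping_buffers();
		 * sync_mapping_buffers() (first hunk below) walks the
		 * same private_list.
		 */
		return sync_mapping_buffers(mapping);
	}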

This version will "just work" for filesystems which implement
buffer_boundary() correctly.  Currently those are ext2, ext3 and some
not-yet-merged reiserfs patches.  JFS implements buffer_boundary() but
does not use ext2-like layouts - so there will be no change there.
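
For illustration, the fs side of that contract - a sketch modelled on an
ext2-style get_block, not the actual ext2 code; the helper
is_last_block_before_indirect() is hypothetical.  All the new mpage code
consumes is the BH_Boundary bit on the mapped buffer:

	#include <linux/fs.h>
	#include <linux/buffer_head.h>

	/*
	 * Hypothetical helper.  For classic ext2 with twelve direct
	 * pointers, file block 11 is the last one before the first
	 * indirect block.
	 */
	static int is_last_block_before_indirect(struct inode *inode,
						sector_t iblock)
	{
		return iblock == 11;
	}

	/* Sketch of an ext2-style get_block (not the actual ext2 code) */
	static int example_get_block(struct inode *inode, sector_t iblock,
				struct buffer_head *bh_result, int create)
	{
		sector_t phys;

		/* ... map (or, if create, allocate) the on-disk block
		 * for iblock; the result goes in phys ... */
		phys = 0;

		bh_result->b_bdev = inode->i_sb->s_bdev;
		bh_result->b_blocknr = phys;
		set_buffer_mapped(bh_result);

		/*
		 * The hint: the next on-disk block is probably our own
		 * indirect block, so writeback should probe for it after
		 * writing this one.
		 */
		if (is_last_block_before_indirect(inode, iblock))
			set_buffer_boundary(bh_result);
		return 0;
	}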

Works nicely.
parent 4ac833da
fs/buffer.c

@@ -735,6 +735,23 @@ int sync_mapping_buffers(struct address_space *mapping)
 }
 EXPORT_SYMBOL(sync_mapping_buffers);
 
+/*
+ * Called when we've recently written block `bblock', and it is known that
+ * `bblock' was for a buffer_boundary() buffer.  This means that the block at
+ * `bblock + 1' is probably a dirty indirect block.  Hunt it down and, if it's
+ * dirty, schedule it for IO.  So that indirects merge nicely with their data.
+ */
+void write_boundary_block(struct block_device *bdev,
+			sector_t bblock, unsigned blocksize)
+{
+	struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize);
+	if (bh) {
+		if (buffer_dirty(bh))
+			ll_rw_block(WRITE, 1, &bh);
+		put_bh(bh);
+	}
+}
+
 void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode)
 {
 	struct address_space *mapping = inode->i_mapping;
fs/mpage.c

@@ -331,6 +331,8 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
 	unsigned first_unmapped = blocks_per_page;
 	struct block_device *bdev = NULL;
 	int boundary = 0;
+	sector_t boundary_block = 0;
+	struct block_device *boundary_bdev = NULL;
 
 	if (page_has_buffers(page)) {
 		struct buffer_head *head = page_buffers(page);
@@ -363,6 +365,10 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
 			}
 			blocks[page_block++] = bh->b_blocknr;
 			boundary = buffer_boundary(bh);
+			if (boundary) {
+				boundary_block = bh->b_blocknr;
+				boundary_bdev = bh->b_bdev;
+			}
 			bdev = bh->b_bdev;
 		} while ((bh = bh->b_this_page) != head);
 
@@ -393,6 +399,10 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
 		if (buffer_new(&map_bh))
 			unmap_underlying_metadata(map_bh.b_bdev,
 						map_bh.b_blocknr);
+		if (buffer_boundary(&map_bh)) {
+			boundary_block = map_bh.b_blocknr;
+			boundary_bdev = map_bh.b_bdev;
+		}
 		if (page_block) {
 			if (map_bh.b_blocknr != blocks[page_block-1] + 1)
 				goto confused;
@@ -464,10 +474,15 @@ mpage_writepage(struct bio *bio, struct page *page, get_block_t get_block,
 	BUG_ON(PageWriteback(page));
 	SetPageWriteback(page);
 	unlock_page(page);
-	if (boundary || (first_unmapped != blocks_per_page))
+	if (boundary || (first_unmapped != blocks_per_page)) {
 		bio = mpage_bio_submit(WRITE, bio);
-	else
+		if (boundary_block) {
+			write_boundary_block(boundary_bdev,
+					boundary_block, 1 << blkbits);
+		}
+	} else {
 		*last_block_in_bio = blocks[blocks_per_page - 1];
+	}
 
 	goto out;
 
 confused:
include/linux/buffer_head.h

@@ -167,6 +167,9 @@ void free_buffer_head(struct buffer_head * bh);
 void FASTCALL(unlock_buffer(struct buffer_head *bh));
 void ll_rw_block(int, int, struct buffer_head * bh[]);
 int submit_bh(int, struct buffer_head *);
+void write_boundary_block(struct block_device *bdev,
+			sector_t bblock, unsigned blocksize);
+
 extern int buffer_heads_over_limit;
 
 /*