Commit 9eb76ee2 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] batched addition of pages to the LRU

The patch goes through the various places which were calling
lru_cache_add() against bulk pages and batches them up.

Also, this whole patch series improves the behaviour of the system
under heavy writeback load.  There is a reduction in page allocation
failures and some reduction in the loss of interactivity caused by page
allocators getting stuck on writeback from the VM.  (This is still bad,
though.)

I think it's due to the change here in mpage_writepages().  That
function originally refiled written-back pages unconditionally to
the head of the inactive list.  The theory was that they should be
moved out of the way of page allocators, who would end up waiting on
them.

It appears that this simply had the effect of pushing dirty, unwritten
data closer to the tail of the inactive list, making things worse.

So instead, if the caller is (typically) balance_dirty_pages() then
leave the pages where they are on the LRU.

If the caller has PF_MEMALLOC set then the pages *have* to be refiled.  This
is because VM writeback is clustered along mapping->dirty_pages, and
it's almost certain that the pages which are being written are near the
tail of the LRU.  If they were left there, page allocators would block
on them too soon.  It would effectively become a synchronous write.
parent 823e0df8
...@@ -263,18 +263,25 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, ...@@ -263,18 +263,25 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
struct bio *bio = NULL; struct bio *bio = NULL;
unsigned page_idx; unsigned page_idx;
sector_t last_block_in_bio = 0; sector_t last_block_in_bio = 0;
struct pagevec lru_pvec;
pagevec_init(&lru_pvec);
for (page_idx = 0; page_idx < nr_pages; page_idx++) { for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_entry(pages->prev, struct page, list); struct page *page = list_entry(pages->prev, struct page, list);
prefetchw(&page->flags); prefetchw(&page->flags);
list_del(&page->list); list_del(&page->list);
if (!add_to_page_cache(page, mapping, page->index)) if (!add_to_page_cache(page, mapping, page->index)) {
bio = do_mpage_readpage(bio, page, bio = do_mpage_readpage(bio, page,
nr_pages - page_idx, nr_pages - page_idx,
&last_block_in_bio, get_block); &last_block_in_bio, get_block);
if (!pagevec_add(&lru_pvec, page))
__pagevec_lru_add(&lru_pvec);
} else {
page_cache_release(page); page_cache_release(page);
} }
}
pagevec_lru_add(&lru_pvec);
BUG_ON(!list_empty(pages)); BUG_ON(!list_empty(pages));
if (bio) if (bio)
mpage_bio_submit(READ, bio); mpage_bio_submit(READ, bio);
...@@ -566,7 +573,8 @@ mpage_writepages(struct address_space *mapping, ...@@ -566,7 +573,8 @@ mpage_writepages(struct address_space *mapping,
bio = mpage_writepage(bio, page, get_block, bio = mpage_writepage(bio, page, get_block,
&last_block_in_bio, &ret); &last_block_in_bio, &ret);
} }
if (!PageActive(page) && PageLRU(page)) { if ((current->flags & PF_MEMALLOC) &&
!PageActive(page) && PageLRU(page)) {
if (!pagevec_add(&pvec, page)) if (!pagevec_add(&pvec, page))
pagevec_deactivate_inactive(&pvec); pagevec_deactivate_inactive(&pvec);
page = NULL; page = NULL;
......
...@@ -58,6 +58,8 @@ extern struct page * read_cache_page(struct address_space *mapping, ...@@ -58,6 +58,8 @@ extern struct page * read_cache_page(struct address_space *mapping,
extern int add_to_page_cache(struct page *page, extern int add_to_page_cache(struct page *page,
struct address_space *mapping, unsigned long index); struct address_space *mapping, unsigned long index);
extern int add_to_page_cache_lru(struct page *page,
struct address_space *mapping, unsigned long index);
extern void remove_from_page_cache(struct page *page); extern void remove_from_page_cache(struct page *page);
extern void __remove_from_page_cache(struct page *page); extern void __remove_from_page_cache(struct page *page);
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
#include <linux/iobuf.h> #include <linux/iobuf.h>
#include <linux/hash.h> #include <linux/hash.h>
#include <linux/writeback.h> #include <linux/writeback.h>
#include <linux/pagevec.h>
#include <linux/security.h> #include <linux/security.h>
/* /*
* This is needed for the following functions: * This is needed for the following functions:
...@@ -530,27 +531,37 @@ int filemap_fdatawait(struct address_space * mapping) ...@@ -530,27 +531,37 @@ int filemap_fdatawait(struct address_space * mapping)
* In the case of swapcache, try_to_swap_out() has already locked the page, so * In the case of swapcache, try_to_swap_out() has already locked the page, so
* SetPageLocked() is ugly-but-OK there too. The required page state has been * SetPageLocked() is ugly-but-OK there too. The required page state has been
* set up by swap_out_add_to_swap_cache(). * set up by swap_out_add_to_swap_cache().
*
* This function does not add the page to the LRU. The caller must do that.
*/ */
int add_to_page_cache(struct page *page, int add_to_page_cache(struct page *page,
struct address_space *mapping, unsigned long offset) struct address_space *mapping, pgoff_t offset)
{ {
int error; int error;
page_cache_get(page);
write_lock(&mapping->page_lock); write_lock(&mapping->page_lock);
error = radix_tree_insert(&mapping->page_tree, offset, page); error = radix_tree_insert(&mapping->page_tree, offset, page);
if (!error) { if (!error) {
SetPageLocked(page); SetPageLocked(page);
ClearPageDirty(page); ClearPageDirty(page);
___add_to_page_cache(page, mapping, offset); ___add_to_page_cache(page, mapping, offset);
page_cache_get(page); } else {
page_cache_release(page);
} }
write_unlock(&mapping->page_lock); write_unlock(&mapping->page_lock);
/* Anon pages are already on the LRU */
if (!error && !PageSwapCache(page))
lru_cache_add(page);
return error; return error;
} }
int add_to_page_cache_lru(struct page *page,
struct address_space *mapping, pgoff_t offset)
{
int ret = add_to_page_cache(page, mapping, offset);
if (ret == 0)
lru_cache_add(page);
return ret;
}
/* /*
* This adds the requested page to the page cache if it isn't already there, * This adds the requested page to the page cache if it isn't already there,
* and schedules an I/O to read in its contents from disk. * and schedules an I/O to read in its contents from disk.
...@@ -566,7 +577,7 @@ static int page_cache_read(struct file * file, unsigned long offset) ...@@ -566,7 +577,7 @@ static int page_cache_read(struct file * file, unsigned long offset)
if (!page) if (!page)
return -ENOMEM; return -ENOMEM;
error = add_to_page_cache(page, mapping, offset); error = add_to_page_cache_lru(page, mapping, offset);
if (!error) { if (!error) {
error = mapping->a_ops->readpage(file, page); error = mapping->a_ops->readpage(file, page);
page_cache_release(page); page_cache_release(page);
...@@ -797,7 +808,7 @@ struct page *find_or_create_page(struct address_space *mapping, ...@@ -797,7 +808,7 @@ struct page *find_or_create_page(struct address_space *mapping,
if (!cached_page) if (!cached_page)
return NULL; return NULL;
} }
err = add_to_page_cache(cached_page, mapping, index); err = add_to_page_cache_lru(cached_page, mapping, index);
if (!err) { if (!err) {
page = cached_page; page = cached_page;
cached_page = NULL; cached_page = NULL;
...@@ -830,7 +841,7 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index) ...@@ -830,7 +841,7 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index)
return NULL; return NULL;
} }
page = alloc_pages(mapping->gfp_mask & ~__GFP_FS, 0); page = alloc_pages(mapping->gfp_mask & ~__GFP_FS, 0);
if (page && add_to_page_cache(page, mapping, index)) { if (page && add_to_page_cache_lru(page, mapping, index)) {
page_cache_release(page); page_cache_release(page);
page = NULL; page = NULL;
} }
...@@ -994,7 +1005,7 @@ void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * ...@@ -994,7 +1005,7 @@ void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t *
break; break;
} }
} }
error = add_to_page_cache(cached_page, mapping, index); error = add_to_page_cache_lru(cached_page, mapping, index);
if (error) { if (error) {
if (error == -EEXIST) if (error == -EEXIST)
goto find_page; goto find_page;
...@@ -1704,7 +1715,7 @@ struct page *__read_cache_page(struct address_space *mapping, ...@@ -1704,7 +1715,7 @@ struct page *__read_cache_page(struct address_space *mapping,
if (!cached_page) if (!cached_page)
return ERR_PTR(-ENOMEM); return ERR_PTR(-ENOMEM);
} }
err = add_to_page_cache(cached_page, mapping, index); err = add_to_page_cache_lru(cached_page, mapping, index);
if (err == -EEXIST) if (err == -EEXIST)
goto repeat; goto repeat;
if (err < 0) { if (err < 0) {
...@@ -1764,8 +1775,14 @@ struct page *read_cache_page(struct address_space *mapping, ...@@ -1764,8 +1775,14 @@ struct page *read_cache_page(struct address_space *mapping,
return page; return page;
} }
static inline struct page * __grab_cache_page(struct address_space *mapping, /*
unsigned long index, struct page **cached_page) * If the page was newly created, increment its refcount and add it to the
* caller's lru-buffering pagevec. This function is specifically for
* generic_file_write().
*/
static inline struct page *
__grab_cache_page(struct address_space *mapping, unsigned long index,
struct page **cached_page, struct pagevec *lru_pvec)
{ {
int err; int err;
struct page *page; struct page *page;
...@@ -1782,6 +1799,9 @@ static inline struct page * __grab_cache_page(struct address_space *mapping, ...@@ -1782,6 +1799,9 @@ static inline struct page * __grab_cache_page(struct address_space *mapping,
goto repeat; goto repeat;
if (err == 0) { if (err == 0) {
page = *cached_page; page = *cached_page;
page_cache_get(page);
if (!pagevec_add(lru_pvec, page))
__pagevec_lru_add(lru_pvec);
*cached_page = NULL; *cached_page = NULL;
} }
} }
...@@ -1828,6 +1848,7 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf, ...@@ -1828,6 +1848,7 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf,
int err; int err;
unsigned bytes; unsigned bytes;
time_t time_now; time_t time_now;
struct pagevec lru_pvec;
if (unlikely((ssize_t)count < 0)) if (unlikely((ssize_t)count < 0))
return -EINVAL; return -EINVAL;
...@@ -1949,6 +1970,7 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf, ...@@ -1949,6 +1970,7 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf,
goto out_status; goto out_status;
} }
pagevec_init(&lru_pvec);
do { do {
unsigned long index; unsigned long index;
unsigned long offset; unsigned long offset;
...@@ -1972,7 +1994,7 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf, ...@@ -1972,7 +1994,7 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf,
__get_user(dummy, buf+bytes-1); __get_user(dummy, buf+bytes-1);
} }
page = __grab_cache_page(mapping, index, &cached_page); page = __grab_cache_page(mapping, index, &cached_page, &lru_pvec);
if (!page) { if (!page) {
status = -ENOMEM; status = -ENOMEM;
break; break;
...@@ -2034,6 +2056,7 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf, ...@@ -2034,6 +2056,7 @@ ssize_t generic_file_write_nolock(struct file *file, const char *buf,
out_status: out_status:
err = written ? written : status; err = written ? written : status;
out: out:
pagevec_lru_add(&lru_pvec);
return err; return err;
} }
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/blkdev.h> #include <linux/blkdev.h>
#include <linux/backing-dev.h> #include <linux/backing-dev.h>
#include <linux/pagevec.h>
struct backing_dev_info default_backing_dev_info = { struct backing_dev_info default_backing_dev_info = {
.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE, .ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE,
...@@ -36,6 +37,9 @@ read_pages(struct file *file, struct address_space *mapping, ...@@ -36,6 +37,9 @@ read_pages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages) struct list_head *pages, unsigned nr_pages)
{ {
unsigned page_idx; unsigned page_idx;
struct pagevec lru_pvec;
pagevec_init(&lru_pvec);
if (mapping->a_ops->readpages) if (mapping->a_ops->readpages)
return mapping->a_ops->readpages(mapping, pages, nr_pages); return mapping->a_ops->readpages(mapping, pages, nr_pages);
...@@ -43,10 +47,15 @@ read_pages(struct file *file, struct address_space *mapping, ...@@ -43,10 +47,15 @@ read_pages(struct file *file, struct address_space *mapping,
for (page_idx = 0; page_idx < nr_pages; page_idx++) { for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_entry(pages->prev, struct page, list); struct page *page = list_entry(pages->prev, struct page, list);
list_del(&page->list); list_del(&page->list);
if (!add_to_page_cache(page, mapping, page->index)) if (!add_to_page_cache(page, mapping, page->index)) {
if (!pagevec_add(&lru_pvec, page))
__pagevec_lru_add(&lru_pvec);
mapping->a_ops->readpage(file, page); mapping->a_ops->readpage(file, page);
} else {
page_cache_release(page); page_cache_release(page);
} }
}
pagevec_lru_add(&lru_pvec);
return 0; return 0;
} }
......
...@@ -668,7 +668,7 @@ static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct ...@@ -668,7 +668,7 @@ static struct page * shmem_getpage_locked(struct shmem_inode_info *info, struct
page = page_cache_alloc(mapping); page = page_cache_alloc(mapping);
if (!page) if (!page)
goto no_mem; goto no_mem;
error = add_to_page_cache(page, mapping, idx); error = add_to_page_cache_lru(page, mapping, idx);
if (error < 0) { if (error < 0) {
page_cache_release(page); page_cache_release(page);
goto no_mem; goto no_mem;
......
...@@ -71,6 +71,9 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry) ...@@ -71,6 +71,9 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry)
return -ENOENT; return -ENOENT;
} }
error = add_to_page_cache(page, &swapper_space, entry.val); error = add_to_page_cache(page, &swapper_space, entry.val);
/*
* Anon pages are already on the LRU, we don't run lru_cache_add here.
*/
if (error != 0) { if (error != 0) {
swap_free(entry); swap_free(entry);
if (error == -EEXIST) if (error == -EEXIST)
...@@ -275,8 +278,7 @@ int move_from_swap_cache(struct page *page, unsigned long index, ...@@ -275,8 +278,7 @@ int move_from_swap_cache(struct page *page, unsigned long index,
SetPageDirty(page); SetPageDirty(page);
___add_to_page_cache(page, mapping, index); ___add_to_page_cache(page, mapping, index);
/* fix that up */ /* fix that up */
list_del(&page->list); list_move(&page->list, &mapping->dirty_pages);
list_add(&page->list, &mapping->dirty_pages);
write_unlock(&mapping->page_lock); write_unlock(&mapping->page_lock);
write_unlock(&swapper_space.page_lock); write_unlock(&swapper_space.page_lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment