Commit 8d6282a1, authored by Andrew Morton, committed by Linus Torvalds

[PATCH] hot-n-cold pages: free and allocate hints

Add a `cold' hint to struct pagevec, and teach truncate and page
reclaim to use it.

Empirical testing showed that truncate's pages tend to be hot, and page
reclaim's pages are certainly cold.
parent 5019ce29
...@@ -263,7 +263,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages, ...@@ -263,7 +263,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
sector_t last_block_in_bio = 0; sector_t last_block_in_bio = 0;
struct pagevec lru_pvec; struct pagevec lru_pvec;
pagevec_init(&lru_pvec); pagevec_init(&lru_pvec, 0);
for (page_idx = 0; page_idx < nr_pages; page_idx++) { for (page_idx = 0; page_idx < nr_pages; page_idx++) {
struct page *page = list_entry(pages->prev, struct page, list); struct page *page = list_entry(pages->prev, struct page, list);
...@@ -560,7 +560,7 @@ mpage_writepages(struct address_space *mapping, ...@@ -560,7 +560,7 @@ mpage_writepages(struct address_space *mapping,
if (get_block == NULL) if (get_block == NULL)
writepage = mapping->a_ops->writepage; writepage = mapping->a_ops->writepage;
pagevec_init(&pvec); pagevec_init(&pvec, 0);
write_lock(&mapping->page_lock); write_lock(&mapping->page_lock);
list_splice_init(&mapping->dirty_pages, &mapping->io_pages); list_splice_init(&mapping->dirty_pages, &mapping->io_pages);
......
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
#define page_cache_get(page) get_page(page) #define page_cache_get(page) get_page(page)
#define page_cache_release(page) put_page(page) #define page_cache_release(page) put_page(page)
void release_pages(struct page **pages, int nr); void release_pages(struct page **pages, int nr, int cold);
static inline struct page *page_cache_alloc(struct address_space *x) static inline struct page *page_cache_alloc(struct address_space *x)
{ {
......
...@@ -12,6 +12,7 @@ struct address_space; ...@@ -12,6 +12,7 @@ struct address_space;
struct pagevec { struct pagevec {
unsigned nr; unsigned nr;
int cold;
struct page *pages[PAGEVEC_SIZE]; struct page *pages[PAGEVEC_SIZE];
}; };
...@@ -25,7 +26,13 @@ void pagevec_strip(struct pagevec *pvec); ...@@ -25,7 +26,13 @@ void pagevec_strip(struct pagevec *pvec);
unsigned int pagevec_lookup(struct pagevec *pvec, struct address_space *mapping, unsigned int pagevec_lookup(struct pagevec *pvec, struct address_space *mapping,
pgoff_t start, unsigned int nr_pages); pgoff_t start, unsigned int nr_pages);
static inline void pagevec_init(struct pagevec *pvec) static inline void pagevec_init(struct pagevec *pvec, int cold)
{
pvec->nr = 0;
pvec->cold = cold;
}
static inline void pagevec_reinit(struct pagevec *pvec)
{ {
pvec->nr = 0; pvec->nr = 0;
} }
...@@ -49,6 +56,7 @@ static inline unsigned pagevec_add(struct pagevec *pvec, struct page *page) ...@@ -49,6 +56,7 @@ static inline unsigned pagevec_add(struct pagevec *pvec, struct page *page)
return pagevec_space(pvec); return pagevec_space(pvec);
} }
static inline void pagevec_release(struct pagevec *pvec) static inline void pagevec_release(struct pagevec *pvec)
{ {
if (pagevec_count(pvec)) if (pagevec_count(pvec))
......
...@@ -1449,7 +1449,7 @@ generic_file_write_nolock(struct file *file, const struct iovec *iov, ...@@ -1449,7 +1449,7 @@ generic_file_write_nolock(struct file *file, const struct iovec *iov,
/* We can write back this queue in page reclaim */ /* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info; current->backing_dev_info = mapping->backing_dev_info;
pagevec_init(&lru_pvec); pagevec_init(&lru_pvec, 0);
if (unlikely(file->f_error)) { if (unlikely(file->f_error)) {
err = file->f_error; err = file->f_error;
......
...@@ -548,7 +548,7 @@ void __pagevec_free(struct pagevec *pvec) ...@@ -548,7 +548,7 @@ void __pagevec_free(struct pagevec *pvec)
int i = pagevec_count(pvec); int i = pagevec_count(pvec);
while (--i >= 0) while (--i >= 0)
free_hot_page(pvec->pages[i]); free_hot_cold_page(pvec->pages[i], pvec->cold);
} }
void __free_pages(struct page *page, unsigned int order) void __free_pages(struct page *page, unsigned int order)
......
...@@ -49,7 +49,7 @@ read_pages(struct address_space *mapping, struct file *filp, ...@@ -49,7 +49,7 @@ read_pages(struct address_space *mapping, struct file *filp,
unsigned page_idx; unsigned page_idx;
struct pagevec lru_pvec; struct pagevec lru_pvec;
pagevec_init(&lru_pvec); pagevec_init(&lru_pvec, 0);
if (mapping->a_ops->readpages) if (mapping->a_ops->readpages)
return mapping->a_ops->readpages(mapping, pages, nr_pages); return mapping->a_ops->readpages(mapping, pages, nr_pages);
......
...@@ -99,13 +99,13 @@ void __page_cache_release(struct page *page) ...@@ -99,13 +99,13 @@ void __page_cache_release(struct page *page)
* page count inside the lock to see whether shrink_cache grabbed the page * page count inside the lock to see whether shrink_cache grabbed the page
* via the LRU. If it did, give up: shrink_cache will free it. * via the LRU. If it did, give up: shrink_cache will free it.
*/ */
void release_pages(struct page **pages, int nr) void release_pages(struct page **pages, int nr, int cold)
{ {
int i; int i;
struct pagevec pages_to_free; struct pagevec pages_to_free;
struct zone *zone = NULL; struct zone *zone = NULL;
pagevec_init(&pages_to_free); pagevec_init(&pages_to_free, cold);
for (i = 0; i < nr; i++) { for (i = 0; i < nr; i++) {
struct page *page = pages[i]; struct page *page = pages[i];
struct zone *pagezone; struct zone *pagezone;
...@@ -126,7 +126,7 @@ void release_pages(struct page **pages, int nr) ...@@ -126,7 +126,7 @@ void release_pages(struct page **pages, int nr)
if (!pagevec_add(&pages_to_free, page)) { if (!pagevec_add(&pages_to_free, page)) {
spin_unlock_irq(&zone->lru_lock); spin_unlock_irq(&zone->lru_lock);
__pagevec_free(&pages_to_free); __pagevec_free(&pages_to_free);
pagevec_init(&pages_to_free); pagevec_reinit(&pages_to_free);
zone = NULL; /* No lock is held */ zone = NULL; /* No lock is held */
} }
} }
...@@ -139,8 +139,8 @@ void release_pages(struct page **pages, int nr) ...@@ -139,8 +139,8 @@ void release_pages(struct page **pages, int nr)
void __pagevec_release(struct pagevec *pvec) void __pagevec_release(struct pagevec *pvec)
{ {
release_pages(pvec->pages, pagevec_count(pvec)); release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
pagevec_init(pvec); pagevec_reinit(pvec);
} }
/* /*
...@@ -153,7 +153,8 @@ void __pagevec_release_nonlru(struct pagevec *pvec) ...@@ -153,7 +153,8 @@ void __pagevec_release_nonlru(struct pagevec *pvec)
int i; int i;
struct pagevec pages_to_free; struct pagevec pages_to_free;
pagevec_init(&pages_to_free); pagevec_init(&pages_to_free, pvec->cold);
pages_to_free.cold = pvec->cold;
for (i = 0; i < pagevec_count(pvec); i++) { for (i = 0; i < pagevec_count(pvec); i++) {
struct page *page = pvec->pages[i]; struct page *page = pvec->pages[i];
...@@ -162,7 +163,7 @@ void __pagevec_release_nonlru(struct pagevec *pvec) ...@@ -162,7 +163,7 @@ void __pagevec_release_nonlru(struct pagevec *pvec)
pagevec_add(&pages_to_free, page); pagevec_add(&pages_to_free, page);
} }
pagevec_free(&pages_to_free); pagevec_free(&pages_to_free);
pagevec_init(pvec); pagevec_reinit(pvec);
} }
/* /*
......
...@@ -301,7 +301,7 @@ void free_pages_and_swap_cache(struct page **pages, int nr) ...@@ -301,7 +301,7 @@ void free_pages_and_swap_cache(struct page **pages, int nr)
for (i = 0; i < todo; i++) for (i = 0; i < todo; i++)
free_swap_cache(pagep[i]); free_swap_cache(pagep[i]);
release_pages(pagep, todo); release_pages(pagep, todo, 0);
pagep += todo; pagep += todo;
nr -= todo; nr -= todo;
} }
......
...@@ -100,6 +100,10 @@ invalidate_complete_page(struct address_space *mapping, struct page *page) ...@@ -100,6 +100,10 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
* When looking at page->index outside the page lock we need to be careful to * When looking at page->index outside the page lock we need to be careful to
* copy it into a local to avoid races (it could change at any time). * copy it into a local to avoid races (it could change at any time).
* *
* We pass down the cache-hot hint to the page freeing code. Even if the
* mapping is large, it is probably the case that the final pages are the most
* recently touched, and freeing happens in ascending file offset order.
*
* Called under (and serialised by) inode->i_sem. * Called under (and serialised by) inode->i_sem.
*/ */
void truncate_inode_pages(struct address_space *mapping, loff_t lstart) void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
...@@ -110,7 +114,7 @@ void truncate_inode_pages(struct address_space *mapping, loff_t lstart) ...@@ -110,7 +114,7 @@ void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
pgoff_t next; pgoff_t next;
int i; int i;
pagevec_init(&pvec); pagevec_init(&pvec, 0);
next = start; next = start;
while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
for (i = 0; i < pagevec_count(&pvec); i++) { for (i = 0; i < pagevec_count(&pvec); i++) {
...@@ -185,7 +189,7 @@ void invalidate_inode_pages(struct address_space *mapping) ...@@ -185,7 +189,7 @@ void invalidate_inode_pages(struct address_space *mapping)
pgoff_t next = 0; pgoff_t next = 0;
int i; int i;
pagevec_init(&pvec); pagevec_init(&pvec, 0);
while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
for (i = 0; i < pagevec_count(&pvec); i++) { for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i]; struct page *page = pvec.pages[i];
...@@ -226,7 +230,7 @@ void invalidate_inode_pages2(struct address_space *mapping) ...@@ -226,7 +230,7 @@ void invalidate_inode_pages2(struct address_space *mapping)
pgoff_t next = 0; pgoff_t next = 0;
int i; int i;
pagevec_init(&pvec); pagevec_init(&pvec, 0);
while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) { while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
for (i = 0; i < pagevec_count(&pvec); i++) { for (i = 0; i < pagevec_count(&pvec); i++) {
struct page *page = pvec.pages[i]; struct page *page = pvec.pages[i];
......
...@@ -203,7 +203,7 @@ shrink_list(struct list_head *page_list, unsigned int gfp_mask, ...@@ -203,7 +203,7 @@ shrink_list(struct list_head *page_list, unsigned int gfp_mask,
int pgactivate = 0; int pgactivate = 0;
int ret = 0; int ret = 0;
pagevec_init(&freed_pvec); pagevec_init(&freed_pvec, 1);
while (!list_empty(page_list)) { while (!list_empty(page_list)) {
struct page *page; struct page *page;
int may_enter_fs; int may_enter_fs;
...@@ -433,7 +433,7 @@ shrink_cache(const int nr_pages, struct zone *zone, ...@@ -433,7 +433,7 @@ shrink_cache(const int nr_pages, struct zone *zone,
if (nr_to_process < SWAP_CLUSTER_MAX) if (nr_to_process < SWAP_CLUSTER_MAX)
nr_to_process = SWAP_CLUSTER_MAX; nr_to_process = SWAP_CLUSTER_MAX;
pagevec_init(&pvec); pagevec_init(&pvec, 1);
lru_add_drain(); lru_add_drain();
spin_lock_irq(&zone->lru_lock); spin_lock_irq(&zone->lru_lock);
...@@ -617,7 +617,7 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in, ...@@ -617,7 +617,7 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in,
pgdeactivate++; pgdeactivate++;
} }
pagevec_init(&pvec); pagevec_init(&pvec, 1);
spin_lock_irq(&zone->lru_lock); spin_lock_irq(&zone->lru_lock);
while (!list_empty(&l_inactive)) { while (!list_empty(&l_inactive)) {
page = list_entry(l_inactive.prev, struct page, lru); page = list_entry(l_inactive.prev, struct page, lru);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment