Commit 3aa1dc77 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] multithread page reclaim

This patch multithreads the main page reclaim function, shrink_cache().

This function used to run under pagemap_lru_lock.  Instead, we grab
that lock, put 32 pages from the LRU into a private list, drop the
pagemap_lru_lock and then proceed to attempt to free those pages.

Any pages which were successfully reclaimed are batch-freed.  Pages
which were not reclaimed are re-added to the LRU.

This patch reduces pagemap_lru_lock contention on the 4-way by a factor
of thirty.

The shrink_cache() code has been simplified somewhat.

refill_inactive() was being called too often - often just to process
two or three pages.  Fiddled with that so it processes pages at the
same rate, but works on 32 pages at a time.

Added a couple of mark_page_accessed() calls into mm/memory.c from 2.4.
They seem appropriate.

Change the shrink_caches() logic so that it will still trickle through
the active list (via refill_inactive) even if the inactive list is much
larger than the active list.
parent 6a952840
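The heart of the patch is a locking pattern: take pagemap_lru_lock, snip a
batch of up to 32 pages off the LRU onto a private list, drop the lock, do the
expensive reclaim work unlocked, then re-take the lock only to put back the
failures.  Below is a minimal userspace sketch of that pattern, assuming a
pthread mutex and a plain singly-linked list; the names (lru_lock, inactive,
try_to_reclaim, BATCH) are illustrative stand-ins, not kernel code.

/* Sketch of the privatise-a-batch pattern from this patch (userspace toy). */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define BATCH 32

struct page_node {
	struct page_node *next;
	int id;
};

static pthread_mutex_t lru_lock = PTHREAD_MUTEX_INITIALIZER;
static struct page_node *inactive;	/* the shared "LRU", under lru_lock */

/* Stand-in for the real reclaim work; pretend even ids are freeable. */
static bool try_to_reclaim(struct page_node *p)
{
	return (p->id & 1) == 0;
}

static int shrink_batch(void)
{
	struct page_node *batch = NULL, *keep = NULL, *p;
	int n = 0, freed = 0;

	/* 1: privatise up to BATCH pages while holding the lock */
	pthread_mutex_lock(&lru_lock);
	while (n < BATCH && inactive != NULL) {
		p = inactive;
		inactive = p->next;
		p->next = batch;
		batch = p;
		n++;
	}
	pthread_mutex_unlock(&lru_lock);

	/* 2: the expensive work happens with the lock dropped */
	while (batch != NULL) {
		p = batch;
		batch = p->next;
		if (try_to_reclaim(p)) {
			free(p);	/* the kernel batch-frees via pagevec */
			freed++;
		} else {
			p->next = keep;
			keep = p;
		}
	}

	/* 3: re-take the lock only to put back the failures */
	pthread_mutex_lock(&lru_lock);
	while (keep != NULL) {
		p = keep;
		keep = p->next;
		p->next = inactive;
		inactive = p;
	}
	pthread_mutex_unlock(&lru_lock);
	return freed;
}

int main(void)
{
	for (int i = 0; i < 100; i++) {
		struct page_node *p = malloc(sizeof(*p));
		if (p == NULL)
			return 1;
		p->id = i;
		p->next = inactive;
		inactive = p;
	}
	printf("freed %d pages in one batch\n", shrink_batch());
	return 0;
}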
@@ -195,6 +195,7 @@ struct page {
  */
 #define get_page(p)		atomic_inc(&(p)->count)
 #define put_page(p)		__free_page(p)
+#define __put_page(p)		atomic_dec(&(p)->count)
 #define put_page_testzero(p)	atomic_dec_and_test(&(p)->count)
 #define page_count(p)		atomic_read(&(p)->count)
 #define set_page_count(p,v)	atomic_set(&(p)->count, v)
...
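The new __put_page() drops a reference without testing for zero, unlike
put_page_testzero(); that is only safe when the caller is known to hold
another reference, which is how shrink_list() below uses it to drop the
pagecache ref while still holding the ref taken in shrink_cache().  A hedged
userspace illustration with C11 atomics follows; the names mirror the macros
above, but the bodies are illustrative, not the kernel's:

#include <stdatomic.h>
#include <stdbool.h>

struct page {
	atomic_int count;
};

/* Drop a ref and report whether it was the last (cf. atomic_dec_and_test). */
static bool put_page_testzero(struct page *p)
{
	return atomic_fetch_sub(&p->count, 1) == 1;
}

/*
 * Drop a ref with no zero test (cf. atomic_dec).  Legal only while the
 * caller holds another reference, so the count cannot reach zero here.
 */
static void __put_page(struct page *p)
{
	atomic_fetch_sub(&p->count, 1);
}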
@@ -154,6 +154,7 @@ extern void get_page_state(struct page_state *ret);
 		ret;							\
 })

+#define SetPageLRU(page)	set_bit(PG_lru, &(page)->flags)
 #define PageLRU(page)		test_bit(PG_lru, &(page)->flags)
 #define TestSetPageLRU(page)	test_and_set_bit(PG_lru, &(page)->flags)
 #define TestClearPageLRU(page)	test_and_clear_bit(PG_lru, &(page)->flags)
@@ -161,6 +162,7 @@ extern void get_page_state(struct page_state *ret);
 #define PageActive(page)	test_bit(PG_active, &(page)->flags)
 #define SetPageActive(page)	set_bit(PG_active, &(page)->flags)
 #define ClearPageActive(page)	clear_bit(PG_active, &(page)->flags)
+#define TestClearPageActive(page) test_and_clear_bit(PG_active, &(page)->flags)

 #define PageSlab(page)		test_bit(PG_slab, &(page)->flags)
 #define SetPageSlab(page)	set_bit(PG_slab, &(page)->flags)
...
@@ -227,12 +227,17 @@ do { \
 		BUG();						\
 } while (0)

+#define __add_page_to_active_list(page)				\
+do {								\
+	list_add(&(page)->lru, &active_list);			\
+	inc_page_state(nr_active);				\
+} while (0)
+
 #define add_page_to_active_list(page)				\
 do {								\
 	DEBUG_LRU_PAGE(page);					\
 	SetPageActive(page);					\
-	list_add(&(page)->lru, &active_list);			\
-	inc_page_state(nr_active);				\
+	__add_page_to_active_list(page);			\
 } while (0)

 #define add_page_to_inactive_list(page)				\
...
@@ -545,7 +545,8 @@ int add_to_page_cache(struct page *page,
 		page_cache_get(page);
 	}
 	write_unlock(&mapping->page_lock);
-	if (!error)
+	/* Anon pages are already on the LRU */
+	if (!error && !PageSwapCache(page))
 		lru_cache_add(page);
 	return error;
 }
...
@@ -1180,6 +1180,7 @@ static int do_swap_page(struct mm_struct * mm,
 		KERNEL_STAT_INC(pgmajfault);
 	}

+	mark_page_accessed(page);
 	lock_page(page);

 	/*
@@ -1257,6 +1258,7 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
 		flush_page_to_ram(page);
 		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
 		lru_cache_add(page);
+		mark_page_accessed(page);
 	}

 	set_pte(page_table, entry);
...
@@ -381,6 +381,7 @@ struct page * read_swap_cache_async(swp_entry_t entry)
 		/*
 		 * Initiate read into locked page and return.
 		 */
+		lru_cache_add(new_page);
 		swap_readpage(NULL, new_page);
 		return new_page;
 	}
...
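The mm/vmscan.c changes below funnel page releases through a struct pagevec:
a small fixed-size buffer that is flushed when full, so the expensive release
path runs once per batch rather than once per page.  Here is a minimal
userspace sketch of that buffer-and-flush pattern; the names follow the new
linux/pagevec.h, but the size and the function bodies are illustrative
assumptions, not the kernel implementation.

#include <stdio.h>

#define PAGEVEC_SIZE 16		/* illustrative; the kernel picks its own */

struct page;			/* opaque in this sketch */

struct pagevec {
	unsigned int nr;
	struct page *pages[PAGEVEC_SIZE];
};

static void pagevec_init(struct pagevec *pvec)
{
	pvec->nr = 0;
}

static void __pagevec_release(struct pagevec *pvec)
{
	/* The kernel drops one ref per page here, taking the expensive
	 * locks once per batch instead of once per page. */
	printf("releasing %u pages in one go\n", pvec->nr);
	pvec->nr = 0;
}

/* Returns remaining capacity after the add; 0 tells the caller to flush. */
static unsigned int pagevec_add(struct pagevec *pvec, struct page *page)
{
	pvec->pages[pvec->nr++] = page;
	return PAGEVEC_SIZE - pvec->nr;
}

int main(void)
{
	struct pagevec pvec;

	pagevec_init(&pvec);
	for (int i = 0; i < 40; i++) {
		/* mirrors shrink_list(): if (!pagevec_add(...)) release */
		if (!pagevec_add(&pvec, NULL))
			__pagevec_release(&pvec);
	}
	if (pvec.nr)
		__pagevec_release(&pvec);	/* final partial batch */
	return 0;
}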
@@ -23,6 +23,7 @@
 #include <linux/writeback.h>
 #include <linux/suspend.h>
 #include <linux/buffer_head.h>	/* for try_to_release_page() */
+#include <linux/pagevec.h>

 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
@@ -36,10 +37,35 @@
  */
 #define DEF_PRIORITY (6)

-static inline int is_page_cache_freeable(struct page * page)
-{
-	return page_count(page) - !!PagePrivate(page) == 1;
-}
+#ifdef ARCH_HAS_PREFETCH
+#define prefetch_prev_lru_page(_page, _base, _field)		\
+	do {							\
+		if ((_page)->lru.prev != _base) {		\
+			struct page *prev;			\
+								\
+			prev = list_entry(_page->lru.prev,	\
+					struct page, lru);	\
+			prefetch(&prev->_field);		\
+		}						\
+	} while (0)
+#else
+#define prefetch_prev_lru_page(_page, _base, _field) do { } while (0)
+#endif
+
+#ifdef ARCH_HAS_PREFETCHW
+#define prefetchw_prev_lru_page(_page, _base, _field)		\
+	do {							\
+		if ((_page)->lru.prev != _base) {		\
+			struct page *prev;			\
+								\
+			prev = list_entry(_page->lru.prev,	\
+					struct page, lru);	\
+			prefetchw(&prev->_field);		\
+		}						\
+	} while (0)
+#else
+#define prefetchw_prev_lru_page(_page, _base, _field) do { } while (0)
+#endif

 /* Must be called with page's pte_chain_lock held. */
 static inline int page_mapping_inuse(struct page * page)
@@ -61,89 +87,49 @@ static inline int page_mapping_inuse(struct page * page)
 	return 0;
 }

-static int
-shrink_cache(int nr_pages, zone_t *classzone,
-		unsigned int gfp_mask, int priority, int max_scan)
+static inline int is_page_cache_freeable(struct page *page)
+{
+	return page_count(page) - !!PagePrivate(page) == 2;
+}
+
+static /* inline */ int
+shrink_list(struct list_head *page_list, int nr_pages, zone_t *classzone,
+		unsigned int gfp_mask, int priority, int *max_scan)
 {
-	struct list_head * entry;
 	struct address_space *mapping;
+	LIST_HEAD(ret_pages);
+	struct pagevec freed_pvec;
+	const int nr_pages_in = nr_pages;
+	int pgactivate = 0;

-	spin_lock(&pagemap_lru_lock);
-	while (--max_scan >= 0 &&
-			(entry = inactive_list.prev) != &inactive_list) {
+	pagevec_init(&freed_pvec);
+	while (!list_empty(page_list)) {
 		struct page *page;
 		int may_enter_fs;

-		if (need_resched()) {
-			spin_unlock(&pagemap_lru_lock);
-			__set_current_state(TASK_RUNNING);
-			schedule();
-			spin_lock(&pagemap_lru_lock);
-			continue;
-		}
-
-		page = list_entry(entry, struct page, lru);
-
-		if (unlikely(!PageLRU(page)))
-			BUG();
-		if (unlikely(PageActive(page)))
-			BUG();
-
-		list_del(entry);
-		list_add(entry, &inactive_list);
-		KERNEL_STAT_INC(pgscan);
-
-		/*
-		 * Zero page counts can happen because we unlink the pages
-		 * _after_ decrementing the usage count..
-		 */
-		if (unlikely(!page_count(page)))
-			continue;
+		page = list_entry(page_list->prev, struct page, lru);
+		list_del(&page->lru);

 		if (!memclass(page_zone(page), classzone))
-			continue;
+			goto keep;

-		/*
-		 * swap activity never enters the filesystem and is safe
-		 * for GFP_NOFS allocations.
-		 */
-		may_enter_fs = (gfp_mask & __GFP_FS) ||
-			(PageSwapCache(page) && (gfp_mask & __GFP_IO));
-
-		/*
-		 * IO in progress? Leave it at the back of the list.
-		 */
-		if (unlikely(PageWriteback(page))) {
-			if (may_enter_fs) {
-				page_cache_get(page);
-				spin_unlock(&pagemap_lru_lock);
-				wait_on_page_writeback(page);
-				page_cache_release(page);
-				spin_lock(&pagemap_lru_lock);
-			}
-			continue;
-		}
-
 		if (TestSetPageLocked(page))
-			continue;
+			goto keep;

-		if (PageWriteback(page)) {	/* The non-racy check */
-			unlock_page(page);
-			continue;
+		BUG_ON(PageActive(page));
+		may_enter_fs = (gfp_mask & __GFP_FS) ||
+			(PageSwapCache(page) && (gfp_mask & __GFP_IO));
+
+		if (PageWriteback(page)) {
+			if (may_enter_fs)
+				wait_on_page_writeback(page);	/* throttling */
+			else
+				goto keep_locked;
 		}

-		/*
-		 * The page is in active use or really unfreeable. Move to
-		 * the active list.
-		 */
 		pte_chain_lock(page);
 		if (page_referenced(page) && page_mapping_inuse(page)) {
-			del_page_from_inactive_list(page);
-			add_page_to_active_list(page);
+			/* In active use or really unfreeable.  Activate it. */
 			pte_chain_unlock(page);
-			unlock_page(page);
-			KERNEL_STAT_INC(pgactivate);
-			continue;
+			goto activate_locked;
 		}

 		/*
@@ -153,18 +139,9 @@ shrink_cache(int nr_pages, zone_t *classzone,
 		 * XXX: implement swap clustering ?
 		 */
 		if (page->pte.chain && !page->mapping && !PagePrivate(page)) {
-			page_cache_get(page);
 			pte_chain_unlock(page);
-			spin_unlock(&pagemap_lru_lock);
-			if (!add_to_swap(page)) {
-				activate_page(page);
-				unlock_page(page);
-				page_cache_release(page);
-				spin_lock(&pagemap_lru_lock);
-				continue;
-			}
-			page_cache_release(page);
-			spin_lock(&pagemap_lru_lock);
+			if (!add_to_swap(page))
+				goto activate_locked;
 			pte_chain_lock(page);
 		}
@@ -174,30 +151,22 @@ shrink_cache(int nr_pages, zone_t *classzone,
 		 */
 		if (page->pte.chain) {
 			switch (try_to_unmap(page)) {
 			case SWAP_ERROR:
 			case SWAP_FAIL:
-				goto page_active;
+				pte_chain_unlock(page);
+				goto activate_locked;
 			case SWAP_AGAIN:
 				pte_chain_unlock(page);
-				unlock_page(page);
-				continue;
+				goto keep_locked;
 			case SWAP_SUCCESS:
 				; /* try to free the page below */
 			}
 		}
 		pte_chain_unlock(page);
 		mapping = page->mapping;

 		if (PageDirty(page) && is_page_cache_freeable(page) &&
-				page->mapping && may_enter_fs) {
-			/*
-			 * It is not critical here to write it only if
-			 * the page is unmapped beause any direct writer
-			 * like O_DIRECT would set the page's dirty bitflag
-			 * on the physical page after having successfully
-			 * pinned it and after the I/O to the page is finished,
-			 * so the direct writes to the page cannot get lost.
-			 */
+				mapping && may_enter_fs) {
 			int (*writeback)(struct page *, int *);
 			const int cluster_size = SWAP_CLUSTER_MAX;
 			int nr_to_write = cluster_size;
@@ -205,13 +174,9 @@ shrink_cache(int nr_pages, zone_t *classzone,
 			writeback = mapping->a_ops->vm_writeback;
 			if (writeback == NULL)
 				writeback = generic_vm_writeback;
-			page_cache_get(page);
-			spin_unlock(&pagemap_lru_lock);
 			(*writeback)(page, &nr_to_write);
-			max_scan -= (cluster_size - nr_to_write);
-			page_cache_release(page);
-			spin_lock(&pagemap_lru_lock);
-			continue;
+			*max_scan -= (cluster_size - nr_to_write);
+			goto keep;
 		}

 		/*
@@ -227,162 +192,292 @@ shrink_cache(int nr_pages, zone_t *classzone,
 		 * will do this, as well as the blockdev mapping.
 		 * try_to_release_page() will discover that cleanness and will
 		 * drop the buffers and mark the page clean - it can be freed.
+		 *
+		 * Rarely, pages can have buffers and no ->mapping.  These are
+		 * the pages which were not successfully invalidated in
+		 * truncate_complete_page().  We try to drop those buffers here
+		 * and if that worked, and the page is no longer mapped into
+		 * process address space (page_count == 0) it can be freed.
+		 * Otherwise, leave the page on the LRU so it is swappable.
 		 */
 		if (PagePrivate(page)) {
-			spin_unlock(&pagemap_lru_lock);
-
-			/* avoid to free a locked page */
-			page_cache_get(page);
-
-			if (try_to_release_page(page, gfp_mask)) {
-				if (!mapping) {
-					/* effectively free the page here */
-					unlock_page(page);
-					page_cache_release(page);
-
-					spin_lock(&pagemap_lru_lock);
-					if (--nr_pages)
-						continue;
-					break;
-				} else {
-					/*
-					 * The page is still in pagecache so undo the stuff
-					 * before the try_to_release_page since we've not
-					 * finished and we can now try the next step.
-					 */
-					page_cache_release(page);
-					spin_lock(&pagemap_lru_lock);
-				}
-			} else {
-				/* failed to drop the buffers so stop here */
-				unlock_page(page);
-				page_cache_release(page);
-
-				spin_lock(&pagemap_lru_lock);
-				continue;
-			}
-		}
+			if (!try_to_release_page(page, 0))
+				goto keep_locked;
+			if (!mapping && page_count(page) == 1)
+				goto free_it;
+		}
+
+		if (!mapping)
+			goto keep_locked;	/* truncate got there first */
+
+		write_lock(&mapping->page_lock);

 		/*
-		 * This is the non-racy check for busy page.
+		 * The non-racy check for busy page.  It is critical to check
+		 * PageDirty _after_ making sure that the page is freeable and
+		 * not in use by anybody.  (pagecache + us == 2)
 		 */
-		if (mapping) {
-			write_lock(&mapping->page_lock);
-			if (is_page_cache_freeable(page))
-				goto page_freeable;
+		if (page_count(page) != 2 || PageDirty(page)) {
 			write_unlock(&mapping->page_lock);
-		}
-		unlock_page(page);
-		continue;
-
-page_freeable:
-		/*
-		 * It is critical to check PageDirty _after_ we made sure
-		 * the page is freeable* so not in use by anybody.
-		 */
-		if (PageDirty(page)) {
-			write_unlock(&mapping->page_lock);
-			unlock_page(page);
-			continue;
+			goto keep_locked;
 		}

-		/* point of no return */
-		if (likely(!PageSwapCache(page))) {
-			__remove_from_page_cache(page);
-			write_unlock(&mapping->page_lock);
-		} else {
-			swp_entry_t swap;
-			swap.val = page->index;
+		if (PageSwapCache(page)) {
+			swp_entry_t swap = { .val = page->index };
 			__delete_from_swap_cache(page);
 			write_unlock(&mapping->page_lock);
 			swap_free(swap);
+		} else {
+			__remove_from_page_cache(page);
+			write_unlock(&mapping->page_lock);
 		}

-		__lru_cache_del(page);
+		__put_page(page);	/* The pagecache ref */
+free_it:
 		unlock_page(page);
+		nr_pages--;
+		if (!pagevec_add(&freed_pvec, page))
+			__pagevec_release_nonlru(&freed_pvec);
+		continue;

-		/* effectively free the page here */
-		page_cache_release(page);
-		KERNEL_STAT_INC(pgsteal);
-
-		if (--nr_pages)
-			continue;
-		goto out;
-
-page_active:
-		/*
-		 * OK, we don't know what to do with the page.
-		 * It's no use keeping it here, so we move it to
-		 * the active list.
-		 */
-		del_page_from_inactive_list(page);
-		add_page_to_active_list(page);
-		pte_chain_unlock(page);
+activate_locked:
+		SetPageActive(page);
+		pgactivate++;
+keep_locked:
 		unlock_page(page);
-		KERNEL_STAT_INC(pgactivate);
+keep:
+		list_add(&page->lru, &ret_pages);
+		BUG_ON(PageLRU(page));
 	}
-out:	spin_unlock(&pagemap_lru_lock);
+	list_splice(&ret_pages, page_list);
+	if (pagevec_count(&freed_pvec))
+		__pagevec_release_nonlru(&freed_pvec);
+	KERNEL_STAT_ADD(pgsteal, nr_pages_in - nr_pages);
+	KERNEL_STAT_ADD(pgactivate, pgactivate);
 	return nr_pages;
 }
 /*
- * This moves pages from the active list to
- * the inactive list.
- *
- * We move them the other way if the page is
- * referenced by one or more processes, from rmap
- */
-static void refill_inactive(int nr_pages)
-{
-	struct list_head * entry;
-
-	spin_lock(&pagemap_lru_lock);
-	entry = active_list.prev;
-	while (nr_pages-- && entry != &active_list) {
-		struct page * page;
-
-		page = list_entry(entry, struct page, lru);
-		entry = entry->prev;
-		KERNEL_STAT_INC(pgscan);
-		pte_chain_lock(page);
-		if (page->pte.chain && page_referenced(page)) {
-			list_del(&page->lru);
-			list_add(&page->lru, &active_list);
-			pte_chain_unlock(page);
-			continue;
-		}
-		del_page_from_active_list(page);
-		add_page_to_inactive_list(page);
-		pte_chain_unlock(page);
-		KERNEL_STAT_INC(pgdeactivate);
-	}
-	spin_unlock(&pagemap_lru_lock);
-}
+ * pagemap_lru_lock is heavily contended.  We relieve it by quickly privatising
+ * a batch of pages and working on them outside the lock.  Any pages which were
+ * not freed will be added back to the LRU.
+ *
+ * shrink_cache() is passed the number of pages to try to free, and returns
+ * the number which are yet-to-free.
+ *
+ * For pagecache intensive workloads, the first loop here is the hottest spot
+ * in the kernel (apart from the copy_*_user functions).
+ */
+static /* inline */ int
+shrink_cache(int nr_pages, zone_t *classzone,
+		unsigned int gfp_mask, int priority, int max_scan)
+{
+	LIST_HEAD(page_list);
+	struct pagevec pvec;
+	int nr_to_process;
+
+	/*
+	 * Try to ensure that we free `nr_pages' pages in one pass of the loop.
+	 */
+	nr_to_process = nr_pages;
+	if (nr_to_process < SWAP_CLUSTER_MAX)
+		nr_to_process = SWAP_CLUSTER_MAX;
+
+	pagevec_init(&pvec);
+
+	spin_lock(&pagemap_lru_lock);
+	while (max_scan > 0 && nr_pages > 0) {
+		struct page *page;
+		int n = 0;
+
+		while (n < nr_to_process && !list_empty(&inactive_list)) {
+			page = list_entry(inactive_list.prev,
+						struct page, lru);
+
+			prefetchw_prev_lru_page(page, &inactive_list, flags);
+
+			if (!TestClearPageLRU(page))
+				BUG();
+			list_del(&page->lru);
+			if (page_count(page) == 0) {
+				/* It is currently in pagevec_release() */
+				SetPageLRU(page);
+				list_add(&page->lru, &inactive_list);
+				continue;
+			}
+			list_add(&page->lru, &page_list);
+			page_cache_get(page);
+			n++;
+		}
+		spin_unlock(&pagemap_lru_lock);
+
+		if (list_empty(&page_list))
+			goto done;
+
+		max_scan -= n;
+		mod_page_state(nr_inactive, -n);
+		KERNEL_STAT_ADD(pgscan, n);
+		nr_pages = shrink_list(&page_list, nr_pages, classzone,
+					gfp_mask, priority, &max_scan);
+
+		if (nr_pages <= 0 && list_empty(&page_list))
+			goto done;
+
+		spin_lock(&pagemap_lru_lock);
+		/*
+		 * Put back any unfreeable pages.
+		 */
+		while (!list_empty(&page_list)) {
+			page = list_entry(page_list.prev, struct page, lru);
+			if (TestSetPageLRU(page))
+				BUG();
+			list_del(&page->lru);
+			if (PageActive(page))
+				__add_page_to_active_list(page);
+			else
+				add_page_to_inactive_list(page);
+			if (!pagevec_add(&pvec, page)) {
+				spin_unlock(&pagemap_lru_lock);
+				__pagevec_release(&pvec);
+				spin_lock(&pagemap_lru_lock);
+			}
+		}
+	}
+	spin_unlock(&pagemap_lru_lock);
+done:
+	pagevec_release(&pvec);
+	return nr_pages;
+}
+
+/*
+ * This moves pages from the active list to the inactive list.
+ *
+ * We move them the other way if the page is referenced by one or more
+ * processes, from rmap.
+ *
+ * If the pages are mostly unmapped, the processing is fast and it is
+ * appropriate to hold pagemap_lru_lock across the whole operation.  But if
+ * the pages are mapped, the processing is slow (page_referenced()) so we
+ * should drop pagemap_lru_lock around each page.  It's impossible to balance
+ * this, so instead we remove the pages from the LRU while processing them.
+ * It is safe to rely on PG_active against the non-LRU pages in here because
+ * nobody will play with that bit on a non-LRU page.
+ *
+ * The downside is that we have to touch page->count against each page.
+ * But we had to alter page->flags anyway.
+ */
+static /* inline */ void refill_inactive(const int nr_pages_in)
+{
+	int pgdeactivate = 0;
+	int nr_pages = nr_pages_in;
+	LIST_HEAD(l_hold);	/* The pages which were snipped off */
+	LIST_HEAD(l_inactive);	/* Pages to go onto the inactive_list */
+	LIST_HEAD(l_active);	/* Pages to go onto the active_list */
+	struct page *page;
+	struct pagevec pvec;
+
+	spin_lock(&pagemap_lru_lock);
+	while (nr_pages && !list_empty(&active_list)) {
+		page = list_entry(active_list.prev, struct page, lru);
+		prefetchw_prev_lru_page(page, &active_list, flags);
+		if (!TestClearPageLRU(page))
+			BUG();
+		page_cache_get(page);
+		list_move(&page->lru, &l_hold);
+		nr_pages--;
+	}
+	spin_unlock(&pagemap_lru_lock);
+
+	while (!list_empty(&l_hold)) {
+		page = list_entry(l_hold.prev, struct page, lru);
+		list_del(&page->lru);
+		if (page->pte.chain) {
+			if (test_and_set_bit(PG_chainlock, &page->flags)) {
+				list_add(&page->lru, &l_active);
+				continue;
+			}
+			if (page->pte.chain && page_referenced(page)) {
+				pte_chain_unlock(page);
+				list_add(&page->lru, &l_active);
+				continue;
+			}
+			pte_chain_unlock(page);
+		}
+		list_add(&page->lru, &l_inactive);
+		pgdeactivate++;
+	}
+
+	pagevec_init(&pvec);
+	spin_lock(&pagemap_lru_lock);
+	while (!list_empty(&l_inactive)) {
+		page = list_entry(l_inactive.prev, struct page, lru);
+		prefetchw_prev_lru_page(page, &l_inactive, flags);
+		if (TestSetPageLRU(page))
+			BUG();
+		if (!TestClearPageActive(page))
+			BUG();
+		list_move(&page->lru, &inactive_list);
+		if (!pagevec_add(&pvec, page)) {
+			spin_unlock(&pagemap_lru_lock);
+			__pagevec_release(&pvec);
+			spin_lock(&pagemap_lru_lock);
+		}
+	}
+	while (!list_empty(&l_active)) {
+		page = list_entry(l_active.prev, struct page, lru);
+		prefetchw_prev_lru_page(page, &l_active, flags);
+		if (TestSetPageLRU(page))
+			BUG();
+		BUG_ON(!PageActive(page));
+		list_move(&page->lru, &active_list);
+		if (!pagevec_add(&pvec, page)) {
+			spin_unlock(&pagemap_lru_lock);
+			__pagevec_release(&pvec);
+			spin_lock(&pagemap_lru_lock);
+		}
+	}
+	spin_unlock(&pagemap_lru_lock);
+	pagevec_release(&pvec);
+	mod_page_state(nr_active, -pgdeactivate);
+	mod_page_state(nr_inactive, pgdeactivate);
+	KERNEL_STAT_ADD(pgscan, nr_pages_in - nr_pages);
+	KERNEL_STAT_ADD(pgdeactivate, pgdeactivate);
+}
-static int FASTCALL(shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages));
-static int shrink_caches(zone_t * classzone, int priority, unsigned int gfp_mask, int nr_pages)
+static /* inline */ int
+shrink_caches(zone_t *classzone, int priority,
+		unsigned int gfp_mask, int nr_pages)
 {
-	int chunk_size = nr_pages;
 	unsigned long ratio;
 	struct page_state ps;
 	int max_scan;
+	static atomic_t nr_to_refill = ATOMIC_INIT(0);

-	nr_pages -= kmem_cache_reap(gfp_mask);
-	if (nr_pages <= 0)
+	if (kmem_cache_reap(gfp_mask) >= nr_pages)
 		return 0;

-	nr_pages = chunk_size;
-
 	/*
-	 * Try to keep the active list 2/3 of the size of the cache
+	 * Try to keep the active list 2/3 of the size of the cache.  And
+	 * make sure that refill_inactive is given a decent number of pages.
+	 *
+	 * The "ratio+1" here is important.  With pagecache-intensive workloads
+	 * the inactive list is huge, and `ratio' evaluates to zero all the
+	 * time.  Which pins the active list memory.  So we add one to `ratio'
+	 * just to make sure that the kernel will slowly sift through the
+	 * active list.
 	 */
 	get_page_state(&ps);
 	ratio = (unsigned long)nr_pages * ps.nr_active /
 				((ps.nr_inactive | 1) * 2);
-	refill_inactive(ratio);
+	atomic_add(ratio+1, &nr_to_refill);
+	if (atomic_read(&nr_to_refill) > SWAP_CLUSTER_MAX) {
+		atomic_sub(SWAP_CLUSTER_MAX, &nr_to_refill);
+		refill_inactive(SWAP_CLUSTER_MAX);
+	}
+
 	max_scan = ps.nr_inactive / priority;
 	nr_pages = shrink_cache(nr_pages, classzone,
 				gfp_mask, priority, max_scan);
 	if (nr_pages <= 0)
 		return 0;
...
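A worked example of the ratio arithmetic in the shrink_caches() hunk above:
with a pagecache-intensive workload the integer division comes out to zero,
so without the "+1" refill_inactive() would never run and the active list
would stay pinned.  The +1 plus the static accumulator turns that into a slow
trickle delivered in SWAP_CLUSTER_MAX-sized batches, which is the
refill_inactive() batching the commit message describes.  The numbers below
are illustrative, and this is plain C, not kernel code.

#include <stdio.h>

#define SWAP_CLUSTER_MAX 32

int main(void)
{
	unsigned long nr_pages = 32;		/* reclaim target per pass */
	unsigned long nr_active = 1000;		/* small active list */
	unsigned long nr_inactive = 200000;	/* huge inactive list */
	static unsigned long nr_to_refill;	/* kernel: static atomic_t */
	unsigned long ratio;

	ratio = nr_pages * nr_active / ((nr_inactive | 1) * 2);
	printf("raw ratio = %lu (refill would starve)\n", ratio);

	/* With the +1, roughly one SWAP_CLUSTER_MAX batch per 32 calls. */
	for (int call = 1; call <= 100; call++) {
		nr_to_refill += ratio + 1;
		if (nr_to_refill > SWAP_CLUSTER_MAX) {
			nr_to_refill -= SWAP_CLUSTER_MAX;
			printf("call %3d: refill_inactive(%d)\n",
					call, SWAP_CLUSTER_MAX);
		}
	}
	return 0;
}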