Commit 5954a8b0 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] vmscan: drive everything via nr_to_scan

Page reclaim is currently a bit schizophrenic: sometimes we say "go and scan
this many pages and tell me how many pages were freed" and at other times we
say "go and scan this many pages, but stop if you freed this many".

It makes the logic harder to control and to understand.  This patch converts
everything into the "go and scan this many pages and tell me how many pages
were freed" model.

It doesn't seem to affect performance much either way.
parent b532f4af
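
To make the change concrete, here is a minimal userspace C sketch of the two
control-flow models the message describes (scan_batch() and all the counts are
invented for illustration; this is not the kernel code):

#include <stdio.h>

/* Stub: pretend each batch scans 32 pages and frees 5 of them. */
static int scan_batch(int *freed)
{
        *freed += 5;
        return 32;
}

/* Old model: scan up to max_scan pages, but bail out early once
 * nr_pages have been freed -- two exit conditions to reason about. */
static int reclaim_old(int max_scan, int nr_pages)
{
        int freed = 0;

        while (max_scan > 0 && freed < nr_pages)
                max_scan -= scan_batch(&freed);
        return freed;
}

/* New model: always scan max_scan pages and simply report how many
 * were freed -- the caller sizes max_scan, one exit condition. */
static int reclaim_new(int max_scan)
{
        int freed = 0;

        while (max_scan > 0)
                max_scan -= scan_batch(&freed);
        return freed;
}

int main(void)
{
        printf("old: freed %d\n", reclaim_old(256, 32));
        printf("new: freed %d\n", reclaim_new(256));
        return 0;
}

The old callers had to pick both numbers; after the patch the caller expresses
its policy through max_scan alone.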
@@ -468,39 +468,31 @@ shrink_list(struct list_head *page_list, unsigned int gfp_mask, int *nr_scanned)
  * a batch of pages and working on them outside the lock.  Any pages which were
  * not freed will be added back to the LRU.
  *
- * shrink_cache() is passed the number of pages to try to free, and returns
- * the number of pages which were reclaimed.
+ * shrink_cache() is passed the number of pages to scan and returns the number
+ * of pages which were reclaimed.
  *
  * For pagecache intensive workloads, the first loop here is the hottest spot
  * in the kernel (apart from the copy_*_user functions).
  */
 static int
-shrink_cache(const int nr_pages, struct zone *zone,
-                unsigned int gfp_mask, int max_scan, int *total_scanned)
+shrink_cache(struct zone *zone, unsigned int gfp_mask,
+                int max_scan, int *total_scanned)
 {
         LIST_HEAD(page_list);
         struct pagevec pvec;
-        int nr_to_process;
         int ret = 0;
 
-        /*
-         * Try to ensure that we free `nr_pages' pages in one pass of the loop.
-         */
-        nr_to_process = nr_pages;
-        if (nr_to_process < SWAP_CLUSTER_MAX)
-                nr_to_process = SWAP_CLUSTER_MAX;
-
         pagevec_init(&pvec, 1);
 
         lru_add_drain();
         spin_lock_irq(&zone->lru_lock);
-        while (max_scan > 0 && ret < nr_pages) {
+        while (max_scan > 0) {
                 struct page *page;
                 int nr_taken = 0;
                 int nr_scan = 0;
                 int nr_freed;
 
-                while (nr_scan++ < nr_to_process &&
+                while (nr_scan++ < SWAP_CLUSTER_MAX &&
                                 !list_empty(&zone->inactive_list)) {
                         page = list_entry(zone->inactive_list.prev,
                                                 struct page, lru);
@@ -742,13 +734,12 @@ refill_inactive_zone(struct zone *zone, const int nr_pages_in,
 }
 
 /*
- * Try to reclaim `nr_pages' from this zone.  Returns the number of reclaimed
- * pages.  This is a basic per-zone page freer.  Used by both kswapd and
- * direct reclaim.
+ * Scan `nr_pages' from this zone.  Returns the number of reclaimed pages.
+ * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
 static int
 shrink_zone(struct zone *zone, int max_scan, unsigned int gfp_mask,
-        const int nr_pages, int *total_scanned, struct page_state *ps)
+        int *total_scanned, struct page_state *ps)
 {
         unsigned long ratio;
@@ -762,7 +753,7 @@ shrink_zone(struct zone *zone, int max_scan, unsigned int gfp_mask,
          * just to make sure that the kernel will slowly sift through the
          * active list.
          */
-        ratio = (unsigned long)nr_pages * zone->nr_active /
+        ratio = (unsigned long)SWAP_CLUSTER_MAX * zone->nr_active /
                         ((zone->nr_inactive | 1) * 2);
         atomic_add(ratio+1, &zone->refill_counter);
         if (atomic_read(&zone->refill_counter) > SWAP_CLUSTER_MAX) {
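
A worked example of the refill ratio above may help; this is a standalone
sketch with made-up zone sizes, assuming SWAP_CLUSTER_MAX is 32 as in
2.6-era kernels:

#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL   /* assumed value */

int main(void)
{
        unsigned long nr_active = 4000, nr_inactive = 16000;

        /* Mirrors the patched expression: each call now credits the
         * refill counter in proportion to how large the active list
         * is relative to the inactive list. */
        unsigned long ratio = SWAP_CLUSTER_MAX * nr_active /
                                ((nr_inactive | 1) * 2);

        printf("ratio = %lu, refill_counter += %lu per call\n",
               ratio, ratio + 1);      /* prints ratio = 3, += 4 */
        return 0;
}

Once the counter passes SWAP_CLUSTER_MAX, refill_inactive_zone() runs, so a
mostly-inactive zone sifts its active list slowly, as the comment intends.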
@@ -780,8 +771,7 @@ shrink_zone(struct zone *zone, int max_scan, unsigned int gfp_mask,
                 atomic_set(&zone->refill_counter, 0);
                 refill_inactive_zone(zone, count, ps);
         }
-        return shrink_cache(nr_pages, zone, gfp_mask,
-                                max_scan, total_scanned);
+        return shrink_cache(zone, gfp_mask, max_scan, total_scanned);
 }
 
 /*
@@ -802,13 +792,12 @@ shrink_zone(struct zone *zone, int max_scan, unsigned int gfp_mask,
  */
 static int
 shrink_caches(struct zone **zones, int priority, int *total_scanned,
-                int gfp_mask, int nr_pages, struct page_state *ps)
+                int gfp_mask, struct page_state *ps)
 {
         int ret = 0;
         int i;
 
         for (i = 0; zones[i] != NULL; i++) {
-                int to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX);
                 struct zone *zone = zones[i];
                 int max_scan;
@@ -818,15 +807,9 @@ shrink_caches(struct zone **zones, int priority, int *total_scanned,
                 if (zone->all_unreclaimable && priority != DEF_PRIORITY)
                         continue;       /* Let kswapd poll it */
 
-                /*
-                 * If we cannot reclaim `nr_pages' pages by scanning twice
-                 * that many pages then fall back to the next zone.
-                 */
-                max_scan = zone->nr_inactive >> priority;
-                if (max_scan < to_reclaim * 2)
-                        max_scan = to_reclaim * 2;
-                ret += shrink_zone(zone, max_scan, gfp_mask,
-                                to_reclaim, total_scanned, ps);
+                max_scan = max(zone->nr_inactive >> priority,
+                                SWAP_CLUSTER_MAX * 2UL);
+                ret += shrink_zone(zone, max_scan, gfp_mask, total_scanned, ps);
         }
         return ret;
 }
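
With the to_reclaim target gone, the scan window in shrink_caches() is driven
purely by reclaim priority. A small standalone sketch (assumed values:
SWAP_CLUSTER_MAX of 32, DEF_PRIORITY of 12 as in 2.6-era vmscan.c, and a zone
with 100,000 inactive pages) shows how the window widens as priority drops:

#include <stdio.h>

#define SWAP_CLUSTER_MAX 32UL   /* assumed value */
#define DEF_PRIORITY 12         /* assumed, matches 2.6-era vmscan.c */

static unsigned long max_scan_for(unsigned long nr_inactive, int priority)
{
        unsigned long scan = nr_inactive >> priority;

        /* Same clamp as the patched shrink_caches(): never scan fewer
         * than two clusters, however gentle the priority. */
        if (scan < SWAP_CLUSTER_MAX * 2)
                scan = SWAP_CLUSTER_MAX * 2;
        return scan;
}

int main(void)
{
        int prio;

        for (prio = DEF_PRIORITY; prio >= 0; prio -= 4)
                printf("priority %2d -> max_scan %lu\n",
                       prio, max_scan_for(100000, prio));
        return 0;
}

At priority 12 this yields the 64-page floor; at priority 0 the whole inactive
list becomes eligible, so failing passes escalate pressure without ever
needing a freed-pages target.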
@@ -853,7 +836,6 @@ int try_to_free_pages(struct zone **zones,
 {
         int priority;
         int ret = 0;
-        const int nr_pages = SWAP_CLUSTER_MAX;
         int nr_reclaimed = 0;
         struct reclaim_state *reclaim_state = current->reclaim_state;
         int i;
@@ -869,13 +851,13 @@ int try_to_free_pages(struct zone **zones,
 
                 get_page_state(&ps);
                 nr_reclaimed += shrink_caches(zones, priority, &total_scanned,
-                                                gfp_mask, nr_pages, &ps);
+                                                gfp_mask, &ps);
                 shrink_slab(total_scanned, gfp_mask);
                 if (reclaim_state) {
                         nr_reclaimed += reclaim_state->reclaimed_slab;
                         reclaim_state->reclaimed_slab = 0;
                 }
-                if (nr_reclaimed >= nr_pages) {
+                if (nr_reclaimed >= SWAP_CLUSTER_MAX) {
                         ret = 1;
                         goto out;
                 }
@@ -939,27 +921,21 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, struct page_state *ps)
                         struct zone *zone = pgdat->node_zones + i;
                         int total_scanned = 0;
                         int max_scan;
-                        int to_reclaim;
                         int reclaimed;
 
                         if (zone->all_unreclaimable && priority != DEF_PRIORITY)
                                 continue;
 
-                        if (nr_pages) {         /* Software suspend */
-                                to_reclaim = min(to_free, SWAP_CLUSTER_MAX*8);
-                        } else {                /* Zone balancing */
-                                to_reclaim = zone->pages_high-zone->free_pages;
-                                if (to_reclaim <= 0)
-                                        continue;
+                        if (nr_pages == 0) {    /* Not software suspend */
+                                if (zone->free_pages <= zone->pages_high)
+                                        all_zones_ok = 0;
                         }
                         zone->temp_priority = priority;
                         max_scan = zone->nr_inactive >> priority;
-                        if (max_scan < to_reclaim * 2)
-                                max_scan = to_reclaim * 2;
                         if (max_scan < SWAP_CLUSTER_MAX)
                                 max_scan = SWAP_CLUSTER_MAX;
                         reclaimed = shrink_zone(zone, max_scan, GFP_KERNEL,
-                                        to_reclaim, &total_scanned, ps);
+                                        &total_scanned, ps);
                         reclaim_state->reclaimed_slab = 0;
                         shrink_slab(total_scanned, GFP_KERNEL);
                         reclaimed += reclaim_state->reclaimed_slab;
@@ -968,16 +944,6 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, struct page_state *ps)
                                 continue;
                         if (zone->pages_scanned > zone->present_pages * 2)
                                 zone->all_unreclaimable = 1;
-                        /*
-                         * If this scan failed to reclaim `to_reclaim' or more
-                         * pages, we're getting into trouble.  Need to scan
-                         * some more, and throttle kswapd.  Note that this zone
-                         * may now have sufficient free pages due to freeing
-                         * activity by some other process.  That's OK - we'll
-                         * pick that info up on the next pass through the loop.
-                         */
-                        if (reclaimed < to_reclaim)
-                                all_zones_ok = 0;
                 }
                 if (nr_pages && to_free > 0)
                         continue;       /* swsusp: need to do more work */
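
Note the inversion in how kswapd now judges a zone: previously all_zones_ok
was cleared after the scan, when reclaim fell short of to_reclaim; now it is
cleared before the scan, straight from the watermark. A tiny sketch of the two
tests (field names as in the diff, values invented for illustration):

#include <stdio.h>

struct zone {
        unsigned long free_pages;
        unsigned long pages_high;
};

/* Old test: a zone failed if the scan reclaimed less than its target. */
static int zone_ok_old(int reclaimed, int to_reclaim)
{
        return reclaimed >= to_reclaim;
}

/* New test: a zone fails if it still sits at or below its high
 * watermark, checked up front during normal (non-swsusp) balancing. */
static int zone_ok_new(const struct zone *z)
{
        return z->free_pages > z->pages_high;
}

int main(void)
{
        struct zone z = { 100, 256 };

        printf("old: %d, new: %d\n", zone_ok_old(20, 32), zone_ok_new(&z));
        return 0;
}

The new form no longer depends on what other processes freed during the scan,
which is arguably why the long removed comment about racing freeing activity
could go.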