Commit fee2b68d authored by Andrew Morton, committed by Linus Torvalds

[PATCH] strengthen the `incremental min' logic in the page allocator

Strengthen the `incremental min' logic in the page allocator.

Currently it allows the allocation to succeed if the zone has
free_pages >= pages_high.

This was to avoid a lockup corner case in which all the zones were at
pages_high so reclaim wasn't doing anything, but the incremental min
refused to take pages from those zones anyway.

But we want the incremental min zone protection to work.  So:

- Only allow the allocator to dip below the incremental min if the caller
  cannot run direct reclaim.

- Change the page reclaim code so that on the direct reclaim path,
  the caller can free pages beyond ->pages_high.  So if the incremental
  min test fails, the caller will go and free some more memory.

  Eventually, the caller will have freed enough memory for the
  incremental min test to pass against one of the zones.
parent 53bf7bef
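
To make the mechanism concrete before the diff, here is a minimal standalone sketch of the incremental min test as it behaves after this patch. The `zone_stub` type, the `pick_zone()` helper and the `can_reclaim` flag are illustrative stand-ins, not the kernel's definitions (the real code in `__alloc_pages()` below uses `struct zone` and the `wait` flag); only the watermark arithmetic mirrors the patch.

#include <stddef.h>

/* Illustrative stand-in for the struct zone fields used by the test. */
struct zone_stub {
	unsigned long free_pages;	/* pages currently free in this zone */
	unsigned long pages_low;	/* per-zone low watermark */
	unsigned long pages_high;	/* per-zone high watermark */
};

/*
 * Walk the zonelist from the most preferred zone downwards.  Every zone
 * visited adds its pages_low to the running `min', so fallback zones must
 * hold progressively more free memory before they may serve the request:
 * that is the incremental min.
 *
 * `can_reclaim' plays the role of the patch's `wait' flag: only callers
 * which cannot run direct reclaim keep the old pages_high escape hatch.
 * Sleeping callers that fail this test go and reclaim instead, which (per
 * the shrink_caches() change) may push a zone beyond pages_high, until
 * the test finally passes.
 */
static struct zone_stub *pick_zone(struct zone_stub **zones, int can_reclaim)
{
	unsigned long min = 0;
	int i;

	for (i = 0; zones[i] != NULL; i++) {
		struct zone_stub *z = zones[i];

		min += z->pages_low;		/* the incremental min */
		if (z->free_pages > min ||
				(!can_reclaim && z->free_pages >= z->pages_high))
			return z;		/* allowed to allocate here */
	}
	return NULL;	/* every zone failed: reclaim, or fail if atomic */
}

The diff below then adjusts shrink_caches() so that a reclaiming caller is allowed to free pages even in zones that are already over pages_high.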
@@ -411,12 +411,25 @@ static struct page *buffered_rmqueue(struct zone *zone, int order, int cold)
 }
 
 /*
- * This is the 'heart' of the zoned buddy allocator:
+ * This is the 'heart' of the zoned buddy allocator.
+ *
+ * Herein lies the mysterious "incremental min". That's the
+ *
+ *	min += z->pages_low;
+ *
+ * thing. The intent here is to provide additional protection to low zones for
+ * allocation requests which _could_ use higher zones. So a GFP_HIGHMEM
+ * request is not allowed to dip as deeply into the normal zone as a GFP_KERNEL
+ * request. This preserves additional space in those lower zones for requests
+ * which really do need memory from those zones. It means that on a decent
+ * sized machine, GFP_HIGHMEM and GFP_KERNEL requests basically leave the DMA
+ * zone untouched.
 */
 struct page *
 __alloc_pages(unsigned int gfp_mask, unsigned int order,
 		struct zonelist *zonelist)
 {
+	const int wait = gfp_mask & __GFP_WAIT;
 	unsigned long min;
 	struct zone **zones, *classzone;
 	struct page *page;
@@ -424,7 +437,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 	int i;
 	int cold;
 
-	if (gfp_mask & __GFP_WAIT)
+	if (wait)
 		might_sleep();
 
 	cold = 0;
@@ -441,9 +454,9 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *z = zones[i];
 
-		/* the incremental min is allegedly to discourage fallback */
 		min += z->pages_low;
-		if (z->free_pages > min || z->free_pages >= z->pages_high) {
+		if (z->free_pages > min ||
+				(!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
 				return page;
@@ -468,7 +481,8 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 		if (gfp_mask & __GFP_HIGH)
 			local_min >>= 2;
 		min += local_min;
-		if (z->free_pages > min || z->free_pages >= z->pages_high) {
+		if (z->free_pages > min ||
+				(!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
 				return page;
@@ -490,7 +504,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 	}
 
 	/* Atomic allocations - we can't balance anything */
-	if (!(gfp_mask & __GFP_WAIT))
+	if (!wait)
 		goto nopage;
 
 	inc_page_state(allocstall);
@@ -505,7 +519,8 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 		struct zone *z = zones[i];
 
 		min += z->pages_min;
-		if (z->free_pages > min || z->free_pages >= z->pages_high) {
+		if (z->free_pages > min ||
+				(!wait && z->free_pages >= z->pages_high)) {
 			page = buffered_rmqueue(z, order, cold);
 			if (page)
 				return page;
...
@@ -712,28 +712,28 @@ shrink_zone(struct zone *zone, int max_scan, unsigned int gfp_mask,
  * This is the direct reclaim path, for page-allocating processes. We only
  * try to reclaim pages from zones which will satisfy the caller's allocation
  * request.
+ *
+ * We reclaim from a zone even if that zone is over pages_high. Because:
+ * a) The caller may be trying to free *extra* pages to satisfy a higher-order
+ *    allocation or
+ * b) The zones may be over pages_high but they must go *over* pages_high to
+ *    satisfy the `incremental min' zone defense algorithm.
+ *
+ * Returns the number of reclaimed pages.
 */
 static int
 shrink_caches(struct zone *classzone, int priority, int *total_scanned,
-		int gfp_mask, const int nr_pages, int order,
-		struct page_state *ps)
+		int gfp_mask, const int nr_pages, struct page_state *ps)
 {
 	struct zone *first_classzone;
 	struct zone *zone;
+	int nr_mapped = 0;
 	int ret = 0;
 
 	first_classzone = classzone->zone_pgdat->node_zones;
 	for (zone = classzone; zone >= first_classzone; zone--) {
-		int nr_mapped = 0;
+		int to_reclaim = max(nr_pages, SWAP_CLUSTER_MAX);
 		int max_scan;
-		int to_reclaim;
-
-		to_reclaim = zone->pages_high - zone->free_pages;
-		if (order == 0 && to_reclaim < 0)
-			continue;	/* zone has enough memory */
-		to_reclaim = min(to_reclaim, SWAP_CLUSTER_MAX);
-		to_reclaim = max(to_reclaim, nr_pages);
 
 		/*
 		 * If we cannot reclaim `nr_pages' pages by scanning twice
@@ -744,8 +744,7 @@ shrink_caches(struct zone *classzone, int priority, int *total_scanned,
 		max_scan = to_reclaim * 2;
 		ret += shrink_zone(zone, max_scan, gfp_mask,
 				to_reclaim, &nr_mapped, ps, priority);
-		*total_scanned += max_scan;
-		*total_scanned += nr_mapped;
+		*total_scanned += max_scan + nr_mapped;
 		if (ret >= nr_pages)
 			break;
 	}
@@ -786,11 +785,11 @@ try_to_free_pages(struct zone *classzone,
 		get_page_state(&ps);
 		nr_reclaimed += shrink_caches(classzone, priority,
 					&total_scanned, gfp_mask,
-					nr_pages, order, &ps);
+					nr_pages, &ps);
 		if (nr_reclaimed >= nr_pages)
 			return 1;
 		if (total_scanned == 0)
-			return 1;	/* All zones had enough free memory */
+			printk("%s: I am buggy\n", __FUNCTION__);
 		if (!(gfp_mask & __GFP_FS))
 			break;		/* Let the caller handle it */
 		/*
...