Commit 7fb1d9fc authored by Rohit Seth, committed by Linus Torvalds

[PATCH] mm: __alloc_pages cleanup

Clean up of __alloc_pages.

Restoration of previous behaviour, plus further cleanups by introducing an
'alloc_flags', removing the last of should_reclaim_zone.
Signed-off-by: Rohit Seth <rohit.seth@intel.com>
Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
parent 51c6f666
...@@ -329,7 +329,7 @@ void get_zone_counts(unsigned long *active, unsigned long *inactive, ...@@ -329,7 +329,7 @@ void get_zone_counts(unsigned long *active, unsigned long *inactive,
void build_all_zonelists(void); void build_all_zonelists(void);
void wakeup_kswapd(struct zone *zone, int order); void wakeup_kswapd(struct zone *zone, int order);
int zone_watermark_ok(struct zone *z, int order, unsigned long mark, int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int alloc_type, int can_try_harder, gfp_t gfp_high); int classzone_idx, int alloc_flags);
#ifdef CONFIG_HAVE_MEMORY_PRESENT #ifdef CONFIG_HAVE_MEMORY_PRESENT
void memory_present(int nid, unsigned long start, unsigned long end); void memory_present(int nid, unsigned long start, unsigned long end);
......
...@@ -732,9 +732,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) ...@@ -732,9 +732,7 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
} }
local_irq_restore(flags); local_irq_restore(flags);
put_cpu(); put_cpu();
} } else {
if (page == NULL) {
spin_lock_irqsave(&zone->lock, flags); spin_lock_irqsave(&zone->lock, flags);
page = __rmqueue(zone, order); page = __rmqueue(zone, order);
spin_unlock_irqrestore(&zone->lock, flags); spin_unlock_irqrestore(&zone->lock, flags);
...@@ -754,20 +752,25 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags) ...@@ -754,20 +752,25 @@ buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
return page; return page;
} }
#define ALLOC_NO_WATERMARKS 0x01 /* don't check watermarks at all */
#define ALLOC_HARDER 0x02 /* try to alloc harder */
#define ALLOC_HIGH 0x04 /* __GFP_HIGH set */
#define ALLOC_CPUSET 0x08 /* check for correct cpuset */
/* /*
* Return 1 if free pages are above 'mark'. This takes into account the order * Return 1 if free pages are above 'mark'. This takes into account the order
* of the allocation. * of the allocation.
*/ */
int zone_watermark_ok(struct zone *z, int order, unsigned long mark, int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
int classzone_idx, int can_try_harder, gfp_t gfp_high) int classzone_idx, int alloc_flags)
{ {
/* free_pages may go negative - that's OK */ /* free_pages may go negative - that's OK */
long min = mark, free_pages = z->free_pages - (1 << order) + 1; long min = mark, free_pages = z->free_pages - (1 << order) + 1;
int o; int o;
if (gfp_high) if (alloc_flags & ALLOC_HIGH)
min -= min / 2; min -= min / 2;
if (can_try_harder) if (alloc_flags & ALLOC_HARDER)
min -= min / 4; min -= min / 4;
if (free_pages <= min + z->lowmem_reserve[classzone_idx]) if (free_pages <= min + z->lowmem_reserve[classzone_idx])
...@@ -785,14 +788,40 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, ...@@ -785,14 +788,40 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark,
return 1; return 1;
} }
static inline int /*
should_reclaim_zone(struct zone *z, gfp_t gfp_mask) * get_page_from_freelist goes through the zonelist trying to allocate
* a page.
*/
static struct page *
get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, int alloc_flags)
{ {
if (!z->reclaim_pages) struct zone **z = zonelist->zones;
return 0; struct page *page = NULL;
if (gfp_mask & __GFP_NORECLAIM) int classzone_idx = zone_idx(*z);
return 0;
return 1; /*
* Go through the zonelist once, looking for a zone with enough free.
* See also cpuset_zone_allowed() comment in kernel/cpuset.c.
*/
do {
if ((alloc_flags & ALLOC_CPUSET) &&
!cpuset_zone_allowed(*z, gfp_mask))
continue;
if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
if (!zone_watermark_ok(*z, order, (*z)->pages_low,
classzone_idx, alloc_flags))
continue;
}
page = buffered_rmqueue(*z, order, gfp_mask);
if (page) {
zone_statistics(zonelist, *z);
break;
}
} while (*(++z) != NULL);
return page;
} }
/* /*
...@@ -803,92 +832,60 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, ...@@ -803,92 +832,60 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist) struct zonelist *zonelist)
{ {
const gfp_t wait = gfp_mask & __GFP_WAIT; const gfp_t wait = gfp_mask & __GFP_WAIT;
struct zone **zones, *z; struct zone **z;
struct page *page; struct page *page;
struct reclaim_state reclaim_state; struct reclaim_state reclaim_state;
struct task_struct *p = current; struct task_struct *p = current;
int i;
int classzone_idx;
int do_retry; int do_retry;
int can_try_harder; int alloc_flags;
int did_some_progress; int did_some_progress;
might_sleep_if(wait); might_sleep_if(wait);
/* z = zonelist->zones; /* the list of zones suitable for gfp_mask */
* The caller may dip into page reserves a bit more if the caller
* cannot run direct reclaim, or is the caller has realtime scheduling
* policy
*/
can_try_harder = (unlikely(rt_task(p)) && !in_interrupt()) || !wait;
zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
if (unlikely(zones[0] == NULL)) { if (unlikely(*z == NULL)) {
/* Should this ever happen?? */ /* Should this ever happen?? */
return NULL; return NULL;
} }
classzone_idx = zone_idx(zones[0]);
restart: restart:
/* page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
* Go through the zonelist once, looking for a zone with enough free. zonelist, ALLOC_CPUSET);
* See also cpuset_zone_allowed() comment in kernel/cpuset.c. if (page)
*/ goto got_pg;
for (i = 0; (z = zones[i]) != NULL; i++) {
int do_reclaim = should_reclaim_zone(z, gfp_mask);
if (!cpuset_zone_allowed(z, __GFP_HARDWALL)) do
continue; wakeup_kswapd(*z, order);
while (*(++z));
/* /*
* If the zone is to attempt early page reclaim then this loop * OK, we're below the kswapd watermark and have kicked background
* will try to reclaim pages and check the watermark a second * reclaim. Now things get more complex, so set up alloc_flags according
* time before giving up and falling back to the next zone. * to how we want to proceed.
*
* The caller may dip into page reserves a bit more if the caller
* cannot run direct reclaim, or if the caller has realtime scheduling
* policy.
*/ */
zone_reclaim_retry: alloc_flags = 0;
if (!zone_watermark_ok(z, order, z->pages_low, if ((unlikely(rt_task(p)) && !in_interrupt()) || !wait)
classzone_idx, 0, 0)) { alloc_flags |= ALLOC_HARDER;
if (!do_reclaim) if (gfp_mask & __GFP_HIGH)
continue; alloc_flags |= ALLOC_HIGH;
else { if (wait)
zone_reclaim(z, gfp_mask, order); alloc_flags |= ALLOC_CPUSET;
/* Only try reclaim once */
do_reclaim = 0;
goto zone_reclaim_retry;
}
}
page = buffered_rmqueue(z, order, gfp_mask);
if (page)
goto got_pg;
}
for (i = 0; (z = zones[i]) != NULL; i++)
wakeup_kswapd(z, order);
/* /*
* Go through the zonelist again. Let __GFP_HIGH and allocations * Go through the zonelist again. Let __GFP_HIGH and allocations
* coming from realtime tasks to go deeper into reserves * coming from realtime tasks go deeper into reserves.
* *
* This is the last chance, in general, before the goto nopage. * This is the last chance, in general, before the goto nopage.
* Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc. * Ignore cpuset if GFP_ATOMIC (!wait) rather than fail alloc.
* See also cpuset_zone_allowed() comment in kernel/cpuset.c. * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
*/ */
for (i = 0; (z = zones[i]) != NULL; i++) { page = get_page_from_freelist(gfp_mask, order, zonelist, alloc_flags);
if (!zone_watermark_ok(z, order, z->pages_min,
classzone_idx, can_try_harder,
gfp_mask & __GFP_HIGH))
continue;
if (wait && !cpuset_zone_allowed(z, gfp_mask))
continue;
page = buffered_rmqueue(z, order, gfp_mask);
if (page) if (page)
goto got_pg; goto got_pg;
}
/* This allocation should allow future memory freeing. */ /* This allocation should allow future memory freeing. */
...@@ -897,13 +894,10 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, ...@@ -897,13 +894,10 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
if (!(gfp_mask & __GFP_NOMEMALLOC)) { if (!(gfp_mask & __GFP_NOMEMALLOC)) {
nofail_alloc: nofail_alloc:
/* go through the zonelist yet again, ignoring mins */ /* go through the zonelist yet again, ignoring mins */
for (i = 0; (z = zones[i]) != NULL; i++) { page = get_page_from_freelist(gfp_mask, order,
if (!cpuset_zone_allowed(z, gfp_mask)) zonelist, ALLOC_NO_WATERMARKS|ALLOC_CPUSET);
continue;
page = buffered_rmqueue(z, order, gfp_mask);
if (page) if (page)
goto got_pg; goto got_pg;
}
if (gfp_mask & __GFP_NOFAIL) { if (gfp_mask & __GFP_NOFAIL) {
blk_congestion_wait(WRITE, HZ/50); blk_congestion_wait(WRITE, HZ/50);
goto nofail_alloc; goto nofail_alloc;
...@@ -924,7 +918,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, ...@@ -924,7 +918,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
reclaim_state.reclaimed_slab = 0; reclaim_state.reclaimed_slab = 0;
p->reclaim_state = &reclaim_state; p->reclaim_state = &reclaim_state;
did_some_progress = try_to_free_pages(zones, gfp_mask); did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
p->reclaim_state = NULL; p->reclaim_state = NULL;
p->flags &= ~PF_MEMALLOC; p->flags &= ~PF_MEMALLOC;
...@@ -932,19 +926,10 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, ...@@ -932,19 +926,10 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
cond_resched(); cond_resched();
if (likely(did_some_progress)) { if (likely(did_some_progress)) {
for (i = 0; (z = zones[i]) != NULL; i++) { page = get_page_from_freelist(gfp_mask, order,
if (!zone_watermark_ok(z, order, z->pages_min, zonelist, alloc_flags);
classzone_idx, can_try_harder,
gfp_mask & __GFP_HIGH))
continue;
if (!cpuset_zone_allowed(z, gfp_mask))
continue;
page = buffered_rmqueue(z, order, gfp_mask);
if (page) if (page)
goto got_pg; goto got_pg;
}
} else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) {
/* /*
* Go through the zonelist yet one more time, keep * Go through the zonelist yet one more time, keep
...@@ -952,18 +937,10 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, ...@@ -952,18 +937,10 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
* a parallel oom killing, we must fail if we're still * a parallel oom killing, we must fail if we're still
* under heavy pressure. * under heavy pressure.
*/ */
for (i = 0; (z = zones[i]) != NULL; i++) { page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, order,
if (!zone_watermark_ok(z, order, z->pages_high, zonelist, ALLOC_CPUSET);
classzone_idx, 0, 0))
continue;
if (!cpuset_zone_allowed(z, __GFP_HARDWALL))
continue;
page = buffered_rmqueue(z, order, gfp_mask);
if (page) if (page)
goto got_pg; goto got_pg;
}
out_of_memory(gfp_mask, order); out_of_memory(gfp_mask, order);
goto restart; goto restart;
...@@ -996,9 +973,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order, ...@@ -996,9 +973,7 @@ __alloc_pages(gfp_t gfp_mask, unsigned int order,
dump_stack(); dump_stack();
show_mem(); show_mem();
} }
return NULL;
got_pg: got_pg:
zone_statistics(zonelist, z);
return page; return page;
} }
......
...@@ -1074,7 +1074,7 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, int order) ...@@ -1074,7 +1074,7 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, int order)
continue; continue;
if (!zone_watermark_ok(zone, order, if (!zone_watermark_ok(zone, order,
zone->pages_high, 0, 0, 0)) { zone->pages_high, 0, 0)) {
end_zone = i; end_zone = i;
goto scan; goto scan;
} }
...@@ -1111,7 +1111,7 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, int order) ...@@ -1111,7 +1111,7 @@ static int balance_pgdat(pg_data_t *pgdat, int nr_pages, int order)
if (nr_pages == 0) { /* Not software suspend */ if (nr_pages == 0) { /* Not software suspend */
if (!zone_watermark_ok(zone, order, if (!zone_watermark_ok(zone, order,
zone->pages_high, end_zone, 0, 0)) zone->pages_high, end_zone, 0))
all_zones_ok = 0; all_zones_ok = 0;
} }
zone->temp_priority = priority; zone->temp_priority = priority;
...@@ -1259,7 +1259,7 @@ void wakeup_kswapd(struct zone *zone, int order) ...@@ -1259,7 +1259,7 @@ void wakeup_kswapd(struct zone *zone, int order)
return; return;
pgdat = zone->zone_pgdat; pgdat = zone->zone_pgdat;
if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0)) if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0))
return; return;
if (pgdat->kswapd_max_order < order) if (pgdat->kswapd_max_order < order)
pgdat->kswapd_max_order = order; pgdat->kswapd_max_order = order;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment