Commit 3c7011b3 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] use compound pages for hugetlb pages only

The compound page logic is a little fragile - it relies on additional
metadata in the pageframes which some other kernel code likes to stomp on
(xfs was doing this).
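For orientation, the metadata in question lives in otherwise-unused fields of the pageframes. A rough sketch, inferred from the hunks below (the helper name is hypothetical, not kernel API):

/* Sketch of the per-page compound metadata, inferred from the hunks
 * in this patch:
 *   - every subpage: PG_compound set, ->private = head page
 *   - page[1].index   = allocation order
 *   - page[1].mapping = optional destructor, cast from
 *                       void (*)(struct page *)
 * Stomping on ->private or ->mapping of a tail page, as xfs did,
 * corrupts exactly this bookkeeping. */
static inline struct page *compound_head_of(struct page *page)
{
        return (struct page *)page->private;    /* hypothetical helper */
}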

Also, because we're treating all higher-order pages as compound pages it is
no longer possible to free individual lower-order pages from the middle of
higher-order pages.  At least one ARM driver insists on doing this.
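The pattern being broken looks roughly like this (a hypothetical driver sketch against the 2.6-era page APIs; the commit does not name the driver):

struct page *block = alloc_pages(GFP_KERNEL, 2);        /* 4 contiguous pages */
int i;

if (block) {
        for (i = 1; i < (1 << 2); i++) {
                set_page_count(block + i, 1);   /* tails start with count 0 */
                __free_page(block + i);         /* hand pages 1..3 back */
        }
        /* page 0 stays with the driver.  With unconditional compound
         * pages the tails carry metadata and this free is illegal. */
}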

We only really need the compound page logic for higher-order pages which can
be mapped into user pagetables and placed under direct-io.  This covers
hugetlb pages and, conceivably, soundcard DMA buffers which were allocated
with a higher-order allocation but which weren't marked PageReserved.
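Concretely, direct-io pins whichever subpages a user buffer spans via get_user_pages(), so those per-subpage references must be redirected to the head page or a huge page could be freed while I/O still holds one of its tails. A sketch, assuming the 2.6-era calling convention:

static int pin_user_buffer(unsigned long uaddr, int nr, struct page **pages)
{
        int n;

        down_read(&current->mm->mmap_sem);
        n = get_user_pages(current, current->mm, uaddr, nr,
                           1 /* write */, 0 /* force */, pages, NULL);
        up_read(&current->mm->mmap_sem);
        return n;       /* caller put_page()s each page after the I/O */
}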

The patch arranges for the hugetlb implementations to allocate their pages with
compound page metadata, and all other higher-order allocations go back to the
old way.

(Andrea supplied the GFP_LEVEL_MASK fix)
parent 60af4464
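For reference before the hunks, the metadata writer that __GFP_COMP now opts into looked roughly like this in 2.6 mm/page_alloc.c (a reconstruction, not part of this diff):

static void prep_compound_page(struct page *page, unsigned long order)
{
        int i;
        int nr_pages = 1 << order;

        page[1].mapping = NULL;         /* no destructor by default */
        page[1].index = order;          /* checked again on the way back in */
        for (i = 0; i < nr_pages; i++) {
                struct page *p = page + i;

                SetPageCompound(p);
                p->private = (unsigned long)page;       /* point at head */
        }
}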
@@ -54,7 +54,8 @@ static struct page *alloc_fresh_huge_page(void)
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
...
@@ -58,7 +58,8 @@ static struct page *alloc_fresh_huge_page(void)
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
...
@@ -78,7 +78,8 @@ static struct page *alloc_fresh_huge_page(void)
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	if (!page)
 		return NULL;
...
@@ -60,7 +60,8 @@ static struct page *alloc_fresh_huge_page(void)
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
...
@@ -56,7 +56,8 @@ static struct page *alloc_fresh_huge_page(void)
 {
 	static int nid = 0;
 	struct page *page;
-	page = alloc_pages_node(nid, GFP_HIGHUSER, HUGETLB_PAGE_ORDER);
+	page = alloc_pages_node(nid, GFP_HIGHUSER|__GFP_COMP,
+					HUGETLB_PAGE_ORDER);
 	nid = (nid + 1) % numnodes;
 	return page;
 }
...
@@ -32,10 +32,16 @@
 #define __GFP_NOFAIL	0x800	/* Retry for ever.  Cannot fail */
 #define __GFP_NORETRY	0x1000	/* Do not retry.  Might fail */
 #define __GFP_NO_GROW	0x2000	/* Slab internal usage */
+#define __GFP_COMP	0x4000	/* Add compound page metadata */
 
 #define __GFP_BITS_SHIFT 16	/* Room for 16 __GFP_FOO bits */
 #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1)
 
+/* if you forget to add the bitmask here kernel will crash, period */
+#define GFP_LEVEL_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS| \
+			__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT| \
+			__GFP_NOFAIL|__GFP_NORETRY|__GFP_NO_GROW|__GFP_COMP)
+
 #define GFP_ATOMIC	(__GFP_HIGH)
 #define GFP_NOIO	(__GFP_WAIT)
 #define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
...
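The "kernel will crash" comment is literal: slab sanity-checks incoming gfp flags against this mask, roughly as below (a sketch of the 2.6-era check in mm/slab.c), so a new __GFP_ bit left out of GFP_LEVEL_MASK makes every slab allocation that passes it hit BUG():

static int cache_grow(kmem_cache_t *cachep, int flags)
{
        if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW))
                BUG();  /* unknown gfp bit: crash, as promised */
        /* ... allocate and initialise a new slab ... */
        return 1;
}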
@@ -247,14 +247,14 @@ static inline int page_count(struct page *p)
 
 static inline void get_page(struct page *page)
 {
-	if (PageCompound(page))
+	if (unlikely(PageCompound(page)))
 		page = (struct page *)page->private;
 	atomic_inc(&page->count);
 }
 
 static inline void put_page(struct page *page)
 {
-	if (PageCompound(page)) {
+	if (unlikely(PageCompound(page))) {
 		page = (struct page *)page->private;
 		if (put_page_testzero(page)) {
 			if (page[1].mapping) {	/* destructor? */
...
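The hunk is truncated at the destructor check; for context, put_page() of this era continues roughly as follows (a reconstruction, not quoted from this diff):

static inline void put_page(struct page *page)
{
        if (unlikely(PageCompound(page))) {
                page = (struct page *)page->private;    /* head page */
                if (put_page_testzero(page)) {
                        if (page[1].mapping) {  /* destructor? */
                                void (*dtor)(struct page *);

                                dtor = (void (*)(struct page *))page[1].mapping;
                                (*dtor)(page);
                        } else
                                __page_cache_release(page);
                }
                return;
        }
        if (!PageReserved(page) && put_page_testzero(page))
                __page_cache_release(page);
}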
@@ -25,9 +25,7 @@ typedef struct kmem_cache_s kmem_cache_t;
 #define	SLAB_KERNEL		GFP_KERNEL
 #define	SLAB_DMA		GFP_DMA
-#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
-				__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT|\
-				__GFP_NOFAIL|__GFP_NORETRY)
+#define SLAB_LEVEL_MASK		GFP_LEVEL_MASK
 
 #define	SLAB_NO_GROW		__GFP_NO_GROW	/* don't grow a cache */
...
@@ -130,6 +130,9 @@ static void destroy_compound_page(struct page *page, unsigned long order)
 	int i;
 	int nr_pages = 1 << order;
 
+	if (!PageCompound(page))
+		return;
+
 	if (page[1].index != order)
 		bad_page(__FUNCTION__, page);
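With the early return applied, the whole function reads roughly as below (only the PageCompound() check is new in this patch; the rest is reconstructed for context). Non-compound higher-order frees now skip the metadata checks entirely, which is what re-enables freeing pages from the middle of a plain higher-order block:

static void destroy_compound_page(struct page *page, unsigned long order)
{
        int i;
        int nr_pages = 1 << order;

        if (!PageCompound(page))
                return;         /* plain higher-order page: nothing to verify */

        if (page[1].index != order)
                bad_page(__FUNCTION__, page);

        for (i = 0; i < nr_pages; i++) {
                struct page *p = page + i;

                if (!PageCompound(p))
                        bad_page(__FUNCTION__, p);
                if (p->private != (unsigned long)page)
                        bad_page(__FUNCTION__, p);
                ClearPageCompound(p);
        }
}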
@@ -487,10 +490,12 @@ void fastcall free_cold_page(struct page *page)
  * or two.
  */
-static struct page *buffered_rmqueue(struct zone *zone, int order, int cold)
+static struct page *
+buffered_rmqueue(struct zone *zone, int order, int gfp_flags)
 {
 	unsigned long flags;
 	struct page *page = NULL;
+	int cold = !!(gfp_flags & __GFP_COLD);
 
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
@@ -519,7 +524,7 @@ static struct page *buffered_rmqueue(struct zone *zone, int order, int cold)
 		BUG_ON(bad_range(zone, page));
 		mod_page_state_zone(zone, pgalloc, 1 << order);
 		prep_new_page(page, order);
-		if (order)
+		if (order && (gfp_flags & __GFP_COMP))
 			prep_compound_page(page, order);
 	}
 	return page;
@@ -552,16 +557,11 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 	struct reclaim_state reclaim_state;
 	struct task_struct *p = current;
 	int i;
-	int cold;
 	int alloc_type;
 	int do_retry;
 
 	might_sleep_if(wait);
 
-	cold = 0;
-	if (gfp_mask & __GFP_COLD)
-		cold = 1;
-
 	zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
 	if (zones[0] == NULL)     /* no zones in the zonelist */
 		return NULL;
@@ -583,7 +583,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
@@ -606,7 +606,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
@@ -620,7 +620,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 		for (i = 0; zones[i] != NULL; i++) {
 			struct zone *z = zones[i];
 
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
@@ -648,7 +648,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 		if (z->free_pages >= min ||
 				(!wait && z->free_pages >= z->pages_high)) {
-			page = buffered_rmqueue(z, order, cold);
+			page = buffered_rmqueue(z, order, gfp_mask);
 			if (page)
 				goto got_pg;
 		}
...
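Net effect for callers, illustratively:

/* Only callers that pass __GFP_COMP get compound metadata now; every
 * other higher-order allocation behaves as it did before the compound
 * page change. */
struct page *huge  = alloc_pages(GFP_HIGHUSER | __GFP_COMP,
                                 HUGETLB_PAGE_ORDER);   /* compound */
struct page *plain = alloc_pages(GFP_KERNEL, 2);        /* plain order-2 */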