Commit 75908778 authored by Andrew Morton, committed by Linus Torvalds

[PATCH] implement __GFP_REPEAT, __GFP_NOFAIL, __GFP_NORETRY

This is a cleanup patch.

There are quite a lot of places in the kernel which will infinitely retry a
memory allocation.

Generally, they get it wrong.  Some do yield(), the semantics of which have
changed over time.  Some do schedule(), which can lock up if the caller is
SCHED_FIFO/RR.  Some do schedule_timeout(), etc.

And often it is unnecessary, because the page allocator will do the retry
internally anyway.  But we cannot rely on that - this behaviour may change
(-aa and -rmap kernels do not do this, for instance).
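As an illustration, the kind of open-coded loop being replaced looks roughly
like this (a hypothetical call site sketched from the patterns named above,
not code from the tree):

	/* Open-coded infinite retry: depends on yield() semantics, and the
	 * schedule()-based variant can lock up under SCHED_FIFO/RR. */
	struct page *page;

	while ((page = alloc_page(GFP_KERNEL)) == NULL)
		yield();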

So it is good to formalise and to centralise this operation.  If an
allocation specifies __GFP_REPEAT then the page allocator must infinitely
retry the allocation.

The semantics of __GFP_REPEAT are "try harder".  The allocation _may_ fail
(the 2.4 -aa and -rmap VMs do not retry infinitely by default).

The semantics of __GFP_NOFAIL are "cannot fail".  It is a no-op in this VM,
but it needs to be honoured (or the callers fixed up) if the VM is changed to
not retry infinitely by default.

The semantics of __GFP_NORETRY are "try once, don't loop".  This isn't used
at present (although perhaps it should be, in swapoff).  It is mainly for
completeness.
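To make the intended usage concrete, hypothetical call sites for the new
modifiers might look like this (the contexts are invented for illustration;
the flags are the ones this patch adds):

	struct page *page;

	/* Caller which cannot recover from failure (say, a journalling
	 * commit path): the allocator must loop until it succeeds. */
	page = alloc_page(GFP_NOFS | __GFP_NOFAIL);

	/* Opportunistic caller (say, readahead): a single attempt, no
	 * retry loop, and failure is tolerated silently. */
	page = alloc_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
	if (page == NULL)
		return;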
parent efbb77b2
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -11,13 +11,26 @@
 #define __GFP_DMA	0x01
 #define __GFP_HIGHMEM	0x02
 
-/* Action modifiers - doesn't change the zoning */
+/*
+ * Action modifiers - doesn't change the zoning
+ *
+ * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
+ * _might_ fail.  This depends upon the particular VM implementation.
+ *
+ * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
+ * cannot handle allocation failures.
+ *
+ * __GFP_NORETRY: The VM implementation must not retry indefinitely.
+ */
 #define __GFP_WAIT	0x10	/* Can wait and reschedule? */
 #define __GFP_HIGH	0x20	/* Should access emergency pools? */
 #define __GFP_IO	0x40	/* Can start physical IO? */
 #define __GFP_FS	0x80	/* Can call down to low-level FS? */
 #define __GFP_COLD	0x100	/* Cache-cold page required */
 #define __GFP_NOWARN	0x200	/* Suppress page allocation failure warning */
+#define __GFP_REPEAT	0x400	/* Retry the allocation.  Might fail */
+#define __GFP_NOFAIL	0x800	/* Retry for ever.  Cannot fail */
+#define __GFP_NORETRY	0x1000	/* Do not retry.  Might fail */
 
 #define GFP_ATOMIC	(__GFP_HIGH)
 #define GFP_NOIO	(__GFP_WAIT)
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -22,7 +22,7 @@ typedef struct kmem_cache_s kmem_cache_t;
 #define	SLAB_KERNEL		GFP_KERNEL
 #define	SLAB_DMA		GFP_DMA
 
-#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN)
+#define SLAB_LEVEL_MASK		(__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|__GFP_COLD|__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|__GFP_NORETRY)
 
 #define	SLAB_NO_GROW		0x00001000UL	/* don't grow a cache */
 /* flags to pass to kmem_cache_create().
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -536,6 +536,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 	struct page *page;
 	int i;
 	int cold;
+	int do_retry;
 
 	if (wait)
 		might_sleep();
@@ -626,10 +627,21 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order,
 	}
 
 	/*
-	 * Don't let big-order allocations loop.  Yield for kswapd, try again.
+	 * Don't let big-order allocations loop unless the caller explicitly
+	 * requests that.  Wait for some write requests to complete then retry.
+	 *
+	 * In this implementation, __GFP_REPEAT means __GFP_NOFAIL, but that
+	 * may not be true in other implementations.
 	 */
-	if (order <= 3) {
-		yield();
+	do_retry = 0;
+	if (!(gfp_mask & __GFP_NORETRY)) {
+		if ((order <= 3) || (gfp_mask & __GFP_REPEAT))
+			do_retry = 1;
+		if (gfp_mask & __GFP_NOFAIL)
+			do_retry = 1;
+	}
+	if (do_retry) {
+		blk_congestion_wait(WRITE, HZ/50);
 		goto rebalance;
 	}
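For readability, the retry decision above can be restated as a standalone
predicate (a rephrasing of the diff's logic, not code from the patch; note
that __GFP_NORETRY takes precedence over __GFP_NOFAIL in this implementation):

	static int should_retry(unsigned int gfp_mask, unsigned int order)
	{
		if (gfp_mask & __GFP_NORETRY)
			return 0;	/* try once, never loop */
		if (gfp_mask & __GFP_NOFAIL)
			return 1;	/* must loop until success */
		/* Small-order allocations retry by default; __GFP_REPEAT
		 * extends that behaviour to large orders. */
		return (order <= 3) || (gfp_mask & __GFP_REPEAT);
	}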
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -805,8 +805,7 @@ shrink_caches(struct zone *classzone, int priority, int *total_scanned,
  * excessive rotation of the inactive list, which is _supposed_ to be an LRU,
  * yes?
  */
-int
-try_to_free_pages(struct zone *classzone,
+int try_to_free_pages(struct zone *classzone,
 		unsigned int gfp_mask, unsigned int order)
 {
 	int priority;
@@ -838,7 +837,7 @@ try_to_free_pages(struct zone *classzone,
 		blk_congestion_wait(WRITE, HZ/10);
 		shrink_slab(total_scanned, gfp_mask);
 	}
-	if (gfp_mask & __GFP_FS)
+	if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY))
 		out_of_memory();
 	return 0;
 }