Commit caf49191 authored by Linus Torvalds

Revert "revert "Revert "mm: remove __GFP_NO_KSWAPD""" and associated damage

This reverts commits a5091539 and
d7c3b937.

This is a revert of a revert of a revert.  In addition, it reverts the
even older i915 change to stop using the __GFP_NO_KSWAPD flag due to the
original commits in linux-next.

It turns out that the original patch really was bogus, and that the
original revert was the correct thing to do after all.  We thought we
had fixed the problem, and then reverted the revert, but the problem
really is fundamental: waking up kswapd simply isn't the right thing to
do, and direct reclaim sometimes simply _is_ the right thing to do.

When certain allocations fail, we simply should try some direct reclaim,
and if that fails, fail the allocation.  That's the right thing to do
for THP allocations, which can easily fail, and the GPU allocations want
to do that too.
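
As a caller-side sketch of that policy (illustrative only; the helper name is hypothetical and this code is not part of the commit), a huge-page allocation can use the GFP_TRANSHUGE mask restored below, accept a fast failure, and fall back to base pages:

/* Hypothetical example, not from this commit: GFP_TRANSHUGE (see the
 * gfp.h hunk below) carries __GFP_NORETRY | __GFP_NOWARN |
 * __GFP_NO_KSWAPD, so the first attempt may run one round of direct
 * reclaim/compaction, then fail quickly without waking kswapd. */
static struct page *try_huge_then_small(void)
{
	struct page *page = alloc_pages(GFP_TRANSHUGE, HPAGE_PMD_ORDER);

	if (!page)	/* expected, cheap failure: use a base page instead */
		page = alloc_pages(GFP_KERNEL, 0);
	return page;
}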

So starting kswapd is sometimes simply wrong, and removing the flag that
said "don't start kswapd" was a mistake.  Let's hope we never revisit
this mistake again - and certainly not this many times ;)

Acked-by: Mel Gorman <mgorman@suse.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 31f8d42d
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1796,7 +1796,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	 */
 	mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
 	gfp = mapping_gfp_mask(mapping);
-	gfp |= __GFP_NORETRY | __GFP_NOWARN;
+	gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
 	gfp &= ~(__GFP_IO | __GFP_WAIT);
 	for_each_sg(st->sgl, sg, page_count, i) {
 		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
@@ -1809,7 +1809,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 			 * our own buffer, now let the real VM do its job and
 			 * go down in flames if truly OOM.
 			 */
-			gfp &= ~(__GFP_NORETRY | __GFP_NOWARN);
+			gfp &= ~(__GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD);
 			gfp |= __GFP_IO | __GFP_WAIT;
 			i915_gem_shrink_all(dev_priv);
@@ -1817,7 +1817,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 			if (IS_ERR(page))
 				goto err_pages;
-			gfp |= __GFP_NORETRY | __GFP_NOWARN;
+			gfp |= __GFP_NORETRY | __GFP_NOWARN | __GFP_NO_KSWAPD;
 			gfp &= ~(__GFP_IO | __GFP_WAIT);
 		}
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -1077,7 +1077,8 @@ EXPORT_SYMBOL_GPL(mtd_writev);
  * until the request succeeds or until the allocation size falls below
  * the system page size. This attempts to make sure it does not adversely
  * impact system performance, so when allocating more than one page, we
- * ask the memory allocator to avoid re-trying.
+ * ask the memory allocator to avoid re-trying, swapping, writing back
+ * or performing I/O.
  *
  * Note, this function also makes sure that the allocated buffer is aligned to
  * the MTD device's min. I/O unit, i.e. the "mtd->writesize" value.
@@ -1091,7 +1092,8 @@ EXPORT_SYMBOL_GPL(mtd_writev);
  */
 void *mtd_kmalloc_up_to(const struct mtd_info *mtd, size_t *size)
 {
-	gfp_t flags = __GFP_NOWARN | __GFP_WAIT | __GFP_NORETRY;
+	gfp_t flags = __GFP_NOWARN | __GFP_WAIT |
+		       __GFP_NORETRY | __GFP_NO_KSWAPD;
 	size_t min_alloc = max_t(size_t, mtd->writesize, PAGE_SIZE);
 	void *kbuf;
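
The comment above spells out mtd_kmalloc_up_to()'s strategy; sketched below is the documented loop shape (an approximation for illustration, not a verbatim quote of mtdcore.c):

	/* Assumed shape of the documented behavior: try progressively
	 * smaller, writesize-aligned buffers with the light-weight flags;
	 * only the final, minimum-sized attempt uses GFP_KERNEL and may
	 * reclaim aggressively. */
	while (*size > min_alloc) {
		kbuf = kmalloc(*size, flags);
		if (kbuf)
			return kbuf;
		*size >>= 1;				/* halve the request */
		*size = ALIGN(*size, mtd->writesize);	/* keep I/O alignment */
	}
	return kmalloc(*size, GFP_KERNEL);		/* last resort: try hard */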
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -30,9 +30,10 @@ struct vm_area_struct;
 #define ___GFP_HARDWALL		0x20000u
 #define ___GFP_THISNODE		0x40000u
 #define ___GFP_RECLAIMABLE	0x80000u
-#define ___GFP_NOTRACK		0x100000u
-#define ___GFP_OTHER_NODE	0x200000u
-#define ___GFP_WRITE		0x400000u
+#define ___GFP_NOTRACK		0x200000u
+#define ___GFP_NO_KSWAPD	0x400000u
+#define ___GFP_OTHER_NODE	0x800000u
+#define ___GFP_WRITE		0x1000000u
 /*
  * GFP bitmasks..
@@ -85,6 +86,7 @@ struct vm_area_struct;
 #define __GFP_RECLAIMABLE	((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
 #define __GFP_NOTRACK		((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */
+#define __GFP_NO_KSWAPD		((__force gfp_t)___GFP_NO_KSWAPD)
 #define __GFP_OTHER_NODE	((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
 #define __GFP_WRITE		((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */
@@ -94,7 +96,7 @@ struct vm_area_struct;
  */
 #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)
-#define __GFP_BITS_SHIFT 23	/* Room for N __GFP_FOO bits */
+#define __GFP_BITS_SHIFT 25	/* Room for N __GFP_FOO bits */
 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
 /* This equals 0, but use constants in case they ever change */
@@ -114,7 +116,8 @@ struct vm_area_struct;
 			 __GFP_MOVABLE)
 #define GFP_IOFS	(__GFP_IO | __GFP_FS)
 #define GFP_TRANSHUGE	(GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
-			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN)
+			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
+			 __GFP_NO_KSWAPD)
 #ifdef CONFIG_NUMA
 #define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
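
The renumbering above is self-consistent: the highest restored flag, ___GFP_WRITE = 0x1000000u, is bit 24, which is exactly why __GFP_BITS_SHIFT grows from 23 to 25 (bit 20, 0x100000u, is left unassigned, as shown in the new column). A standalone check (illustrative, not part of the patch):

/* Illustrative sanity check, not in the patch: 25 bits must cover the
 * highest flag, ___GFP_WRITE = 0x1000000u (bit 24). */
#define ___GFP_WRITE		0x1000000u
#define __GFP_BITS_SHIFT	25
_Static_assert(___GFP_WRITE < (1u << __GFP_BITS_SHIFT),
	       "__GFP_BITS_SHIFT must cover the highest flag bit");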
--- a/include/trace/events/gfpflags.h
+++ b/include/trace/events/gfpflags.h
@@ -36,6 +36,7 @@
 	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\
 	{(unsigned long)__GFP_MOVABLE,		"GFP_MOVABLE"},		\
 	{(unsigned long)__GFP_NOTRACK,		"GFP_NOTRACK"},		\
+	{(unsigned long)__GFP_NO_KSWAPD,	"GFP_NO_KSWAPD"},	\
 	{(unsigned long)__GFP_OTHER_NODE,	"GFP_OTHER_NODE"}	\
 	) : "GFP_NOWAIT"
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2416,8 +2416,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 		goto nopage;

 restart:
-	wake_all_kswapd(order, zonelist, high_zoneidx,
-					zone_idx(preferred_zone));
+	if (!(gfp_mask & __GFP_NO_KSWAPD))
+		wake_all_kswapd(order, zonelist, high_zoneidx,
+						zone_idx(preferred_zone));

 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
@@ -2494,7 +2495,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	 * system then fail the allocation instead of entering direct reclaim.
 	 */
 	if ((deferred_compaction || contended_compaction) &&
-	    (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
+	    (gfp_mask & __GFP_NO_KSWAPD))
 		goto nopage;

 	/* Try direct reclaim and then allocating */
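
Net effect, matching the argument in the commit message: an allocation marked __GFP_NO_KSWAPD never wakes kswapd on entering the slowpath, and when compaction was recently deferred or contended it goes straight to nopage instead of grinding through direct reclaim; otherwise it still gets its synchronous shot at direct reclaim and then fails cleanly. The explicit flag also replaces the earlier (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE test, which only approximated "this is a THP-style allocation".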