Commit f90ac398 authored by Mel Gorman's avatar Mel Gorman Committed by Linus Torvalds

mm: avoid livelock on !__GFP_FS allocations

Colin Cross reported;

  Under the following conditions, __alloc_pages_slowpath can loop forever:
  gfp_mask & __GFP_WAIT is true
  gfp_mask & __GFP_FS is false
  reclaim and compaction make no progress
  order <= PAGE_ALLOC_COSTLY_ORDER

  These conditions happen very often during suspend and resume,
  when pm_restrict_gfp_mask() effectively converts all GFP_KERNEL
  allocations into __GFP_WAIT.

  The oom killer is not run because gfp_mask & __GFP_FS is false,
  but should_alloc_retry will always return true when order is less
  than PAGE_ALLOC_COSTLY_ORDER.

In his fix, he avoided retrying the allocation if reclaim made no progress
and __GFP_FS was not set.  The problem is that this would result in
GFP_NOIO allocations failing that previously succeeded which would be very
unfortunate.

The big difference between GFP_NOIO and suspend converting GFP_KERNEL to
behave like GFP_NOIO is that normally flushers will be cleaning pages and
kswapd reclaims pages allowing GFP_NOIO to succeed after a short delay.
The same does not necessarily apply during suspend as the storage device
may be suspended.

This patch special cases the suspend case to fail the page allocation if
reclaim cannot make progress and adds some documentation on how
gfp_allowed_mask is currently used.  Failing allocations like this may
cause suspend to abort but that is better than a livelock.

[mgorman@suse.de: Rework fix to be suspend specific]
[rientjes@google.com: Move suspended device check to should_alloc_retry]
Reported-by: default avatarColin Cross <ccross@android.com>
Signed-off-by: default avatarMel Gorman <mgorman@suse.de>
Acked-by: default avatarDavid Rientjes <rientjes@google.com>
Cc: Minchan Kim <minchan.kim@gmail.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: default avatarAndrew Morton <akpm@linux-foundation.org>
Signed-off-by: default avatarLinus Torvalds <torvalds@linux-foundation.org>
parent 938929f1
...@@ -368,9 +368,25 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp); ...@@ -368,9 +368,25 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_pages(void); void drain_all_pages(void);
void drain_local_pages(void *dummy); void drain_local_pages(void *dummy);
/*
* gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
* GFP flags are used before interrupts are enabled. Once interrupts are
* enabled, it is set to __GFP_BITS_MASK while the system is running. During
* hibernation, it is used by PM to avoid I/O during memory allocation while
* devices are suspended.
*/
extern gfp_t gfp_allowed_mask; extern gfp_t gfp_allowed_mask;
extern void pm_restrict_gfp_mask(void); extern void pm_restrict_gfp_mask(void);
extern void pm_restore_gfp_mask(void); extern void pm_restore_gfp_mask(void);
#ifdef CONFIG_PM_SLEEP
extern bool pm_suspended_storage(void);
#else
static inline bool pm_suspended_storage(void)
{
return false;
}
#endif /* CONFIG_PM_SLEEP */
#endif /* __LINUX_GFP_H */ #endif /* __LINUX_GFP_H */
...@@ -127,6 +127,13 @@ void pm_restrict_gfp_mask(void) ...@@ -127,6 +127,13 @@ void pm_restrict_gfp_mask(void)
saved_gfp_mask = gfp_allowed_mask; saved_gfp_mask = gfp_allowed_mask;
gfp_allowed_mask &= ~GFP_IOFS; gfp_allowed_mask &= ~GFP_IOFS;
} }
bool pm_suspended_storage(void)
{
if ((gfp_allowed_mask & GFP_IOFS) == GFP_IOFS)
return false;
return true;
}
#endif /* CONFIG_PM_SLEEP */ #endif /* CONFIG_PM_SLEEP */
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
...@@ -1786,12 +1793,25 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...) ...@@ -1786,12 +1793,25 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
static inline int static inline int
should_alloc_retry(gfp_t gfp_mask, unsigned int order, should_alloc_retry(gfp_t gfp_mask, unsigned int order,
unsigned long did_some_progress,
unsigned long pages_reclaimed) unsigned long pages_reclaimed)
{ {
/* Do not loop if specifically requested */ /* Do not loop if specifically requested */
if (gfp_mask & __GFP_NORETRY) if (gfp_mask & __GFP_NORETRY)
return 0; return 0;
/* Always retry if specifically requested */
if (gfp_mask & __GFP_NOFAIL)
return 1;
/*
* Suspend converts GFP_KERNEL to __GFP_WAIT which can prevent reclaim
* making forward progress without invoking OOM. Suspend also disables
* storage devices so kswapd will not help. Bail if we are suspending.
*/
if (!did_some_progress && pm_suspended_storage())
return 0;
/* /*
* In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER * In this implementation, order <= PAGE_ALLOC_COSTLY_ORDER
* means __GFP_NOFAIL, but that may not be true in other * means __GFP_NOFAIL, but that may not be true in other
...@@ -1810,13 +1830,6 @@ should_alloc_retry(gfp_t gfp_mask, unsigned int order, ...@@ -1810,13 +1830,6 @@ should_alloc_retry(gfp_t gfp_mask, unsigned int order,
if (gfp_mask & __GFP_REPEAT && pages_reclaimed < (1 << order)) if (gfp_mask & __GFP_REPEAT && pages_reclaimed < (1 << order))
return 1; return 1;
/*
* Don't let big-order allocations loop unless the caller
* explicitly requests that.
*/
if (gfp_mask & __GFP_NOFAIL)
return 1;
return 0; return 0;
} }
...@@ -2209,7 +2222,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, ...@@ -2209,7 +2222,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
/* Check if we should retry the allocation */ /* Check if we should retry the allocation */
pages_reclaimed += did_some_progress; pages_reclaimed += did_some_progress;
if (should_alloc_retry(gfp_mask, order, pages_reclaimed)) { if (should_alloc_retry(gfp_mask, order, did_some_progress,
pages_reclaimed)) {
/* Wait for some write requests to complete then retry */ /* Wait for some write requests to complete then retry */
wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50); wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/50);
goto rebalance; goto rebalance;
......
...@@ -667,10 +667,10 @@ int try_to_free_swap(struct page *page) ...@@ -667,10 +667,10 @@ int try_to_free_swap(struct page *page)
* original page might be freed under memory pressure, then * original page might be freed under memory pressure, then
* later read back in from swap, now with the wrong data. * later read back in from swap, now with the wrong data.
* *
* Hibernation clears bits from gfp_allowed_mask to prevent * Hibration suspends storage while it is writing the image
* memory reclaim from writing to disk, so check that here. * to disk so check that here.
*/ */
if (!(gfp_allowed_mask & __GFP_IO)) if (pm_suspended_storage())
return 0; return 0;
delete_from_swap_cache(page); delete_from_swap_cache(page);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment