Commit d135e575 authored by Pavel Tatashin, committed by Linus Torvalds

mm/page_alloc.c: broken deferred calculation

In reset_deferred_meminit() we determine number of pages that must not
be deferred.  We initialize pages for at least 2G of memory, but also
pages for reserved memory in this node.

The reserved memory is determined in this function:
memblock_reserved_memory_within(), which operates over physical
addresses, and returns size in bytes.  However, reset_deferred_meminit()
assumes that this function operates with pfns, and returns page
count.

The result is that in the best case the machine boots slower than expected
due to initializing more pages than needed in a single thread, and in the
worst case it panics because fewer pages than needed are initialized early.

Link: http://lkml.kernel.org/r/20171021011707.15191-1-pasha.tatashin@oracle.com
Fixes: 864b9a39 ("mm: consider memblock reservations for deferred memory initialization sizing")
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 400e2249
...@@ -700,7 +700,8 @@ typedef struct pglist_data { ...@@ -700,7 +700,8 @@ typedef struct pglist_data {
* is the first PFN that needs to be initialised. * is the first PFN that needs to be initialised.
*/ */
unsigned long first_deferred_pfn; unsigned long first_deferred_pfn;
unsigned long static_init_size; /* Number of non-deferred pages */
unsigned long static_init_pgcnt;
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
#ifdef CONFIG_TRANSPARENT_HUGEPAGE #ifdef CONFIG_TRANSPARENT_HUGEPAGE
......
...@@ -291,28 +291,37 @@ EXPORT_SYMBOL(nr_online_nodes); ...@@ -291,28 +291,37 @@ EXPORT_SYMBOL(nr_online_nodes);
int page_group_by_mobility_disabled __read_mostly; int page_group_by_mobility_disabled __read_mostly;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
* Determine how many pages need to be initialized during early boot
* (non-deferred initialization).
* The value of first_deferred_pfn will be set later, once non-deferred pages
* are initialized, but for now set it to ULONG_MAX.
*/
static inline void reset_deferred_meminit(pg_data_t *pgdat) static inline void reset_deferred_meminit(pg_data_t *pgdat)
{ {
unsigned long max_initialise; phys_addr_t start_addr, end_addr;
unsigned long reserved_lowmem; unsigned long max_pgcnt;
unsigned long reserved;
/* /*
* Initialise at least 2G of a node but also take into account that * Initialise at least 2G of a node but also take into account that
* two large system hashes that can take up 1GB for 0.25TB/node. * two large system hashes that can take up 1GB for 0.25TB/node.
*/ */
max_initialise = max(2UL << (30 - PAGE_SHIFT), max_pgcnt = max(2UL << (30 - PAGE_SHIFT),
(pgdat->node_spanned_pages >> 8)); (pgdat->node_spanned_pages >> 8));
/* /*
* Compensate the all the memblock reservations (e.g. crash kernel) * Compensate the all the memblock reservations (e.g. crash kernel)
* from the initial estimation to make sure we will initialize enough * from the initial estimation to make sure we will initialize enough
* memory to boot. * memory to boot.
*/ */
reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn, start_addr = PFN_PHYS(pgdat->node_start_pfn);
pgdat->node_start_pfn + max_initialise); end_addr = PFN_PHYS(pgdat->node_start_pfn + max_pgcnt);
max_initialise += reserved_lowmem; reserved = memblock_reserved_memory_within(start_addr, end_addr);
max_pgcnt += PHYS_PFN(reserved);
pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages); pgdat->static_init_pgcnt = min(max_pgcnt, pgdat->node_spanned_pages);
pgdat->first_deferred_pfn = ULONG_MAX; pgdat->first_deferred_pfn = ULONG_MAX;
} }
...@@ -339,7 +348,7 @@ static inline bool update_defer_init(pg_data_t *pgdat, ...@@ -339,7 +348,7 @@ static inline bool update_defer_init(pg_data_t *pgdat,
if (zone_end < pgdat_end_pfn(pgdat)) if (zone_end < pgdat_end_pfn(pgdat))
return true; return true;
(*nr_initialised)++; (*nr_initialised)++;
if ((*nr_initialised > pgdat->static_init_size) && if ((*nr_initialised > pgdat->static_init_pgcnt) &&
(pfn & (PAGES_PER_SECTION - 1)) == 0) { (pfn & (PAGES_PER_SECTION - 1)) == 0) {
pgdat->first_deferred_pfn = pfn; pgdat->first_deferred_pfn = pfn;
return false; return false;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment