Commit 8a942fde authored by Mel Gorman, committed by Linus Torvalds

mm: meminit: make __early_pfn_to_nid SMP-safe and introduce meminit_pfn_in_nid

__early_pfn_to_nid() uses static variables to cache recent lookups because
memblock lookups are very expensive, but it assumes that memory
initialisation is single-threaded.  Parallel initialisation of struct
pages will break that assumption, so this patch makes __early_pfn_to_nid()
SMP-safe by requiring the caller to cache recent search information.
early_pfn_to_nid() keeps the same interface but is only safe to use early
in boot due to its use of a global static variable.  meminit_pfn_in_nid()
is an SMP-safe version for which callers must maintain their own state.
Signed-off-by: Mel Gorman <mgorman@suse.de>
Tested-by: Nate Zimmer <nzimmer@sgi.com>
Tested-by: Waiman Long <waiman.long@hp.com>
Tested-by: Daniel J Blueman <daniel@numascale.com>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: Robin Holt <robinmholt@gmail.com>
Cc: Nate Zimmer <nzimmer@sgi.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Waiman Long <waiman.long@hp.com>
Cc: Scott Norton <scott.norton@hp.com>
Cc: "Luck, Tony" <tony.luck@intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent d70ddd7a
...@@ -58,27 +58,22 @@ paddr_to_nid(unsigned long paddr) ...@@ -58,27 +58,22 @@ paddr_to_nid(unsigned long paddr)
* SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where * SPARSEMEM to allocate the SPARSEMEM sectionmap on the NUMA node where
* the section resides. * the section resides.
*/ */
int __meminit __early_pfn_to_nid(unsigned long pfn) int __meminit __early_pfn_to_nid(unsigned long pfn,
struct mminit_pfnnid_cache *state)
{ {
int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec; int i, section = pfn >> PFN_SECTION_SHIFT, ssec, esec;
/*
* NOTE: The following SMP-unsafe globals are only used early in boot
* when the kernel is running single-threaded.
*/
static int __meminitdata last_ssec, last_esec;
static int __meminitdata last_nid;
if (section >= last_ssec && section < last_esec) if (section >= state->last_start && section < state->last_end)
return last_nid; return state->last_nid;
for (i = 0; i < num_node_memblks; i++) { for (i = 0; i < num_node_memblks; i++) {
ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT; ssec = node_memblk[i].start_paddr >> PA_SECTION_SHIFT;
esec = (node_memblk[i].start_paddr + node_memblk[i].size + esec = (node_memblk[i].start_paddr + node_memblk[i].size +
((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT; ((1L << PA_SECTION_SHIFT) - 1)) >> PA_SECTION_SHIFT;
if (section >= ssec && section < esec) { if (section >= ssec && section < esec) {
last_ssec = ssec; state->last_start = ssec;
last_esec = esec; state->last_end = esec;
last_nid = node_memblk[i].nid; state->last_nid = node_memblk[i].nid;
return node_memblk[i].nid; return node_memblk[i].nid;
} }
} }
......
...@@ -1726,7 +1726,8 @@ extern void sparse_memory_present_with_active_regions(int nid); ...@@ -1726,7 +1726,8 @@ extern void sparse_memory_present_with_active_regions(int nid);
#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \ #if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
!defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
static inline int __early_pfn_to_nid(unsigned long pfn) static inline int __early_pfn_to_nid(unsigned long pfn,
struct mminit_pfnnid_cache *state)
{ {
return 0; return 0;
} }
...@@ -1734,7 +1735,8 @@ static inline int __early_pfn_to_nid(unsigned long pfn) ...@@ -1734,7 +1735,8 @@ static inline int __early_pfn_to_nid(unsigned long pfn)
/* please see mm/page_alloc.c */ /* please see mm/page_alloc.c */
extern int __meminit early_pfn_to_nid(unsigned long pfn); extern int __meminit early_pfn_to_nid(unsigned long pfn);
/* there is a per-arch backend function. */ /* there is a per-arch backend function. */
extern int __meminit __early_pfn_to_nid(unsigned long pfn); extern int __meminit __early_pfn_to_nid(unsigned long pfn,
struct mminit_pfnnid_cache *state);
#endif #endif
extern void set_dma_reserve(unsigned long new_dma_reserve); extern void set_dma_reserve(unsigned long new_dma_reserve);
......
...@@ -1216,10 +1216,24 @@ void sparse_init(void); ...@@ -1216,10 +1216,24 @@ void sparse_init(void);
#define sparse_index_init(_sec, _nid) do {} while (0) #define sparse_index_init(_sec, _nid) do {} while (0)
#endif /* CONFIG_SPARSEMEM */ #endif /* CONFIG_SPARSEMEM */
/*
* During memory init memblocks map pfns to nids. The search is expensive and
* this caches recent lookups. The implementation of __early_pfn_to_nid
* may treat start/end as pfns or sections.
*/
struct mminit_pfnnid_cache {
unsigned long last_start;
unsigned long last_end;
int last_nid;
};
#ifdef CONFIG_NODES_SPAN_OTHER_NODES #ifdef CONFIG_NODES_SPAN_OTHER_NODES
bool early_pfn_in_nid(unsigned long pfn, int nid); bool early_pfn_in_nid(unsigned long pfn, int nid);
bool meminit_pfn_in_nid(unsigned long pfn, int node,
struct mminit_pfnnid_cache *state);
#else #else
#define early_pfn_in_nid(pfn, nid) (1) #define early_pfn_in_nid(pfn, nid) (1)
#define meminit_pfn_in_nid(pfn, nid, state) (1)
#endif #endif
#ifndef early_pfn_valid #ifndef early_pfn_valid
......
...@@ -4551,39 +4551,41 @@ int __meminit init_currently_empty_zone(struct zone *zone, ...@@ -4551,39 +4551,41 @@ int __meminit init_currently_empty_zone(struct zone *zone,
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID #ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
/* /*
* Required by SPARSEMEM. Given a PFN, return what node the PFN is on. * Required by SPARSEMEM. Given a PFN, return what node the PFN is on.
*/ */
int __meminit __early_pfn_to_nid(unsigned long pfn) int __meminit __early_pfn_to_nid(unsigned long pfn,
struct mminit_pfnnid_cache *state)
{ {
unsigned long start_pfn, end_pfn; unsigned long start_pfn, end_pfn;
int nid; int nid;
/*
* NOTE: The following SMP-unsafe globals are only used early in boot
* when the kernel is running single-threaded.
*/
static unsigned long __meminitdata last_start_pfn, last_end_pfn;
static int __meminitdata last_nid;
if (last_start_pfn <= pfn && pfn < last_end_pfn) if (state->last_start <= pfn && pfn < state->last_end)
return last_nid; return state->last_nid;
nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn); nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
if (nid != -1) { if (nid != -1) {
last_start_pfn = start_pfn; state->last_start = start_pfn;
last_end_pfn = end_pfn; state->last_end = end_pfn;
last_nid = nid; state->last_nid = nid;
} }
return nid; return nid;
} }
#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */ #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
static struct mminit_pfnnid_cache early_pfnnid_cache __meminitdata;
/* Only safe to use early in boot when initialisation is single-threaded */
int __meminit early_pfn_to_nid(unsigned long pfn) int __meminit early_pfn_to_nid(unsigned long pfn)
{ {
int nid; int nid;
nid = __early_pfn_to_nid(pfn); /* The system will behave unpredictably otherwise */
BUG_ON(system_state != SYSTEM_BOOTING);
nid = __early_pfn_to_nid(pfn, &early_pfnnid_cache);
if (nid >= 0) if (nid >= 0)
return nid; return nid;
/* just returns 0 */ /* just returns 0 */
...@@ -4591,15 +4593,23 @@ int __meminit early_pfn_to_nid(unsigned long pfn) ...@@ -4591,15 +4593,23 @@ int __meminit early_pfn_to_nid(unsigned long pfn)
} }
#ifdef CONFIG_NODES_SPAN_OTHER_NODES #ifdef CONFIG_NODES_SPAN_OTHER_NODES
bool __meminit early_pfn_in_nid(unsigned long pfn, int node) bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
struct mminit_pfnnid_cache *state)
{ {
int nid; int nid;
nid = __early_pfn_to_nid(pfn); nid = __early_pfn_to_nid(pfn, state);
if (nid >= 0 && nid != node) if (nid >= 0 && nid != node)
return false; return false;
return true; return true;
} }
/* Only safe to use early in boot when initialisation is single-threaded */
bool __meminit early_pfn_in_nid(unsigned long pfn, int node)
{
return meminit_pfn_in_nid(pfn, node, &early_pfnnid_cache);
}
#endif #endif
/** /**
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment