Commit 35fd1eb1, authored by Pavel Tatashin, committed by Linus Torvalds

mm/sparse: abstract sparse buffer allocations

Patch series "sparse_init rewrite", v6.

In sparse_init() we allocate two large buffers to temporarily hold the usemap
and memmap for the whole machine.  However, we can avoid doing that if
we change sparse_init() to operate on a per-node basis instead of doing
it for the whole machine beforehand.

As shown by Baoquan
  http://lkml.kernel.org/r/20180628062857.29658-1-bhe@redhat.com

The buffers are large enough to prevent machines with little memory from
booting.

Another benefit of these changes is that they also obsolete
CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER.

This patch (of 5):

When struct pages are allocated for sparse-vmemmap VA layout, we first try
to allocate one large buffer, and then, if that fails, allocate struct pages
for each section as we go.

The code that allocates the buffer uses global variables and is spread
across several call sites.

Clean up the code by introducing three functions to handle the global
buffer:

sparse_buffer_init()	initialize the buffer
sparse_buffer_fini()	free the remaining part of the buffer
sparse_buffer_alloc()	alloc from the buffer, and if buffer is empty
return NULL

Define these functions in sparse.c instead of sparse-vmemmap.c because
later we will use them for non-vmemmap sparse allocations as well.

[akpm@linux-foundation.org: use PTR_ALIGN()]
[akpm@linux-foundation.org: s/BUG_ON/WARN_ON/]
Link: http://lkml.kernel.org/r/20180712203730.8703-2-pasha.tatashin@oracle.com
Signed-off-by: Pavel Tatashin <pasha.tatashin@oracle.com>
Tested-by: Michael Ellerman <mpe@ellerman.id.au>	[powerpc]
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Tested-by: Oscar Salvador <osalvador@suse.de>
Cc: Pasha Tatashin <Pavel.Tatashin@microsoft.com>
Cc: Steven Sistare <steven.sistare@oracle.com>
Cc: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jérôme Glisse <jglisse@redhat.com>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Baoquan He <bhe@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Wei Yang <richard.weiyang@gmail.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Abdul Haleem <abdhalee@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
parent 330d6e48
...@@ -2671,6 +2671,10 @@ void sparse_mem_maps_populate_node(struct page **map_map, ...@@ -2671,6 +2671,10 @@ void sparse_mem_maps_populate_node(struct page **map_map,
unsigned long map_count, unsigned long map_count,
int nodeid); int nodeid);
unsigned long __init section_map_size(void);
void sparse_buffer_init(unsigned long size, int nid);
void sparse_buffer_fini(void);
void *sparse_buffer_alloc(unsigned long size);
struct page *sparse_mem_map_populate(unsigned long pnum, int nid, struct page *sparse_mem_map_populate(unsigned long pnum, int nid,
struct vmem_altmap *altmap); struct vmem_altmap *altmap);
pgd_t *vmemmap_pgd_populate(unsigned long addr, int node); pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
......
...@@ -46,9 +46,6 @@ static void * __ref __earlyonly_bootmem_alloc(int node, ...@@ -46,9 +46,6 @@ static void * __ref __earlyonly_bootmem_alloc(int node,
BOOTMEM_ALLOC_ACCESSIBLE, node); BOOTMEM_ALLOC_ACCESSIBLE, node);
} }
static void *vmemmap_buf;
static void *vmemmap_buf_end;
void * __meminit vmemmap_alloc_block(unsigned long size, int node) void * __meminit vmemmap_alloc_block(unsigned long size, int node)
{ {
/* If the main allocator is up use that, fallback to bootmem. */ /* If the main allocator is up use that, fallback to bootmem. */
...@@ -76,18 +73,10 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node) ...@@ -76,18 +73,10 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
/* need to make sure size is all the same during early stage */ /* need to make sure size is all the same during early stage */
void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node) void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
{ {
void *ptr; void *ptr = sparse_buffer_alloc(size);
if (!vmemmap_buf)
return vmemmap_alloc_block(size, node);
/* take the from buf */
ptr = (void *)ALIGN((unsigned long)vmemmap_buf, size);
if (ptr + size > vmemmap_buf_end)
return vmemmap_alloc_block(size, node);
vmemmap_buf = ptr + size;
if (!ptr)
ptr = vmemmap_alloc_block(size, node);
return ptr; return ptr;
} }
...@@ -279,19 +268,9 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, ...@@ -279,19 +268,9 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
unsigned long map_count, int nodeid) unsigned long map_count, int nodeid)
{ {
unsigned long pnum; unsigned long pnum;
unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
void *vmemmap_buf_start;
int nr_consumed_maps = 0; int nr_consumed_maps = 0;
size = ALIGN(size, PMD_SIZE); sparse_buffer_init(section_map_size() * map_count, nodeid);
vmemmap_buf_start = __earlyonly_bootmem_alloc(nodeid, size * map_count,
PMD_SIZE, __pa(MAX_DMA_ADDRESS));
if (vmemmap_buf_start) {
vmemmap_buf = vmemmap_buf_start;
vmemmap_buf_end = vmemmap_buf_start + size * map_count;
}
for (pnum = pnum_begin; pnum < pnum_end; pnum++) { for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
if (!present_section_nr(pnum)) if (!present_section_nr(pnum))
continue; continue;
...@@ -303,12 +282,5 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, ...@@ -303,12 +282,5 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
pr_err("%s: sparsemem memory map backing failed some memory will not be available\n", pr_err("%s: sparsemem memory map backing failed some memory will not be available\n",
__func__); __func__);
} }
sparse_buffer_fini();
if (vmemmap_buf_start) {
/* need to free left buf */
memblock_free_early(__pa(vmemmap_buf),
vmemmap_buf_end - vmemmap_buf);
vmemmap_buf = NULL;
vmemmap_buf_end = NULL;
}
} }
...@@ -400,7 +400,14 @@ static void __init sparse_early_usemaps_alloc_node(void *data, ...@@ -400,7 +400,14 @@ static void __init sparse_early_usemaps_alloc_node(void *data,
} }
} }
#ifndef CONFIG_SPARSEMEM_VMEMMAP #ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Return the number of bytes reserved for one section's memory map:
 * sizeof(struct page) * PAGES_PER_SECTION, rounded up to a multiple of
 * PMD_SIZE.
 */
unsigned long __init section_map_size(void)
{
return ALIGN(sizeof(struct page) * PAGES_PER_SECTION, PMD_SIZE);
}
#else
struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid, struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid,
struct vmem_altmap *altmap) struct vmem_altmap *altmap)
{ {
...@@ -457,6 +464,42 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, ...@@ -457,6 +464,42 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
} }
#endif /* !CONFIG_SPARSEMEM_VMEMMAP */ #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
static void *sparsemap_buf __meminitdata;
static void *sparsemap_buf_end __meminitdata;
/*
 * Set up the global sparse buffer.
 *
 * @size: number of bytes to request for the buffer
 * @nid:  node id passed to memblock_virt_alloc_try_nid_raw()
 *
 * The allocation result is stored in sparsemap_buf and sparsemap_buf_end is
 * set to sparsemap_buf + size.  The result is not checked here: if the
 * allocation returned NULL, sparsemap_buf stays NULL and both
 * sparse_buffer_alloc() and sparse_buffer_fini() test for that before
 * touching the buffer.
 */
void __init sparse_buffer_init(unsigned long size, int nid)
{
WARN_ON(sparsemap_buf); /* forgot to call sparse_buffer_fini()? */
sparsemap_buf =
memblock_virt_alloc_try_nid_raw(size, PAGE_SIZE,
__pa(MAX_DMA_ADDRESS),
BOOTMEM_ALLOC_ACCESSIBLE, nid);
sparsemap_buf_end = sparsemap_buf + size;
}
/*
 * Tear down the global sparse buffer.
 *
 * Whatever remains between sparsemap_buf (the current allocation cursor) and
 * sparsemap_buf_end is handed to memblock_free_early(), then sparsemap_buf is
 * reset to NULL so that a later sparse_buffer_init() does not trip its
 * WARN_ON().  sparsemap_buf_end is left as it was.
 */
void __init sparse_buffer_fini(void)
{
unsigned long size = sparsemap_buf_end - sparsemap_buf;
/* Nothing to free if there is no buffer or it was consumed completely. */
if (sparsemap_buf && size > 0)
memblock_free_early(__pa(sparsemap_buf), size);
sparsemap_buf = NULL;
}
/*
 * Carve @size bytes out of the global sparse buffer.
 *
 * The candidate address is sparsemap_buf aligned with PTR_ALIGN(..., size).
 * Returns that address and advances sparsemap_buf past the chunk on success.
 * Returns NULL when no buffer is set up (sparsemap_buf is NULL) or when the
 * aligned chunk would extend beyond sparsemap_buf_end; in the latter case
 * sparsemap_buf is left unchanged.
 */
void * __meminit sparse_buffer_alloc(unsigned long size)
{
void *ptr = NULL;
if (sparsemap_buf) {
ptr = PTR_ALIGN(sparsemap_buf, size);
/* Not enough room left after alignment: report failure. */
if (ptr + size > sparsemap_buf_end)
ptr = NULL;
else
/* Bump the cursor; bytes skipped by the alignment are not tracked. */
sparsemap_buf = ptr + size;
}
return ptr;
}
#ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
static void __init sparse_early_mem_maps_alloc_node(void *data, static void __init sparse_early_mem_maps_alloc_node(void *data,
unsigned long pnum_begin, unsigned long pnum_begin,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment