Commit b2fc97c1 authored by Linus Torvalds

Merge tag 'memblock-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock

Pull memblock updates from Mike Rapoport:

 - 'reserve_mem' command line parameter to allow creation of named
   memory reservations at boot time.

   The driving use-case is to improve the ability of pstore to retain
   ramoops data across reboots.

 - cleanups and small improvements in memblock and mm_init

 - new test cases in the memblock test suite

* tag 'memblock-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rppt/memblock:
  memblock tests: fix implicit declaration of function 'numa_valid_node'
  memblock: Move late alloc warning down to phys alloc
  pstore/ramoops: Add ramoops.mem_name= command line option
  mm/memblock: Add "reserve_mem" to reserved named memory at boot up
  mm/mm_init.c: don't initialize page->lru again
  mm/mm_init.c: not always search next deferred_init_pfn from very beginning
  mm/mm_init.c: use deferred_init_mem_pfn_range_in_zone() to decide loop condition
  mm/mm_init.c: get the highest zone directly
  mm/mm_init.c: move nr_initialised reset down a bit
  mm/memblock: fix a typo in description of for_each_mem_region()
  mm/mm_init.c: use memblock_region_memory_base_pfn() to get startpfn
  mm/memblock: use PAGE_ALIGN_DOWN to get pgend in free_memmap
  mm/memblock: return true directly on finding overlap region
  memblock tests: add memblock_overlaps_region_checks
  mm/memblock: fix comment for memblock_isolate_range()
  memblock tests: add memblock_reserve_many_may_conflict_check()
  memblock tests: add memblock_reserve_all_locations_check()
  mm/memblock: remove empty dummy entry
parents 68b59730 9364a7e4
......@@ -5674,6 +5674,28 @@
them. If <base> is less than 0x10000, the region
is assumed to be I/O ports; otherwise it is memory.
reserve_mem= [RAM]
Format: nn[KMG]:<align>:<label>
Reserve physical memory and label it with a name that
other subsystems can use to access it. This is typically
used on systems that do not wipe RAM across reboots, where
this command line tries to reserve the same physical memory
on each soft reboot. The location is not guaranteed to be
the same: it can move if anything about the system changes
or if a different kernel is booted, and if KASLR places the
kernel where a previous boot's reservation was, the new
reservation will end up at a different location.
Any subsystem using this feature must therefore provide a
way to verify that the contents of the physical memory
actually come from a previous boot, since the memory may
not always be found at the same location.
The format is size:align:label. For example, to request
12 megabytes aligned to 4096 bytes for ramoops:
reserve_mem=12M:4096:oops ramoops.mem_name=oops
reservetop= [X86-32,EARLY]
Format: nn[KMG]
Reserves a hole at the top of the kernel virtual
......
......@@ -23,6 +23,8 @@ and type of the memory area are set using three variables:
* ``mem_size`` for the size. The memory size will be rounded down to a
power of two.
* ``mem_type`` to specify the memory type (default is pgprot_writecombine).
* ``mem_name`` to specify a memory region defined by the ``reserve_mem``
command line parameter.
Typically the default value of ``mem_type=0`` should be used as that sets the pstore
mapping to pgprot_writecombine. Setting ``mem_type=1`` attempts to use
......@@ -118,6 +120,17 @@ Setting the ramoops parameters can be done in several different manners:
return ret;
}
D. Using a region of memory reserved via the ``reserve_mem`` command line
parameter. The address and size will be defined by the ``reserve_mem``
parameter. Note that ``reserve_mem`` may not always allocate memory
at the same location, so it cannot be relied upon unconditionally. Testing
is required, and it may not work on every machine or with every kernel.
Consider this a "best effort" approach. The ``reserve_mem`` option
takes a size, an alignment and a name as arguments. The name is used
to map the memory to a label that can be retrieved by ramoops.
reserve_mem=2M:4096:oops ramoops.mem_name=oops
You can specify either RAM memory or peripheral devices' memory. However, when
specifying RAM, be sure to reserve the memory by issuing memblock_reserve()
very early in the architecture code, e.g.::
......
......@@ -50,6 +50,10 @@ module_param_hw(mem_address, ullong, other, 0400);
MODULE_PARM_DESC(mem_address,
"start of reserved RAM used to store oops/panic logs");
static char *mem_name;
module_param_named(mem_name, mem_name, charp, 0400);
MODULE_PARM_DESC(mem_name, "name of kernel param that holds addr");
static ulong mem_size;
module_param(mem_size, ulong, 0400);
MODULE_PARM_DESC(mem_size,
......@@ -914,6 +918,16 @@ static void __init ramoops_register_dummy(void)
{
struct ramoops_platform_data pdata;
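/*
 * A region reserved via reserve_mem= and selected by ramoops.mem_name=
 * overrides any mem_address/mem_size module parameters.
 */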
if (mem_name) {
phys_addr_t start;
phys_addr_t size;
if (reserve_mem_find_by_name(mem_name, &start, &size)) {
mem_address = start;
mem_size = size;
}
}
/*
* Prepare a dummy platform data structure to carry the module
* parameters. If mem_size isn't set, then there are no module
......
......@@ -299,25 +299,6 @@ void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
void __next_mem_pfn_range_in_zone(u64 *idx, struct zone *zone,
unsigned long *out_spfn,
unsigned long *out_epfn);
/**
* for_each_free_mem_pfn_range_in_zone - iterate through zone specific free
* memblock areas
* @i: u64 used as loop variable
* @zone: zone in which all of the memory blocks reside
* @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
* @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
*
* Walks over free (memory && !reserved) areas of memblock in a specific
* zone. Available once memblock and an empty zone is initialized. The main
* assumption is that the zone start, end, and pgdat have been associated.
* This way we can use the zone to determine NUMA node, and if a given part
* of the memblock is valid for the zone.
*/
#define for_each_free_mem_pfn_range_in_zone(i, zone, p_start, p_end) \
for (i = 0, \
__next_mem_pfn_range_in_zone(&i, zone, p_start, p_end); \
i != U64_MAX; \
__next_mem_pfn_range_in_zone(&i, zone, p_start, p_end))
/**
* for_each_free_mem_pfn_range_in_zone_from - iterate through zone specific
......@@ -565,7 +546,7 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo
}
/**
* for_each_mem_region - itereate over memory regions
* for_each_mem_region - iterate over memory regions
* @region: loop variable
*/
#define for_each_mem_region(region) \
......
......@@ -4261,4 +4261,6 @@ static inline bool pfn_is_unaccepted_memory(unsigned long pfn)
void vma_pgtable_walk_begin(struct vm_area_struct *vma);
void vma_pgtable_walk_end(struct vm_area_struct *vma);
int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size);
#endif /* _LINUX_MM_H */
......@@ -114,12 +114,10 @@ static struct memblock_region memblock_physmem_init_regions[INIT_PHYSMEM_REGIONS
struct memblock memblock __initdata_memblock = {
.memory.regions = memblock_memory_init_regions,
.memory.cnt = 1, /* empty dummy entry */
.memory.max = INIT_MEMBLOCK_MEMORY_REGIONS,
.memory.name = "memory",
.reserved.regions = memblock_reserved_init_regions,
.reserved.cnt = 1, /* empty dummy entry */
.reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS,
.reserved.name = "reserved",
......@@ -130,7 +128,6 @@ struct memblock memblock __initdata_memblock = {
#ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP
struct memblock_type physmem = {
.regions = memblock_physmem_init_regions,
.cnt = 1, /* empty dummy entry */
.max = INIT_PHYSMEM_REGIONS,
.name = "physmem",
};
......@@ -197,8 +194,8 @@ bool __init_memblock memblock_overlaps_region(struct memblock_type *type,
for (i = 0; i < type->cnt; i++)
if (memblock_addrs_overlap(base, size, type->regions[i].base,
type->regions[i].size))
break;
return i < type->cnt;
return true;
return false;
}
/**
......@@ -356,7 +353,6 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
/* Special case for empty arrays */
if (type->cnt == 0) {
WARN_ON(type->total_size != 0);
type->cnt = 1;
type->regions[0].base = 0;
type->regions[0].size = 0;
type->regions[0].flags = 0;
......@@ -600,12 +596,13 @@ static int __init_memblock memblock_add_range(struct memblock_type *type,
/* special case for empty array */
if (type->regions[0].size == 0) {
WARN_ON(type->cnt != 1 || type->total_size);
WARN_ON(type->cnt != 0 || type->total_size);
type->regions[0].base = base;
type->regions[0].size = size;
type->regions[0].flags = flags;
memblock_set_region_node(&type->regions[0], nid);
type->total_size = size;
type->cnt = 1;
return 0;
}
......@@ -780,7 +777,8 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt
* Walk @type and ensure that regions don't cross the boundaries defined by
* [@base, @base + @size). Crossing regions are split at the boundaries,
* which may create at most two more regions. The index of the first
* region inside the range is returned in *@start_rgn and end in *@end_rgn.
* region inside the range is returned in *@start_rgn and the index of the
* first region after the range is returned in *@end_rgn.
*
* Return:
* 0 on success, -errno on failure.
......@@ -1441,6 +1439,17 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
enum memblock_flags flags = choose_memblock_flags();
phys_addr_t found;
/*
* Detect any accidental use of these APIs after slab is ready, as at
* this moment memblock may be deinitialized already and its
* internal data may be destroyed (after execution of memblock_free_all)
*/
if (WARN_ON_ONCE(slab_is_available())) {
void *vaddr = kzalloc_node(size, GFP_NOWAIT, nid);
return vaddr ? virt_to_phys(vaddr) : 0;
}
if (!align) {
/* Can't use WARNs this early in boot on powerpc */
dump_stack();
......@@ -1566,13 +1575,6 @@ static void * __init memblock_alloc_internal(
{
phys_addr_t alloc;
/*
* Detect any accidental use of these APIs after slab is ready, as at
* this moment memblock may be deinitialized already and its
* internal data may be destroyed (after execution of memblock_free_all)
*/
if (WARN_ON_ONCE(slab_is_available()))
return kzalloc_node(size, GFP_NOWAIT, nid);
if (max_addr > memblock.current_limit)
max_addr = memblock.current_limit;
......@@ -2031,7 +2033,7 @@ static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn)
* downwards.
*/
pg = PAGE_ALIGN(__pa(start_pg));
pgend = __pa(end_pg) & PAGE_MASK;
pgend = PAGE_ALIGN_DOWN(__pa(end_pg));
/*
* If there are free pages between these, free the section of the
......@@ -2234,6 +2236,123 @@ void __init memblock_free_all(void)
totalram_pages_add(pages);
}
/* Keep a table to reserve named memory */
#define RESERVE_MEM_MAX_ENTRIES 8
#define RESERVE_MEM_NAME_SIZE 16
struct reserve_mem_table {
char name[RESERVE_MEM_NAME_SIZE];
phys_addr_t start;
phys_addr_t size;
};
static struct reserve_mem_table reserved_mem_table[RESERVE_MEM_MAX_ENTRIES];
static int reserved_mem_count;
/* Add wildcard region with a lookup name */
static void __init reserved_mem_add(phys_addr_t start, phys_addr_t size,
const char *name)
{
struct reserve_mem_table *map;
map = &reserved_mem_table[reserved_mem_count++];
map->start = start;
map->size = size;
strscpy(map->name, name);
}
/**
* reserve_mem_find_by_name - Find reserved memory region with a given name
* @name: The name that is attached to a reserved memory region
* @start: If found, holds the start address
* @size: If found, holds the size of the region.
*
* @start and @size are only updated if @name is found.
*
* Returns: 1 if found or 0 if not found.
*/
int reserve_mem_find_by_name(const char *name, phys_addr_t *start, phys_addr_t *size)
{
struct reserve_mem_table *map;
int i;
for (i = 0; i < reserved_mem_count; i++) {
map = &reserved_mem_table[i];
if (!map->size)
continue;
if (strcmp(name, map->name) == 0) {
*start = map->start;
*size = map->size;
return 1;
}
}
return 0;
}
EXPORT_SYMBOL_GPL(reserve_mem_find_by_name);
/*
* Parse reserve_mem=nn:align:name
*/
static int __init reserve_mem(char *p)
{
phys_addr_t start, size, align, tmp;
char *name;
char *oldp;
int len;
if (!p)
return -EINVAL;
/* Check if there's room for more reserved memory */
if (reserved_mem_count >= RESERVE_MEM_MAX_ENTRIES)
return -EBUSY;
oldp = p;
size = memparse(p, &p);
if (!size || p == oldp)
return -EINVAL;
if (*p != ':')
return -EINVAL;
align = memparse(p+1, &p);
if (*p != ':')
return -EINVAL;
/*
* memblock_phys_alloc() doesn't like an align of zero,
* but it is fine for this command line option to pass one.
*/
if (align < SMP_CACHE_BYTES)
align = SMP_CACHE_BYTES;
name = p + 1;
len = strlen(name);
/* the name must be non-empty and must fit in the table entry */
if (!len || len >= RESERVE_MEM_NAME_SIZE)
return -EINVAL;
/* Make sure that name has text */
for (p = name; *p; p++) {
if (!isspace(*p))
break;
}
if (!*p)
return -EINVAL;
/* Make sure the name is not already used */
if (reserve_mem_find_by_name(name, &start, &tmp))
return -EBUSY;
start = memblock_phys_alloc(size, align);
if (!start)
return -ENOMEM;
reserved_mem_add(start, size, name);
return 1;
}
__setup("reserve_mem=", reserve_mem);
#if defined(CONFIG_DEBUG_FS) && defined(CONFIG_ARCH_KEEP_MEMBLOCK)
static const char * const flagname[] = {
[ilog2(MEMBLOCK_HOTPLUG)] = "HOTPLUG",
......
......@@ -363,7 +363,7 @@ static void __init find_zone_movable_pfns_for_nodes(void)
nid = memblock_get_region_node(r);
usable_startpfn = PFN_DOWN(r->base);
usable_startpfn = memblock_region_memory_base_pfn(r);
zone_movable_pfn[nid] = zone_movable_pfn[nid] ?
min(usable_startpfn, zone_movable_pfn[nid]) :
usable_startpfn;
......@@ -676,6 +676,14 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
if (early_page_ext_enabled())
return false;
/* Always populate low zones for address-constrained allocations */
if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
return false;
if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
return true;
/*
* prev_end_pfn static that contains the end of previous zone
* No need to protect because called very early in boot before smp_init.
......@@ -685,12 +693,6 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
nr_initialised = 0;
}
/* Always populate low zones for address-constrained allocations */
if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
return false;
if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
return true;
/*
* We start only with one section of pages, more pages are added as
* needed until the rest of deferred pages are initialized.
......@@ -758,9 +760,6 @@ void __meminit reserve_bootmem_region(phys_addr_t start,
init_reserved_page(start_pfn, nid);
/* Avoid false-positive PageTail() */
INIT_LIST_HEAD(&page->lru);
/*
* no need for atomic set_bit because the struct
* page is not visible yet so nobody should
......@@ -2019,24 +2018,29 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
}
/*
* This function is meant to pre-load the iterator for the zone init.
* Specifically it walks through the ranges until we are caught up to the
* first_init_pfn value and exits there. If we never encounter the value we
* return false indicating there are no valid ranges left.
* This function is meant to pre-load the iterator for the zone init from
* a given point.
* Specifically it walks through the ranges starting with initial index
* passed to it until we are caught up to the first_init_pfn value and
* exits there. If we never encounter the value we return false indicating
* there are no valid ranges left.
*/
static bool __init
deferred_init_mem_pfn_range_in_zone(u64 *i, struct zone *zone,
unsigned long *spfn, unsigned long *epfn,
unsigned long first_init_pfn)
{
u64 j;
u64 j = *i;
if (j == 0)
__next_mem_pfn_range_in_zone(&j, zone, spfn, epfn);
/*
* Start out by walking through the ranges in this zone that have
* already been initialized. We don't need to do anything with them
* so we just need to flush them out of the system.
*/
for_each_free_mem_pfn_range_in_zone(j, zone, spfn, epfn) {
for_each_free_mem_pfn_range_in_zone_from(j, zone, spfn, epfn) {
if (*epfn <= first_init_pfn)
continue;
if (*spfn < first_init_pfn)
......@@ -2108,7 +2112,7 @@ deferred_init_memmap_chunk(unsigned long start_pfn, unsigned long end_pfn,
{
unsigned long spfn, epfn;
struct zone *zone = arg;
u64 i;
u64 i = 0;
deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, start_pfn);
......@@ -2138,8 +2142,8 @@ static int __init deferred_init_memmap(void *data)
unsigned long first_init_pfn, flags;
unsigned long start = jiffies;
struct zone *zone;
int zid, max_threads;
u64 i;
int max_threads;
u64 i = 0;
/* Bind memory initialisation thread to a local node if possible */
if (!cpumask_empty(cpumask))
......@@ -2165,27 +2169,18 @@ static int __init deferred_init_memmap(void *data)
*/
pgdat_resize_unlock(pgdat, &flags);
/* Only the highest zone is deferred so find it */
for (zid = 0; zid < MAX_NR_ZONES; zid++) {
zone = pgdat->node_zones + zid;
if (first_init_pfn < zone_end_pfn(zone))
break;
}
/* If the zone is empty somebody else may have cleared out the zone */
if (!deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
first_init_pfn))
goto zone_empty;
/* Only the highest zone is deferred */
zone = pgdat->node_zones + pgdat->nr_zones - 1;
max_threads = deferred_page_init_max_threads(cpumask);
while (spfn < epfn) {
unsigned long epfn_align = ALIGN(epfn, PAGES_PER_SECTION);
while (deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn, first_init_pfn)) {
first_init_pfn = ALIGN(epfn, PAGES_PER_SECTION);
struct padata_mt_job job = {
.thread_fn = deferred_init_memmap_chunk,
.fn_arg = zone,
.start = spfn,
.size = epfn_align - spfn,
.size = first_init_pfn - spfn,
.align = PAGES_PER_SECTION,
.min_chunk = PAGES_PER_SECTION,
.max_threads = max_threads,
......@@ -2193,12 +2188,10 @@ static int __init deferred_init_memmap(void *data)
};
padata_do_multithreaded(&job);
deferred_init_mem_pfn_range_in_zone(&i, zone, &spfn, &epfn,
epfn_align);
}
zone_empty:
/* Sanity check that the next zone really is unpopulated */
WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
WARN_ON(pgdat->nr_zones < MAX_NR_ZONES && populated_zone(++zone));
pr_info("node %d deferred pages initialised in %ums\n",
pgdat->node_id, jiffies_to_msecs(jiffies - start));
......@@ -2225,7 +2218,7 @@ bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
unsigned long first_deferred_pfn = pgdat->first_deferred_pfn;
unsigned long spfn, epfn, flags;
unsigned long nr_pages = 0;
u64 i;
u64 i = 0;
/* Only the last zone may have deferred pages */
if (zone_end_pfn(zone) != pgdat_end_pfn(pgdat))
......
......@@ -12,6 +12,7 @@
#define PHYS_ADDR_MAX (~(phys_addr_t)0)
#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
#define PAGE_ALIGN_DOWN(addr) ALIGN_DOWN(addr, PAGE_SIZE)
#define __va(x) ((void *)((unsigned long)(x)))
#define __pa(x) ((unsigned long)(x))
......
......@@ -13,4 +13,9 @@
#define NUMA_NO_NODE (-1)
static inline bool numa_valid_node(int nid)
{
return nid >= 0 && nid < MAX_NUMNODES;
}
#endif /* _LINUX_NUMA_H */
......@@ -40,13 +40,13 @@ void reset_memblock_regions(void)
{
memset(memblock.memory.regions, 0,
memblock.memory.cnt * sizeof(struct memblock_region));
memblock.memory.cnt = 1;
memblock.memory.cnt = 0;
memblock.memory.max = INIT_MEMBLOCK_REGIONS;
memblock.memory.total_size = 0;
memset(memblock.reserved.regions, 0,
memblock.reserved.cnt * sizeof(struct memblock_region));
memblock.reserved.cnt = 1;
memblock.reserved.cnt = 0;
memblock.reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS;
memblock.reserved.total_size = 0;
}
......@@ -61,7 +61,7 @@ void reset_memblock_attributes(void)
static inline void fill_memblock(void)
{
memset(memory_block.base, 1, MEM_SIZE);
memset(memory_block.base, 1, PHYS_MEM_SIZE);
}
void setup_memblock(void)
......@@ -103,7 +103,7 @@ void setup_numa_memblock(const unsigned int node_fracs[])
void dummy_physical_memory_init(void)
{
memory_block.base = malloc(MEM_SIZE);
memory_block.base = malloc(PHYS_MEM_SIZE);
assert(memory_block.base);
fill_memblock();
}
......
......@@ -12,6 +12,7 @@
#include <../selftests/kselftest.h>
#define MEM_SIZE SZ_32K
#define PHYS_MEM_SIZE SZ_16M
#define NUMA_NODES 8
#define INIT_MEMBLOCK_REGIONS 128
......@@ -39,6 +40,9 @@ enum test_flags {
assert((_expected) == (_seen)); \
} while (0)
#define ASSERT_TRUE(_seen) ASSERT_EQ(true, _seen)
#define ASSERT_FALSE(_seen) ASSERT_EQ(false, _seen)
/**
* ASSERT_NE():
* Check the condition
......
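The ASSERT_TRUE()/ASSERT_FALSE() helpers above back the new
memblock_overlaps_region checks added by this series. A minimal sketch of such
a check (hypothetical; the base/size values are illustrative and this is not
the actual test body) could look like:

/* Assumes the test suite's common.h (memblock, ASSERT_*) is included. */
static int memblock_overlaps_region_check(void)
{
	phys_addr_t base = SZ_1G;
	phys_addr_t size = SZ_4M;

	reset_memblock_regions();
	memblock_reserve(base, size);

	/* a range inside the reserved region overlaps it */
	ASSERT_TRUE(memblock_overlaps_region(&memblock.reserved,
					     base + SZ_1M, SZ_1M));
	/* a range entirely below the reserved region does not */
	ASSERT_FALSE(memblock_overlaps_region(&memblock.reserved,
					      base - SZ_1M, SZ_1M));

	return 0;
}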