Commit f913a660 authored by Vasily Gorbik, committed by Heiko Carstens

s390/boot: rework decompressor reserved tracking

Currently, several approaches are used to find unused memory in the
decompressor. While "safe_addr" grows towards higher addresses, the vmem
code allocates paging structures top down. The former requires careful
ordering. In addition, the ipl report handling code verifies potential
intersections with secure boot certificates on its own. Neither of the
two approaches is aware of memory holes, and they are not consistent
with each other under low memory conditions.

To solve that, the existing approaches are generalized and combined,
and online memory ranges are now taken into consideration as well.

physmem_info has been extended to contain reserved memory ranges. A new
set of functions allows handling reserves and finding unused memory.
All reserves and memory allocations are "typed". In an out-of-memory
condition the decompressor fails with detailed information on the
current reserved ranges and usable online memory.

Linux version 6.2.0 ...
Kernel command line: ... mem=100M
Out of memory allocating 100000 bytes 100000 aligned in range 0:5800000
Reserved memory ranges:
0000000000000000 0000000003e33000 DECOMPRESSOR
0000000003f00000 00000000057648a3 INITRD
00000000063e0000 00000000063e8000 VMEM
00000000063eb000 00000000063f4000 VMEM
00000000063f7800 0000000006400000 VMEM
0000000005800000 0000000006300000 KASAN
Usable online memory ranges (info source: sclp read info [3]):
0000000000000000 0000000006400000
Usable online memory total: 6400000 Reserved: 61b10a3 Free: 24ef5d
Call Trace:
(sp:000000000002bd58 [<0000000000012a70>] physmem_alloc_top_down+0x60/0x14c)
 sp:000000000002bdc8 [<0000000000013756>] _pa+0x56/0x6a
 sp:000000000002bdf0 [<0000000000013bcc>] pgtable_populate+0x45c/0x65e
 sp:000000000002be90 [<00000000000140aa>] setup_vmem+0x2da/0x424
 sp:000000000002bec8 [<0000000000011c20>] startup_kernel+0x428/0x8b4
 sp:000000000002bf60 [<00000000000100f4>] startup_normal+0xd4/0xd4
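
For reference, the typed reservation bookkeeping this output is based on
(copied, slightly condensed, from the asm/physmem_info.h hunk in this
patch; RR_KASAN only exists with CONFIG_KASAN):

  enum reserved_range_type {
          RR_DECOMPRESSOR, RR_INITRD, RR_VMLINUX, RR_AMODE31,
          RR_IPLREPORT, RR_CERT_COMP_LIST, RR_MEM_DETECT_EXTENDED,
          RR_VMEM, RR_KASAN,
          RR_MAX
  };

  struct reserved_range {
          unsigned long start;
          unsigned long end;
          struct reserved_range *chain;
  };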

physmem_alloc_range allows finding free memory in a specified range. It
should be used for one-time allocations only, such as finding a position
for amode31 and vmlinux.
physmem_alloc_top_down can be used just like physmem_alloc_range, but it
also allows multiple allocations per type and tries to merge sequential
allocations together, which is useful for paging structure allocations.
If sequential allocations cannot be merged they are "chained", allowing
easy enumeration of reserved ranges per type and their later migration
to memblock. The extra "struct reserved_range" instances allocated for
chaining are not tracked or reserved themselves, but rely on the fact
that both physmem_alloc_range and physmem_alloc_top_down search for free
memory only below the current top-down allocator position. All reserved
ranges should be transferred to memblock before memblock allocations are
enabled.
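
A minimal usage sketch; both calls are taken from the amode31 and vmem
hunks of this patch:

  /* one-off allocation with an explicit range, e.g. amode31 below 2 GB */
  physmem_alloc_range(RR_AMODE31, vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G, true);

  /* repeated per-type allocations, merged or chained, e.g. page tables */
  pte = (pte_t *)physmem_alloc_top_down(RR_VMEM, _PAGE_TABLE_SIZE, _PAGE_TABLE_SIZE);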

The startup code has been reordered to delay any memory allocations until
online memory ranges are detected and occupied memory ranges are marked
as reserved, so that they are excluded from follow-up allocations.
Ipl report certificates are a special case: the ipl report certificate
list is checked together with the other memory reserves until the
certificates are saved elsewhere.
The memory KASAN requires for shadow memory allocation and mapping is
reserved as one large chunk, which is later passed to the KASAN early
initialization code.
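
A condensed sketch of the resulting startup_kernel() ordering (taken from
the hunk further down; unrelated steps and error handling omitted):

  safe_addr = mem_safe_offset();
  physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr);
  physmem_reserve(RR_INITRD, parmarea.initrd_start, parmarea.initrd_size);
  read_ipl_report();               /* only records/reserves RR_IPLREPORT */
  max_physmem_end = detect_max_physmem_end();
  asce_limit = setup_kernel_memory_layout();
  physmem_set_usable_limit(ident_map_size);
  detect_physmem_online_ranges(max_physmem_end);
  save_ipl_cert_comp_list();       /* RR_CERT_COMP_LIST; frees RR_IPLREPORT */
  rescue_initrd(safe_addr, ident_map_size);
  physmem_alloc_top_down(RR_KASAN, kasan_estimate_memory_needs(get_physmem_usable_total()),
                         _SEGMENT_SIZE);
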
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
parent 8c37cb7d
...@@ -8,6 +8,8 @@ ...@@ -8,6 +8,8 @@
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#include <asm/physmem_info.h>
struct machine_info { struct machine_info {
unsigned char has_edat1 : 1; unsigned char has_edat1 : 1;
unsigned char has_edat2 : 1; unsigned char has_edat2 : 1;
...@@ -33,21 +35,34 @@ struct vmlinux_info { ...@@ -33,21 +35,34 @@ struct vmlinux_info {
}; };
void startup_kernel(void); void startup_kernel(void);
unsigned long detect_memory(unsigned long *safe_addr); unsigned long detect_max_physmem_end(void);
void detect_physmem_online_ranges(unsigned long max_physmem_end);
void physmem_set_usable_limit(unsigned long limit); void physmem_set_usable_limit(unsigned long limit);
void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size);
void physmem_free(enum reserved_range_type type);
/* for continuous/multiple allocations per type */
unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
unsigned long align);
/* for single allocations, 1 per type */
unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
unsigned long align, unsigned long min, unsigned long max,
bool die_on_oom);
bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
unsigned long *intersection_start);
bool is_ipl_block_dump(void); bool is_ipl_block_dump(void);
void store_ipl_parmblock(void); void store_ipl_parmblock(void);
unsigned long read_ipl_report(unsigned long safe_addr); int read_ipl_report(void);
void save_ipl_cert_comp_list(void);
void setup_boot_command_line(void); void setup_boot_command_line(void);
void parse_boot_command_line(void); void parse_boot_command_line(void);
void verify_facilities(void); void verify_facilities(void);
void print_missing_facilities(void); void print_missing_facilities(void);
void sclp_early_setup_buffer(void); void sclp_early_setup_buffer(void);
void print_pgm_check_info(void); void print_pgm_check_info(void);
unsigned long get_random_base(unsigned long safe_addr); unsigned long get_random_base(void);
void setup_vmem(unsigned long asce_limit); void setup_vmem(unsigned long asce_limit);
unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total);
void __printf(1, 2) decompressor_printk(const char *fmt, ...); void __printf(1, 2) decompressor_printk(const char *fmt, ...);
void print_stacktrace(unsigned long sp);
void error(char *m); void error(char *m);
extern struct machine_info machine; extern struct machine_info machine;
...@@ -62,7 +77,7 @@ extern char __boot_data_start[], __boot_data_end[]; ...@@ -62,7 +77,7 @@ extern char __boot_data_start[], __boot_data_end[];
extern char __boot_data_preserved_start[], __boot_data_preserved_end[]; extern char __boot_data_preserved_start[], __boot_data_preserved_end[];
extern char _decompressor_syms_start[], _decompressor_syms_end[]; extern char _decompressor_syms_start[], _decompressor_syms_end[];
extern char _stack_start[], _stack_end[]; extern char _stack_start[], _stack_end[];
extern char _end[]; extern char _end[], _decompressor_end[];
extern unsigned char _compressed_start[]; extern unsigned char _compressed_start[];
extern unsigned char _compressed_end[]; extern unsigned char _compressed_end[];
extern struct vmlinux_info _vmlinux_info; extern struct vmlinux_info _vmlinux_info;
...@@ -70,5 +85,10 @@ extern struct vmlinux_info _vmlinux_info; ...@@ -70,5 +85,10 @@ extern struct vmlinux_info _vmlinux_info;
#define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore)) #define __abs_lowcore_pa(x) (((unsigned long)(x) - __abs_lowcore) % sizeof(struct lowcore))
static inline bool intersects(unsigned long addr0, unsigned long size0,
unsigned long addr1, unsigned long size1)
{
return addr0 + size0 > addr1 && addr1 + size1 > addr0;
}
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* BOOT_BOOT_H */ #endif /* BOOT_BOOT_H */
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include <asm/sclp.h> #include <asm/sclp.h>
#include <asm/sections.h> #include <asm/sections.h>
#include <asm/boot_data.h> #include <asm/boot_data.h>
#include <asm/physmem_info.h>
#include <uapi/asm/ipl.h> #include <uapi/asm/ipl.h>
#include "boot.h" #include "boot.h"
...@@ -16,20 +17,16 @@ unsigned long __bootdata_preserved(ipl_cert_list_size); ...@@ -16,20 +17,16 @@ unsigned long __bootdata_preserved(ipl_cert_list_size);
unsigned long __bootdata(early_ipl_comp_list_addr); unsigned long __bootdata(early_ipl_comp_list_addr);
unsigned long __bootdata(early_ipl_comp_list_size); unsigned long __bootdata(early_ipl_comp_list_size);
static struct ipl_rb_certificates *certs;
static struct ipl_rb_components *comps;
static bool ipl_report_needs_saving;
#define for_each_rb_entry(entry, rb) \ #define for_each_rb_entry(entry, rb) \
for (entry = rb->entries; \ for (entry = rb->entries; \
(void *) entry + sizeof(*entry) <= (void *) rb + rb->len; \ (void *) entry + sizeof(*entry) <= (void *) rb + rb->len; \
entry++) entry++)
static inline bool intersects(unsigned long addr0, unsigned long size0, static unsigned long get_cert_comp_list_size(void)
unsigned long addr1, unsigned long size1)
{
return addr0 + size0 > addr1 && addr1 + size1 > addr0;
}
static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
struct ipl_rb_certificates *certs,
unsigned long safe_addr)
{ {
struct ipl_rb_certificate_entry *cert; struct ipl_rb_certificate_entry *cert;
struct ipl_rb_component_entry *comp; struct ipl_rb_component_entry *comp;
...@@ -44,44 +41,27 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps, ...@@ -44,44 +41,27 @@ static unsigned long find_bootdata_space(struct ipl_rb_components *comps,
ipl_cert_list_size = 0; ipl_cert_list_size = 0;
for_each_rb_entry(cert, certs) for_each_rb_entry(cert, certs)
ipl_cert_list_size += sizeof(unsigned int) + cert->len; ipl_cert_list_size += sizeof(unsigned int) + cert->len;
size = ipl_cert_list_size + early_ipl_comp_list_size; return ipl_cert_list_size + early_ipl_comp_list_size;
}
/* bool ipl_report_certs_intersects(unsigned long addr, unsigned long size,
* Start from safe_addr to find a free memory area large unsigned long *intersection_start)
* enough for the IPL report boot data. This area is used {
* for ipl_cert_list_addr/ipl_cert_list_size and struct ipl_rb_certificate_entry *cert;
* early_ipl_comp_list_addr/early_ipl_comp_list_size. It must
* not overlap with any component or any certificate. if (!ipl_report_needs_saving)
*/ return false;
repeat:
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && initrd_data.start && initrd_data.size && for_each_rb_entry(cert, certs) {
intersects(initrd_data.start, initrd_data.size, safe_addr, size)) if (intersects(addr, size, cert->addr, cert->len)) {
safe_addr = initrd_data.start + initrd_data.size; *intersection_start = cert->addr;
if (intersects(safe_addr, size, (unsigned long)comps, comps->len)) { return true;
safe_addr = (unsigned long)comps + comps->len;
goto repeat;
}
for_each_rb_entry(comp, comps)
if (intersects(safe_addr, size, comp->addr, comp->len)) {
safe_addr = comp->addr + comp->len;
goto repeat;
} }
if (intersects(safe_addr, size, (unsigned long)certs, certs->len)) {
safe_addr = (unsigned long)certs + certs->len;
goto repeat;
} }
for_each_rb_entry(cert, certs) return false;
if (intersects(safe_addr, size, cert->addr, cert->len)) {
safe_addr = cert->addr + cert->len;
goto repeat;
}
early_ipl_comp_list_addr = safe_addr;
ipl_cert_list_addr = safe_addr + early_ipl_comp_list_size;
return safe_addr + size;
} }
static void copy_components_bootdata(struct ipl_rb_components *comps) static void copy_components_bootdata(void)
{ {
struct ipl_rb_component_entry *comp, *ptr; struct ipl_rb_component_entry *comp, *ptr;
...@@ -90,7 +70,7 @@ static void copy_components_bootdata(struct ipl_rb_components *comps) ...@@ -90,7 +70,7 @@ static void copy_components_bootdata(struct ipl_rb_components *comps)
memcpy(ptr++, comp, sizeof(*ptr)); memcpy(ptr++, comp, sizeof(*ptr));
} }
static void copy_certificates_bootdata(struct ipl_rb_certificates *certs) static void copy_certificates_bootdata(void)
{ {
struct ipl_rb_certificate_entry *cert; struct ipl_rb_certificate_entry *cert;
void *ptr; void *ptr;
...@@ -104,10 +84,8 @@ static void copy_certificates_bootdata(struct ipl_rb_certificates *certs) ...@@ -104,10 +84,8 @@ static void copy_certificates_bootdata(struct ipl_rb_certificates *certs)
} }
} }
unsigned long read_ipl_report(unsigned long safe_addr) int read_ipl_report(void)
{ {
struct ipl_rb_certificates *certs;
struct ipl_rb_components *comps;
struct ipl_pl_hdr *pl_hdr; struct ipl_pl_hdr *pl_hdr;
struct ipl_rl_hdr *rl_hdr; struct ipl_rl_hdr *rl_hdr;
struct ipl_rb_hdr *rb_hdr; struct ipl_rb_hdr *rb_hdr;
...@@ -120,7 +98,7 @@ unsigned long read_ipl_report(unsigned long safe_addr) ...@@ -120,7 +98,7 @@ unsigned long read_ipl_report(unsigned long safe_addr)
*/ */
if (!ipl_block_valid || if (!ipl_block_valid ||
!(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR)) !(ipl_block.hdr.flags & IPL_PL_FLAG_IPLSR))
return safe_addr; return -1;
ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL); ipl_secure_flag = !!(ipl_block.hdr.flags & IPL_PL_FLAG_SIPL);
/* /*
* There is an IPL report, to find it load the pointer to the * There is an IPL report, to find it load the pointer to the
...@@ -158,16 +136,30 @@ unsigned long read_ipl_report(unsigned long safe_addr) ...@@ -158,16 +136,30 @@ unsigned long read_ipl_report(unsigned long safe_addr)
* With either the component list or the certificate list * With either the component list or the certificate list
* missing the kernel will stay ignorant of secure IPL. * missing the kernel will stay ignorant of secure IPL.
*/ */
if (!comps || !certs) if (!comps || !certs) {
return safe_addr; certs = NULL;
return -1;
}
/* ipl_report_needs_saving = true;
* Copy component and certificate list to a safe area physmem_reserve(RR_IPLREPORT, (unsigned long)pl_hdr,
* where the decompressed kernel can find them. (unsigned long)rl_end - (unsigned long)pl_hdr);
*/ return 0;
safe_addr = find_bootdata_space(comps, certs, safe_addr); }
copy_components_bootdata(comps);
copy_certificates_bootdata(certs); void save_ipl_cert_comp_list(void)
{
unsigned long size;
if (!ipl_report_needs_saving)
return;
size = get_cert_comp_list_size();
early_ipl_comp_list_addr = physmem_alloc_top_down(RR_CERT_COMP_LIST, size, sizeof(int));
ipl_cert_list_addr = early_ipl_comp_list_addr + early_ipl_comp_list_size;
return safe_addr; copy_components_bootdata();
copy_certificates_bootdata();
physmem_free(RR_IPLREPORT);
ipl_report_needs_saving = false;
} }
...@@ -91,113 +91,16 @@ static int get_random(unsigned long limit, unsigned long *value) ...@@ -91,113 +91,16 @@ static int get_random(unsigned long limit, unsigned long *value)
return 0; return 0;
} }
/* unsigned long get_random_base(void)
* To randomize kernel base address we have to consider several facts:
* 1. physical online memory might not be continuous and have holes. physmem
* info contains list of online memory ranges we should consider.
* 2. we have several memory regions which are occupied and we should not
* overlap and destroy them. Currently safe_addr tells us the border below
* which all those occupied regions are. We are safe to use anything above
* safe_addr.
* 3. the upper limit might apply as well, even if memory above that limit is
* online. Currently those limitations are:
* 3.1. Limit set by "mem=" kernel command line option
* 3.2. memory reserved at the end for kasan initialization.
* 4. kernel base address must be aligned to THREAD_SIZE (kernel stack size).
* Which is required for CONFIG_CHECK_STACK. Currently THREAD_SIZE is 4 pages
* (16 pages when the kernel is built with kasan enabled)
* Assumptions:
* 1. kernel size (including .bss size) and upper memory limit are page aligned.
* 2. physmem online region start is THREAD_SIZE aligned / end is PAGE_SIZE
* aligned (in practice memory configurations granularity on z/VM and LPAR
* is 1mb).
*
* To guarantee uniform distribution of kernel base address among all suitable
* addresses we generate random value just once. For that we need to build a
* continuous range in which every value would be suitable. We can build this
* range by simply counting all suitable addresses (let's call them positions)
* which would be valid as kernel base address. To count positions we iterate
* over online memory ranges. For each range which is big enough for the
* kernel image we count all suitable addresses we can put the kernel image at
* that is
* (end - start - kernel_size) / THREAD_SIZE + 1
* Two functions count_valid_kernel_positions and position_to_address help
* to count positions in memory range given and then convert position back
* to address.
*/
static unsigned long count_valid_kernel_positions(unsigned long kernel_size,
unsigned long _min,
unsigned long _max)
{
unsigned long start, end, pos = 0;
int i;
for_each_physmem_usable_range(i, &start, &end) {
if (_min >= end)
continue;
if (start >= _max)
break;
start = max(_min, start);
end = min(_max, end);
if (end - start < kernel_size)
continue;
pos += (end - start - kernel_size) / THREAD_SIZE + 1;
}
return pos;
}
static unsigned long position_to_address(unsigned long pos, unsigned long kernel_size,
unsigned long _min, unsigned long _max)
{
unsigned long start, end;
int i;
for_each_physmem_usable_range(i, &start, &end) {
if (_min >= end)
continue;
if (start >= _max)
break;
start = max(_min, start);
end = min(_max, end);
if (end - start < kernel_size)
continue;
if ((end - start - kernel_size) / THREAD_SIZE + 1 >= pos)
return start + (pos - 1) * THREAD_SIZE;
pos -= (end - start - kernel_size) / THREAD_SIZE + 1;
}
return 0;
}
unsigned long get_random_base(unsigned long safe_addr)
{ {
unsigned long usable_total = get_physmem_usable_total(); unsigned long vmlinux_size = vmlinux.image_size + vmlinux.bss_size;
unsigned long memory_limit = get_physmem_usable_end(); unsigned long minimal_pos = vmlinux.default_lma + vmlinux_size;
unsigned long base_pos, max_pos, kernel_size; unsigned long random;
int i;
/* /* [vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size : physmem_info.usable] */
* Avoid putting kernel in the end of physical memory if (get_random(physmem_info.usable - minimal_pos, &random))
* which vmem and kasan code will use for shadow memory and
* pgtable mapping allocations.
*/
memory_limit -= kasan_estimate_memory_needs(usable_total);
memory_limit -= vmem_estimate_memory_needs(usable_total);
safe_addr = ALIGN(safe_addr, THREAD_SIZE);
kernel_size = vmlinux.image_size + vmlinux.bss_size;
if (safe_addr + kernel_size > memory_limit)
return 0; return 0;
max_pos = count_valid_kernel_positions(kernel_size, safe_addr, memory_limit); return physmem_alloc_range(RR_VMLINUX, vmlinux_size, THREAD_SIZE,
if (!max_pos) { vmlinux.default_lma, minimal_pos + random, false);
sclp_early_printk("KASLR disabled: not enough memory\n");
return 0;
}
/* we need a value in the range [1, base_pos] inclusive */
if (get_random(max_pos, &base_pos))
return 0;
return position_to_address(base_pos + 1, kernel_size, safe_addr, memory_limit);
} }
...@@ -123,11 +123,10 @@ void decompressor_printk(const char *fmt, ...) ...@@ -123,11 +123,10 @@ void decompressor_printk(const char *fmt, ...)
sclp_early_printk(buf); sclp_early_printk(buf);
} }
static noinline void print_stacktrace(void) void print_stacktrace(unsigned long sp)
{ {
struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start, struct stack_info boot_stack = { STACK_TYPE_TASK, (unsigned long)_stack_start,
(unsigned long)_stack_end }; (unsigned long)_stack_end };
unsigned long sp = S390_lowcore.gpregs_save_area[15];
bool first = true; bool first = true;
decompressor_printk("Call Trace:\n"); decompressor_printk("Call Trace:\n");
...@@ -173,7 +172,7 @@ void print_pgm_check_info(void) ...@@ -173,7 +172,7 @@ void print_pgm_check_info(void)
gpregs[8], gpregs[9], gpregs[10], gpregs[11]); gpregs[8], gpregs[9], gpregs[10], gpregs[11]);
decompressor_printk(" %016lx %016lx %016lx %016lx\n", decompressor_printk(" %016lx %016lx %016lx %016lx\n",
gpregs[12], gpregs[13], gpregs[14], gpregs[15]); gpregs[12], gpregs[13], gpregs[14], gpregs[15]);
print_stacktrace(); print_stacktrace(S390_lowcore.gpregs_save_area[15]);
decompressor_printk("Last Breaking-Event-Address:\n"); decompressor_printk("Last Breaking-Event-Address:\n");
decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break, decompressor_printk(" [<%016lx>] %pS\n", (unsigned long)S390_lowcore.pgm_last_break,
(void *)S390_lowcore.pgm_last_break); (void *)S390_lowcore.pgm_last_break);
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <linux/processor.h>
#include <linux/errno.h> #include <linux/errno.h>
#include <linux/init.h> #include <linux/init.h>
#include <asm/setup.h>
#include <asm/processor.h>
#include <asm/sclp.h>
#include <asm/sections.h>
#include <asm/physmem_info.h> #include <asm/physmem_info.h>
#include <asm/stacktrace.h>
#include <asm/boot_data.h>
#include <asm/sparsemem.h> #include <asm/sparsemem.h>
#include <asm/sections.h>
#include <asm/setup.h>
#include <asm/sclp.h>
#include <asm/uv.h>
#include "decompressor.h" #include "decompressor.h"
#include "boot.h" #include "boot.h"
struct physmem_info __bootdata(physmem_info); struct physmem_info __bootdata(physmem_info);
static unsigned int physmem_alloc_ranges;
static unsigned long physmem_alloc_pos;
/* up to 256 storage elements, 1020 subincrements each */ /* up to 256 storage elements, 1020 subincrements each */
#define ENTRIES_EXTENDED_MAX \ #define ENTRIES_EXTENDED_MAX \
...@@ -20,6 +25,11 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n) ...@@ -20,6 +25,11 @@ static struct physmem_range *__get_physmem_range_ptr(u32 n)
{ {
if (n < MEM_INLINED_ENTRIES) if (n < MEM_INLINED_ENTRIES)
return &physmem_info.online[n]; return &physmem_info.online[n];
if (unlikely(!physmem_info.online_extended)) {
physmem_info.online_extended = (struct physmem_range *)physmem_alloc_range(
RR_MEM_DETECT_EXTENDED, ENTRIES_EXTENDED_MAX, sizeof(long), 0,
physmem_alloc_pos, true);
}
return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES]; return &physmem_info.online_extended[n - MEM_INLINED_ENTRIES];
} }
...@@ -143,49 +153,171 @@ static unsigned long search_mem_end(void) ...@@ -143,49 +153,171 @@ static unsigned long search_mem_end(void)
return (offset + 1) << 20; return (offset + 1) << 20;
} }
unsigned long detect_memory(unsigned long *safe_addr) unsigned long detect_max_physmem_end(void)
{ {
unsigned long max_physmem_end = 0; unsigned long max_physmem_end = 0;
sclp_early_get_memsize(&max_physmem_end); if (!sclp_early_get_memsize(&max_physmem_end)) {
physmem_info.online_extended = (struct physmem_range *)ALIGN(*safe_addr, sizeof(u64)); physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO;
} else {
max_physmem_end = search_mem_end();
physmem_info.info_source = MEM_DETECT_BIN_SEARCH;
}
return max_physmem_end;
}
void detect_physmem_online_ranges(unsigned long max_physmem_end)
{
if (!sclp_early_read_storage_info()) { if (!sclp_early_read_storage_info()) {
physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO; physmem_info.info_source = MEM_DETECT_SCLP_STOR_INFO;
} else if (!diag260()) { } else if (!diag260()) {
physmem_info.info_source = MEM_DETECT_DIAG260; physmem_info.info_source = MEM_DETECT_DIAG260;
max_physmem_end = max_physmem_end ?: get_physmem_usable_end();
} else if (max_physmem_end) { } else if (max_physmem_end) {
add_physmem_online_range(0, max_physmem_end); add_physmem_online_range(0, max_physmem_end);
physmem_info.info_source = MEM_DETECT_SCLP_READ_INFO;
} else {
max_physmem_end = search_mem_end();
add_physmem_online_range(0, max_physmem_end);
physmem_info.info_source = MEM_DETECT_BIN_SEARCH;
} }
}
if (physmem_info.range_count > MEM_INLINED_ENTRIES) { void physmem_set_usable_limit(unsigned long limit)
*safe_addr += (physmem_info.range_count - MEM_INLINED_ENTRIES) * {
sizeof(struct physmem_range); physmem_info.usable = limit;
physmem_alloc_pos = limit;
}
static void die_oom(unsigned long size, unsigned long align, unsigned long min, unsigned long max)
{
unsigned long start, end, total_mem = 0, total_reserved_mem = 0;
struct reserved_range *range;
enum reserved_range_type t;
int i;
decompressor_printk("Linux version %s\n", kernel_version);
if (!is_prot_virt_guest() && early_command_line[0])
decompressor_printk("Kernel command line: %s\n", early_command_line);
decompressor_printk("Out of memory allocating %lx bytes %lx aligned in range %lx:%lx\n",
size, align, min, max);
decompressor_printk("Reserved memory ranges:\n");
for_each_physmem_reserved_range(t, range, &start, &end) {
decompressor_printk("%016lx %016lx %s\n", start, end, get_rr_type_name(t));
total_reserved_mem += end - start;
}
decompressor_printk("Usable online memory ranges (info source: %s [%x]):\n",
get_physmem_info_source(), physmem_info.info_source);
for_each_physmem_usable_range(i, &start, &end) {
decompressor_printk("%016lx %016lx\n", start, end);
total_mem += end - start;
} }
decompressor_printk("Usable online memory total: %lx Reserved: %lx Free: %lx\n",
total_mem, total_reserved_mem,
total_mem > total_reserved_mem ? total_mem - total_reserved_mem : 0);
print_stacktrace(current_frame_address());
sclp_early_printk("\n\n -- System halted\n");
disabled_wait();
}
return max_physmem_end; void physmem_reserve(enum reserved_range_type type, unsigned long addr, unsigned long size)
{
physmem_info.reserved[type].start = addr;
physmem_info.reserved[type].end = addr + size;
} }
void physmem_set_usable_limit(unsigned long limit) void physmem_free(enum reserved_range_type type)
{ {
struct physmem_range *range; physmem_info.reserved[type].start = 0;
int i; physmem_info.reserved[type].end = 0;
}
/* make sure mem_detect.usable ends up within online memory block */ static bool __physmem_alloc_intersects(unsigned long addr, unsigned long size,
for (i = 0; i < physmem_info.range_count; i++) { unsigned long *intersection_start)
range = __get_physmem_range_ptr(i); {
if (range->start >= limit) unsigned long res_addr, res_size;
break; int t;
if (range->end >= limit) {
physmem_info.usable = limit; for (t = 0; t < RR_MAX; t++) {
if (!get_physmem_reserved(t, &res_addr, &res_size))
continue;
if (intersects(addr, size, res_addr, res_size)) {
*intersection_start = res_addr;
return true;
}
}
return ipl_report_certs_intersects(addr, size, intersection_start);
}
static unsigned long __physmem_alloc_range(unsigned long size, unsigned long align,
unsigned long min, unsigned long max,
unsigned int from_ranges, unsigned int *ranges_left,
bool die_on_oom)
{
unsigned int nranges = from_ranges ?: physmem_info.range_count;
unsigned long range_start, range_end;
unsigned long intersection_start;
unsigned long addr, pos = max;
align = max(align, 8UL);
while (nranges) {
__get_physmem_range(nranges - 1, &range_start, &range_end, false);
pos = min(range_end, pos);
if (round_up(min, align) + size > pos)
break; break;
addr = round_down(pos - size, align);
if (range_start > addr) {
nranges--;
continue;
}
if (__physmem_alloc_intersects(addr, size, &intersection_start)) {
pos = intersection_start;
continue;
}
if (ranges_left)
*ranges_left = nranges;
return addr;
}
if (die_on_oom)
die_oom(size, align, min, max);
return 0;
}
unsigned long physmem_alloc_range(enum reserved_range_type type, unsigned long size,
unsigned long align, unsigned long min, unsigned long max,
bool die_on_oom)
{
unsigned long addr;
max = min(max, physmem_alloc_pos);
addr = __physmem_alloc_range(size, align, min, max, 0, NULL, die_on_oom);
if (addr)
physmem_reserve(type, addr, size);
return addr;
}
unsigned long physmem_alloc_top_down(enum reserved_range_type type, unsigned long size,
unsigned long align)
{
struct reserved_range *range = &physmem_info.reserved[type];
struct reserved_range *new_range;
unsigned int ranges_left;
unsigned long addr;
addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos, physmem_alloc_ranges,
&ranges_left, true);
/* if not a consecutive allocation of the same type or first allocation */
if (range->start != addr + size) {
if (range->end) {
physmem_alloc_pos = __physmem_alloc_range(
sizeof(struct reserved_range), 0, 0, physmem_alloc_pos,
physmem_alloc_ranges, &ranges_left, true);
new_range = (struct reserved_range *)physmem_alloc_pos;
*new_range = *range;
range->chain = new_range;
addr = __physmem_alloc_range(size, align, 0, physmem_alloc_pos,
ranges_left, &ranges_left, true);
} }
physmem_info.usable = range->end; range->end = addr + size;
} }
range->start = addr;
physmem_alloc_pos = addr;
physmem_alloc_ranges = ranges_left;
return addr;
} }
...@@ -21,7 +21,6 @@ unsigned long __bootdata_preserved(__kaslr_offset); ...@@ -21,7 +21,6 @@ unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata_preserved(__abs_lowcore); unsigned long __bootdata_preserved(__abs_lowcore);
unsigned long __bootdata_preserved(__memcpy_real_area); unsigned long __bootdata_preserved(__memcpy_real_area);
pte_t *__bootdata_preserved(memcpy_real_ptep); pte_t *__bootdata_preserved(memcpy_real_ptep);
unsigned long __bootdata(__amode31_base);
unsigned long __bootdata_preserved(VMALLOC_START); unsigned long __bootdata_preserved(VMALLOC_START);
unsigned long __bootdata_preserved(VMALLOC_END); unsigned long __bootdata_preserved(VMALLOC_END);
struct page *__bootdata_preserved(vmemmap); struct page *__bootdata_preserved(vmemmap);
...@@ -29,7 +28,6 @@ unsigned long __bootdata_preserved(vmemmap_size); ...@@ -29,7 +28,6 @@ unsigned long __bootdata_preserved(vmemmap_size);
unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_VADDR);
unsigned long __bootdata_preserved(MODULES_END); unsigned long __bootdata_preserved(MODULES_END);
unsigned long __bootdata(ident_map_size); unsigned long __bootdata(ident_map_size);
struct initrd_data __bootdata(initrd_data);
u64 __bootdata_preserved(stfle_fac_list[16]); u64 __bootdata_preserved(stfle_fac_list[16]);
u64 __bootdata_preserved(alt_stfle_fac_list[16]); u64 __bootdata_preserved(alt_stfle_fac_list[16]);
...@@ -75,17 +73,20 @@ unsigned long mem_safe_offset(void) ...@@ -75,17 +73,20 @@ unsigned long mem_safe_offset(void)
} }
#endif #endif
static unsigned long rescue_initrd(unsigned long safe_addr) static void rescue_initrd(unsigned long min, unsigned long max)
{ {
unsigned long old_addr, addr, size;
if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD)) if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
return safe_addr; return;
if (!initrd_data.start || !initrd_data.size) if (!get_physmem_reserved(RR_INITRD, &addr, &size))
return safe_addr; return;
if (initrd_data.start < safe_addr) { if (addr >= min && addr + size <= max)
memmove((void *)safe_addr, (void *)initrd_data.start, initrd_data.size); return;
initrd_data.start = safe_addr; old_addr = addr;
} physmem_free(RR_INITRD);
return initrd_data.start + initrd_data.size; addr = physmem_alloc_top_down(RR_INITRD, size, 0);
memmove((void *)addr, (void *)old_addr, size);
} }
static void copy_bootdata(void) static void copy_bootdata(void)
...@@ -267,46 +268,52 @@ static void offset_vmlinux_info(unsigned long offset) ...@@ -267,46 +268,52 @@ static void offset_vmlinux_info(unsigned long offset)
vmlinux.invalid_pg_dir_off += offset; vmlinux.invalid_pg_dir_off += offset;
} }
static unsigned long reserve_amode31(unsigned long safe_addr)
{
__amode31_base = PAGE_ALIGN(safe_addr);
return __amode31_base + vmlinux.amode31_size;
}
void startup_kernel(void) void startup_kernel(void)
{ {
unsigned long max_physmem_end; unsigned long max_physmem_end;
unsigned long random_lma; unsigned long random_lma;
unsigned long safe_addr;
unsigned long asce_limit; unsigned long asce_limit;
unsigned long safe_addr;
void *img; void *img;
psw_t psw; psw_t psw;
initrd_data.start = parmarea.initrd_start; setup_lpp();
initrd_data.size = parmarea.initrd_size; safe_addr = mem_safe_offset();
/*
* reserve decompressor memory together with decompression heap, buffer and
* memory which might be occupied by uncompressed kernel at default 1Mb
* position (if KASLR is off or failed).
*/
physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr);
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size)
physmem_reserve(RR_INITRD, parmarea.initrd_start, parmarea.initrd_size);
oldmem_data.start = parmarea.oldmem_base; oldmem_data.start = parmarea.oldmem_base;
oldmem_data.size = parmarea.oldmem_size; oldmem_data.size = parmarea.oldmem_size;
setup_lpp();
store_ipl_parmblock(); store_ipl_parmblock();
safe_addr = mem_safe_offset(); read_ipl_report();
safe_addr = reserve_amode31(safe_addr);
safe_addr = read_ipl_report(safe_addr);
uv_query_info(); uv_query_info();
safe_addr = rescue_initrd(safe_addr);
sclp_early_read_info(); sclp_early_read_info();
setup_boot_command_line(); setup_boot_command_line();
parse_boot_command_line(); parse_boot_command_line();
detect_facilities(); detect_facilities();
sanitize_prot_virt_host(); sanitize_prot_virt_host();
max_physmem_end = detect_memory(&safe_addr); max_physmem_end = detect_max_physmem_end();
setup_ident_map_size(max_physmem_end); setup_ident_map_size(max_physmem_end);
setup_vmalloc_size(); setup_vmalloc_size();
asce_limit = setup_kernel_memory_layout(); asce_limit = setup_kernel_memory_layout();
/* got final ident_map_size, physmem allocations could be performed now */
physmem_set_usable_limit(ident_map_size); physmem_set_usable_limit(ident_map_size);
detect_physmem_online_ranges(max_physmem_end);
save_ipl_cert_comp_list();
rescue_initrd(safe_addr, ident_map_size);
#ifdef CONFIG_KASAN
physmem_alloc_top_down(RR_KASAN, kasan_estimate_memory_needs(get_physmem_usable_total()),
_SEGMENT_SIZE);
#endif
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) { if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
random_lma = get_random_base(safe_addr); random_lma = get_random_base();
if (random_lma) { if (random_lma) {
__kaslr_offset = random_lma - vmlinux.default_lma; __kaslr_offset = random_lma - vmlinux.default_lma;
img = (void *)vmlinux.default_lma; img = (void *)vmlinux.default_lma;
...@@ -317,8 +324,16 @@ void startup_kernel(void) ...@@ -317,8 +324,16 @@ void startup_kernel(void)
if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) { if (!IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED)) {
img = decompress_kernel(); img = decompress_kernel();
memmove((void *)vmlinux.default_lma, img, vmlinux.image_size); memmove((void *)vmlinux.default_lma, img, vmlinux.image_size);
} else if (__kaslr_offset) } else if (__kaslr_offset) {
memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size); memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size);
memset(img, 0, vmlinux.image_size);
}
/* vmlinux decompression is done, shrink reserved low memory */
physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);
if (!__kaslr_offset)
physmem_reserve(RR_VMLINUX, vmlinux.default_lma, vmlinux.image_size + vmlinux.bss_size);
physmem_alloc_range(RR_AMODE31, vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G, true);
/* /*
* The order of the following operations is important: * The order of the following operations is important:
...@@ -338,16 +353,11 @@ void startup_kernel(void) ...@@ -338,16 +353,11 @@ void startup_kernel(void)
setup_vmem(asce_limit); setup_vmem(asce_limit);
copy_bootdata(); copy_bootdata();
if (__kaslr_offset) { /*
/* * Save KASLR offset for early dumps, before vmcore_info is set.
* Save KASLR offset for early dumps, before vmcore_info is set. * Mark as uneven to distinguish from real vmcore_info pointer.
* Mark as uneven to distinguish from real vmcore_info pointer. */
*/ S390_lowcore.vmcore_info = __kaslr_offset ? __kaslr_offset | 0x1UL : 0;
S390_lowcore.vmcore_info = __kaslr_offset | 0x1UL;
/* Clear non-relocated kernel */
if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
memset(img, 0, vmlinux.image_size);
}
/* /*
* Jump to the decompressed kernel entry point and switch DAT mode on. * Jump to the decompressed kernel entry point and switch DAT mode on.
......
...@@ -10,6 +10,8 @@ ...@@ -10,6 +10,8 @@
#include "decompressor.h" #include "decompressor.h"
#include "boot.h" #include "boot.h"
unsigned long __bootdata_preserved(s390_invalid_asce);
#define init_mm (*(struct mm_struct *)vmlinux.init_mm_off) #define init_mm (*(struct mm_struct *)vmlinux.init_mm_off)
#define swapper_pg_dir vmlinux.swapper_pg_dir_off #define swapper_pg_dir vmlinux.swapper_pg_dir_off
#define invalid_pg_dir vmlinux.invalid_pg_dir_off #define invalid_pg_dir vmlinux.invalid_pg_dir_off
...@@ -22,77 +24,27 @@ static inline pte_t *__virt_to_kpte(unsigned long va) ...@@ -22,77 +24,27 @@ static inline pte_t *__virt_to_kpte(unsigned long va)
return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va); return pte_offset_kernel(pmd_offset(pud_offset(p4d_offset(pgd_offset_k(va), va), va), va), va);
} }
unsigned long __bootdata_preserved(s390_invalid_asce);
unsigned long __bootdata(pgalloc_pos);
unsigned long __bootdata(pgalloc_end);
unsigned long __bootdata(pgalloc_low);
enum populate_mode { enum populate_mode {
POPULATE_NONE, POPULATE_NONE,
POPULATE_ONE2ONE, POPULATE_ONE2ONE,
POPULATE_ABS_LOWCORE, POPULATE_ABS_LOWCORE,
}; };
static void boot_check_oom(void)
{
if (pgalloc_pos < pgalloc_low)
error("out of memory on boot\n");
}
static void pgtable_populate_init(void)
{
unsigned long initrd_end;
unsigned long kernel_end;
kernel_end = vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
pgalloc_low = round_up(kernel_end, PAGE_SIZE);
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
initrd_end = round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
pgalloc_low = max(pgalloc_low, initrd_end);
}
pgalloc_end = round_down(get_physmem_usable_end(), PAGE_SIZE);
pgalloc_pos = pgalloc_end;
boot_check_oom();
}
static void *boot_alloc_pages(unsigned int order)
{
unsigned long size = PAGE_SIZE << order;
pgalloc_pos -= size;
pgalloc_pos = round_down(pgalloc_pos, size);
boot_check_oom();
return (void *)pgalloc_pos;
}
static void *boot_crst_alloc(unsigned long val) static void *boot_crst_alloc(unsigned long val)
{ {
unsigned long size = PAGE_SIZE << CRST_ALLOC_ORDER;
unsigned long *table; unsigned long *table;
table = boot_alloc_pages(CRST_ALLOC_ORDER); table = (unsigned long *)physmem_alloc_top_down(RR_VMEM, size, size);
if (table) crst_table_init(table, val);
crst_table_init(table, val);
return table; return table;
} }
static pte_t *boot_pte_alloc(void) static pte_t *boot_pte_alloc(void)
{ {
static void *pte_leftover;
pte_t *pte; pte_t *pte;
BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE); pte = (pte_t *)physmem_alloc_top_down(RR_VMEM, _PAGE_TABLE_SIZE, _PAGE_TABLE_SIZE);
if (!pte_leftover) {
pte_leftover = boot_alloc_pages(0);
pte = pte_leftover + _PAGE_TABLE_SIZE;
} else {
pte = pte_leftover;
pte_leftover = NULL;
}
memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE); memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
return pte; return pte;
} }
...@@ -126,7 +78,6 @@ static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end) ...@@ -126,7 +78,6 @@ static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end, static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end,
enum populate_mode mode) enum populate_mode mode)
{ {
unsigned long next;
pte_t *pte, entry; pte_t *pte, entry;
pte = pte_offset_kernel(pmd, addr); pte = pte_offset_kernel(pmd, addr);
...@@ -250,7 +201,6 @@ void setup_vmem(unsigned long asce_limit) ...@@ -250,7 +201,6 @@ void setup_vmem(unsigned long asce_limit)
* To prevent creation of a large page at address 0 first map * To prevent creation of a large page at address 0 first map
* the lowcore and create the identity mapping only afterwards. * the lowcore and create the identity mapping only afterwards.
*/ */
pgtable_populate_init();
pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE); pgtable_populate(0, sizeof(struct lowcore), POPULATE_ONE2ONE);
for_each_physmem_usable_range(i, &start, &end) for_each_physmem_usable_range(i, &start, &end)
pgtable_populate(start, end, POPULATE_ONE2ONE); pgtable_populate(start, end, POPULATE_ONE2ONE);
...@@ -269,10 +219,3 @@ void setup_vmem(unsigned long asce_limit) ...@@ -269,10 +219,3 @@ void setup_vmem(unsigned long asce_limit)
init_mm.context.asce = S390_lowcore.kernel_asce; init_mm.context.asce = S390_lowcore.kernel_asce;
} }
unsigned long vmem_estimate_memory_needs(unsigned long online_mem_total)
{
unsigned long pages = DIV_ROUND_UP(online_mem_total, PAGE_SIZE);
return DIV_ROUND_UP(pages, _PAGE_ENTRIES) * _PAGE_TABLE_SIZE * 2;
}
...@@ -93,6 +93,8 @@ SECTIONS ...@@ -93,6 +93,8 @@ SECTIONS
_decompressor_syms_end = .; _decompressor_syms_end = .;
} }
_decompressor_end = .;
#ifdef CONFIG_KERNEL_UNCOMPRESSED #ifdef CONFIG_KERNEL_UNCOMPRESSED
. = 0x100000; . = 0x100000;
#else #else
......
...@@ -17,6 +17,27 @@ struct physmem_range { ...@@ -17,6 +17,27 @@ struct physmem_range {
u64 end; u64 end;
}; };
enum reserved_range_type {
RR_DECOMPRESSOR,
RR_INITRD,
RR_VMLINUX,
RR_AMODE31,
RR_IPLREPORT,
RR_CERT_COMP_LIST,
RR_MEM_DETECT_EXTENDED,
RR_VMEM,
#ifdef CONFIG_KASAN
RR_KASAN,
#endif
RR_MAX
};
struct reserved_range {
unsigned long start;
unsigned long end;
struct reserved_range *chain;
};
/* /*
* Storage element id is defined as 1 byte (up to 256 storage elements). * Storage element id is defined as 1 byte (up to 256 storage elements).
* In practise only storage element id 0 and 1 are used). * In practise only storage element id 0 and 1 are used).
...@@ -31,6 +52,7 @@ struct physmem_info { ...@@ -31,6 +52,7 @@ struct physmem_info {
u32 range_count; u32 range_count;
u8 info_source; u8 info_source;
unsigned long usable; unsigned long usable;
struct reserved_range reserved[RR_MAX];
struct physmem_range online[MEM_INLINED_ENTRIES]; struct physmem_range online[MEM_INLINED_ENTRIES];
struct physmem_range *online_extended; struct physmem_range *online_extended;
}; };
...@@ -80,6 +102,70 @@ static inline int __get_physmem_range(u32 n, unsigned long *start, ...@@ -80,6 +102,70 @@ static inline int __get_physmem_range(u32 n, unsigned long *start,
#define for_each_physmem_online_range(i, p_start, p_end) \ #define for_each_physmem_online_range(i, p_start, p_end) \
for (i = 0; !__get_physmem_range(i, p_start, p_end, false); i++) for (i = 0; !__get_physmem_range(i, p_start, p_end, false); i++)
static inline const char *get_physmem_info_source(void)
{
switch (physmem_info.info_source) {
case MEM_DETECT_SCLP_STOR_INFO:
return "sclp storage info";
case MEM_DETECT_DIAG260:
return "diag260";
case MEM_DETECT_SCLP_READ_INFO:
return "sclp read info";
case MEM_DETECT_BIN_SEARCH:
return "binary search";
}
return "none";
}
#define RR_TYPE_NAME(t) case RR_ ## t: return #t
static inline const char *get_rr_type_name(enum reserved_range_type t)
{
switch (t) {
RR_TYPE_NAME(DECOMPRESSOR);
RR_TYPE_NAME(INITRD);
RR_TYPE_NAME(VMLINUX);
RR_TYPE_NAME(AMODE31);
RR_TYPE_NAME(IPLREPORT);
RR_TYPE_NAME(CERT_COMP_LIST);
RR_TYPE_NAME(MEM_DETECT_EXTENDED);
RR_TYPE_NAME(VMEM);
#ifdef CONFIG_KASAN
RR_TYPE_NAME(KASAN);
#endif
default:
return "UNKNOWN";
}
}
#define for_each_physmem_reserved_type_range(t, range, p_start, p_end) \
for (range = &physmem_info.reserved[t], *p_start = range->start, *p_end = range->end; \
range && range->end; range = range->chain, \
*p_start = range ? range->start : 0, *p_end = range ? range->end : 0)
static inline struct reserved_range *__physmem_reserved_next(enum reserved_range_type *t,
struct reserved_range *range)
{
if (!range) {
range = &physmem_info.reserved[*t];
if (range->end)
return range;
}
if (range->chain)
return range->chain;
while (++*t < RR_MAX) {
range = &physmem_info.reserved[*t];
if (range->end)
return range;
}
return NULL;
}
#define for_each_physmem_reserved_range(t, range, p_start, p_end) \
for (t = 0, range = __physmem_reserved_next(&t, NULL), \
*p_start = range ? range->start : 0, *p_end = range ? range->end : 0; \
range; range = __physmem_reserved_next(&t, range), \
*p_start = range ? range->start : 0, *p_end = range ? range->end : 0)
static inline unsigned long get_physmem_usable_total(void) static inline unsigned long get_physmem_usable_total(void)
{ {
unsigned long start, end, total = 0; unsigned long start, end, total = 0;
...@@ -91,28 +177,12 @@ static inline unsigned long get_physmem_usable_total(void) ...@@ -91,28 +177,12 @@ static inline unsigned long get_physmem_usable_total(void)
return total; return total;
} }
static inline void get_physmem_reserved(unsigned long *start, unsigned long *size) static inline unsigned long get_physmem_reserved(enum reserved_range_type type,
unsigned long *addr, unsigned long *size)
{ {
*start = (unsigned long)physmem_info.online_extended; *addr = physmem_info.reserved[type].start;
if (physmem_info.range_count > MEM_INLINED_ENTRIES) *size = physmem_info.reserved[type].end - physmem_info.reserved[type].start;
*size = (physmem_info.range_count - MEM_INLINED_ENTRIES) * return *size;
sizeof(struct physmem_range);
else
*size = 0;
}
static inline unsigned long get_physmem_usable_end(void)
{
unsigned long start;
unsigned long end;
if (physmem_info.usable)
return physmem_info.usable;
if (physmem_info.range_count) {
__get_physmem_range(physmem_info.range_count - 1, &start, &end, false);
return end;
}
return 0;
} }
#endif #endif
...@@ -74,10 +74,6 @@ extern unsigned int zlib_dfltcc_support; ...@@ -74,10 +74,6 @@ extern unsigned int zlib_dfltcc_support;
extern int noexec_disabled; extern int noexec_disabled;
extern unsigned long ident_map_size; extern unsigned long ident_map_size;
extern unsigned long pgalloc_pos;
extern unsigned long pgalloc_end;
extern unsigned long pgalloc_low;
extern unsigned long __amode31_base;
/* The Write Back bit position in the physaddr is given by the SLPC PCI */ /* The Write Back bit position in the physaddr is given by the SLPC PCI */
extern unsigned long mio_wb_bit_mask; extern unsigned long mio_wb_bit_mask;
...@@ -150,11 +146,6 @@ static inline unsigned long kaslr_offset(void) ...@@ -150,11 +146,6 @@ static inline unsigned long kaslr_offset(void)
return __kaslr_offset; return __kaslr_offset;
} }
struct initrd_data {
unsigned long start;
unsigned long size;
};
extern struct initrd_data initrd_data;
struct oldmem_data { struct oldmem_data {
unsigned long start; unsigned long start;
......
...@@ -148,13 +148,8 @@ static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; ...@@ -148,13 +148,8 @@ static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
int __bootdata(noexec_disabled); int __bootdata(noexec_disabled);
unsigned long __bootdata(ident_map_size); unsigned long __bootdata(ident_map_size);
struct physmem_info __bootdata(physmem_info); struct physmem_info __bootdata(physmem_info);
struct initrd_data __bootdata(initrd_data);
unsigned long __bootdata(pgalloc_pos);
unsigned long __bootdata(pgalloc_end);
unsigned long __bootdata(pgalloc_low);
unsigned long __bootdata_preserved(__kaslr_offset); unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata(__amode31_base);
unsigned int __bootdata_preserved(zlib_dfltcc_support); unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support); EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]); u64 __bootdata_preserved(stfle_fac_list[16]);
...@@ -635,7 +630,11 @@ static struct notifier_block kdump_mem_nb = { ...@@ -635,7 +630,11 @@ static struct notifier_block kdump_mem_nb = {
*/ */
static void __init reserve_pgtables(void) static void __init reserve_pgtables(void)
{ {
memblock_reserve(pgalloc_pos, pgalloc_end - pgalloc_pos); unsigned long start, end;
struct reserved_range *range;
for_each_physmem_reserved_type_range(RR_VMEM, range, &start, &end)
memblock_reserve(start, end - start);
} }
/* /*
...@@ -712,13 +711,13 @@ static void __init reserve_crashkernel(void) ...@@ -712,13 +711,13 @@ static void __init reserve_crashkernel(void)
*/ */
static void __init reserve_initrd(void) static void __init reserve_initrd(void)
{ {
#ifdef CONFIG_BLK_DEV_INITRD unsigned long addr, size;
if (!initrd_data.start || !initrd_data.size)
if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD) || !get_physmem_reserved(RR_INITRD, &addr, &size))
return; return;
initrd_start = (unsigned long)__va(initrd_data.start); initrd_start = (unsigned long)__va(addr);
initrd_end = initrd_start + initrd_data.size; initrd_end = initrd_start + size;
memblock_reserve(initrd_data.start, initrd_data.size); memblock_reserve(addr, size);
#endif
} }
/* /*
...@@ -732,35 +731,18 @@ static void __init reserve_certificate_list(void) ...@@ -732,35 +731,18 @@ static void __init reserve_certificate_list(void)
static void __init reserve_physmem_info(void) static void __init reserve_physmem_info(void)
{ {
unsigned long start, size; unsigned long addr, size;
get_physmem_reserved(&start, &size); if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
if (size) memblock_reserve(addr, size);
memblock_reserve(start, size);
} }
static void __init free_physmem_info(void) static void __init free_physmem_info(void)
{ {
unsigned long start, size; unsigned long addr, size;
get_physmem_reserved(&start, &size); if (get_physmem_reserved(RR_MEM_DETECT_EXTENDED, &addr, &size))
if (size) memblock_phys_free(addr, size);
memblock_phys_free(start, size);
}
static const char * __init get_mem_info_source(void)
{
switch (physmem_info.info_source) {
case MEM_DETECT_SCLP_STOR_INFO:
return "sclp storage info";
case MEM_DETECT_DIAG260:
return "diag260";
case MEM_DETECT_SCLP_READ_INFO:
return "sclp read info";
case MEM_DETECT_BIN_SEARCH:
return "binary search";
}
return "none";
} }
static void __init memblock_add_physmem_info(void) static void __init memblock_add_physmem_info(void)
...@@ -769,7 +751,7 @@ static void __init memblock_add_physmem_info(void) ...@@ -769,7 +751,7 @@ static void __init memblock_add_physmem_info(void)
int i; int i;
pr_debug("physmem info source: %s (%hhd)\n", pr_debug("physmem info source: %s (%hhd)\n",
get_mem_info_source(), physmem_info.info_source); get_physmem_info_source(), physmem_info.info_source);
/* keep memblock lists close to the kernel */ /* keep memblock lists close to the kernel */
memblock_set_bottom_up(true); memblock_set_bottom_up(true);
for_each_physmem_usable_range(i, &start, &end) for_each_physmem_usable_range(i, &start, &end)
...@@ -780,21 +762,6 @@ static void __init memblock_add_physmem_info(void) ...@@ -780,21 +762,6 @@ static void __init memblock_add_physmem_info(void)
memblock_set_node(0, ULONG_MAX, &memblock.memory, 0); memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
} }
/*
* Check for initrd being in usable memory
*/
static void __init check_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
if (initrd_data.start && initrd_data.size &&
!memblock_is_region_memory(initrd_data.start, initrd_data.size)) {
pr_err("The initial RAM disk does not fit into the memory\n");
memblock_phys_free(initrd_data.start, initrd_data.size);
initrd_start = initrd_end = 0;
}
#endif
}
/* /*
* Reserve memory used for lowcore/command line/kernel image. * Reserve memory used for lowcore/command line/kernel image.
*/ */
...@@ -803,7 +770,7 @@ static void __init reserve_kernel(void) ...@@ -803,7 +770,7 @@ static void __init reserve_kernel(void)
memblock_reserve(0, STARTUP_NORMAL_OFFSET); memblock_reserve(0, STARTUP_NORMAL_OFFSET);
memblock_reserve(OLDMEM_BASE, sizeof(unsigned long)); memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long)); memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
memblock_reserve(__amode31_base, __eamode31 - __samode31); memblock_reserve(physmem_info.reserved[RR_AMODE31].start, __eamode31 - __samode31);
memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP); memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
memblock_reserve(__pa(_stext), _end - _stext); memblock_reserve(__pa(_stext), _end - _stext);
} }
...@@ -825,13 +792,13 @@ static void __init setup_memory(void) ...@@ -825,13 +792,13 @@ static void __init setup_memory(void)
static void __init relocate_amode31_section(void) static void __init relocate_amode31_section(void)
{ {
unsigned long amode31_size = __eamode31 - __samode31; unsigned long amode31_size = __eamode31 - __samode31;
long amode31_offset = __amode31_base - __samode31; long amode31_offset = physmem_info.reserved[RR_AMODE31].start - __samode31;
long *ptr; long *ptr;
pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size); pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
/* Move original AMODE31 section to the new one */ /* Move original AMODE31 section to the new one */
memmove((void *)__amode31_base, (void *)__samode31, amode31_size); memmove((void *)physmem_info.reserved[RR_AMODE31].start, (void *)__samode31, amode31_size);
/* Zero out the old AMODE31 section to catch invalid accesses within it */ /* Zero out the old AMODE31 section to catch invalid accesses within it */
memset((void *)__samode31, 0, amode31_size); memset((void *)__samode31, 0, amode31_size);
...@@ -1017,7 +984,6 @@ void __init setup_arch(char **cmdline_p) ...@@ -1017,7 +984,6 @@ void __init setup_arch(char **cmdline_p)
if (MACHINE_HAS_EDAT2) if (MACHINE_HAS_EDAT2)
hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT); hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
check_initrd();
reserve_crashkernel(); reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP #ifdef CONFIG_CRASH_DUMP
/* /*
......
// SPDX-License-Identifier: GPL-2.0 // SPDX-License-Identifier: GPL-2.0
#include <linux/kasan.h> #include <linux/memblock.h>
#include <linux/sched/task.h>
#include <linux/pgtable.h> #include <linux/pgtable.h>
#include <asm/pgalloc.h> #include <linux/kasan.h>
#include <asm/kasan.h>
#include <asm/physmem_info.h> #include <asm/physmem_info.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/sclp.h>
#include <asm/facility.h> #include <asm/facility.h>
#include <asm/sections.h> #include <asm/pgalloc.h>
#include <asm/setup.h> #include <asm/sclp.h>
#include <asm/uv.h>
static unsigned long pgalloc_pos __initdata;
static unsigned long segment_pos __initdata; static unsigned long segment_pos __initdata;
static unsigned long segment_low __initdata;
static bool has_edat __initdata; static bool has_edat __initdata;
static bool has_nx __initdata; static bool has_nx __initdata;
...@@ -28,19 +24,20 @@ static void __init kasan_early_panic(const char *reason) ...@@ -28,19 +24,20 @@ static void __init kasan_early_panic(const char *reason)
static void * __init kasan_early_alloc_segment(void) static void * __init kasan_early_alloc_segment(void)
{ {
segment_pos -= _SEGMENT_SIZE; unsigned long addr = segment_pos;
if (segment_pos < segment_low) segment_pos += _SEGMENT_SIZE;
if (segment_pos > pgalloc_pos)
kasan_early_panic("out of memory during initialisation\n"); kasan_early_panic("out of memory during initialisation\n");
return __va(segment_pos); return __va(addr);
} }
static void * __init kasan_early_alloc_pages(unsigned int order) static void * __init kasan_early_alloc_pages(unsigned int order)
{ {
pgalloc_pos -= (PAGE_SIZE << order); pgalloc_pos -= (PAGE_SIZE << order);
if (pgalloc_pos < pgalloc_low) if (segment_pos > pgalloc_pos)
kasan_early_panic("out of memory during initialisation\n"); kasan_early_panic("out of memory during initialisation\n");
return __va(pgalloc_pos); return __va(pgalloc_pos);
...@@ -225,8 +222,8 @@ void __init kasan_early_init(void) ...@@ -225,8 +222,8 @@ void __init kasan_early_init(void)
pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY); pmd_t pmd_z = __pmd(__pa(kasan_early_shadow_pte) | _SEGMENT_ENTRY);
pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY); pud_t pud_z = __pud(__pa(kasan_early_shadow_pmd) | _REGION3_ENTRY);
p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY); p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
unsigned long pgalloc_pos_initial, segment_pos_initial;
unsigned long untracked_end = MODULES_VADDR; unsigned long untracked_end = MODULES_VADDR;
unsigned long shadow_alloc_size;
unsigned long start, end; unsigned long start, end;
int i; int i;
...@@ -243,13 +240,11 @@ void __init kasan_early_init(void) ...@@ -243,13 +240,11 @@ void __init kasan_early_init(void)
crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z)); crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE); memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
if (has_edat) { /* segment allocations go bottom up -> <- pgalloc go top down */
shadow_alloc_size = get_physmem_usable_total() >> KASAN_SHADOW_SCALE_SHIFT; segment_pos_initial = physmem_info.reserved[RR_KASAN].start;
segment_pos = round_down(pgalloc_pos, _SEGMENT_SIZE); segment_pos = segment_pos_initial;
segment_low = segment_pos - shadow_alloc_size; pgalloc_pos_initial = physmem_info.reserved[RR_KASAN].end;
segment_low = round_down(segment_low, _SEGMENT_SIZE); pgalloc_pos = pgalloc_pos_initial;
pgalloc_pos = segment_low;
}
/* /*
* Current memory layout: * Current memory layout:
* +- 0 -------------+ +- shadow start -+ * +- 0 -------------+ +- shadow start -+
...@@ -298,4 +293,6 @@ void __init kasan_early_init(void) ...@@ -298,4 +293,6 @@ void __init kasan_early_init(void)
/* enable kasan */ /* enable kasan */
init_task.kasan_depth = 0; init_task.kasan_depth = 0;
sclp_early_printk("KernelAddressSanitizer initialized\n"); sclp_early_printk("KernelAddressSanitizer initialized\n");
memblock_reserve(segment_pos_initial, segment_pos - segment_pos_initial);
memblock_reserve(pgalloc_pos, pgalloc_pos_initial - pgalloc_pos);
} }