Commit bd9e99f7 authored by Linus Torvalds

Merge tag 'x86_boot_for_v6.6_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 boot updates from Borislav Petkov:
 "Avoid the baremetal decompressor code when booting on an EFI machine.

  This is mandated by the current tightening of EFI executables
  requirements when used in a secure boot scenario. More specifically,
  an EFI executable cannot have a single section with RWX permissions,
  which conflicts with the in-place kernel decompression that is done
  today.

  Instead, the things required by the booting kernel image are done in
  the EFI stub now.

  Work by Ard Biesheuvel"

* tag 'x86_boot_for_v6.6_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
  x86/efistub: Avoid legacy decompressor when doing EFI boot
  x86/efistub: Perform SNP feature test while running in the firmware
  efi/libstub: Add limit argument to efi_random_alloc()
  x86/decompressor: Factor out kernel decompression and relocation
  x86/decompressor: Move global symbol references to C code
  decompress: Use 8 byte alignment
  x86/efistub: Prefer EFI memory attributes protocol over DXE services
  x86/efistub: Perform 4/5 level paging switch from the stub
  x86/decompressor: Merge trampoline cleanup with switching code
  x86/decompressor: Pass pgtable address to trampoline directly
  x86/decompressor: Only call the trampoline when changing paging levels
  x86/decompressor: Call trampoline directly from C code
  x86/decompressor: Avoid the need for a stack in the 32-bit trampoline
  x86/decompressor: Use standard calling convention for trampoline
  x86/decompressor: Call trampoline as a normal function
  x86/decompressor: Assign paging related global variables earlier
  x86/decompressor: Store boot_params pointer in callee save register
  x86/efistub: Clear BSS in EFI handover protocol entrypoint
  x86/decompressor: Avoid magic offsets for EFI handover entrypoint
  x86/efistub: Simplify and clean up handover entry code
  ...
parents 6f49693a a1b87d54
@@ -1417,7 +1417,7 @@ execution context provided by the EFI firmware.
 The function prototype for the handover entry point looks like this::
 
-  efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp)
+  efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp)
 
 'handle' is the EFI image handle passed to the boot loader by the EFI
 firmware, 'table' is the EFI system table - these are the first two
......
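For context on the renamed entry point above, here is a minimal, hypothetical sketch of how a boot loader might invoke the handover entry with this prototype. The computation of `entry` (startup_32/64 plus the setup header's `handover_offset`, per the boot protocol) and the population of `struct boot_params` are assumed to have been done by the loader already; the helper name is illustrative, not kernel code.

/*
 * Illustrative only: call the EFI handover entry point using the prototype
 * documented above. 'entry' is assumed to already point at the handover
 * entry and 'bp' at a populated struct boot_params.
 */
typedef void (*efi_handover_fn)(void *handle, efi_system_table_t *table,
				struct boot_params *bp);

static void boot_via_efi_handover(void *entry, void *handle,
				  efi_system_table_t *systab,
				  struct boot_params *bp)
{
	efi_handover_fn handover = (efi_handover_fn)entry;

	handover(handle, systab, bp);	/* does not return on success */
}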
@@ -74,6 +74,11 @@ LDFLAGS_vmlinux += -z noexecstack
 ifeq ($(CONFIG_LD_IS_BFD),y)
 LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments)
 endif
+ifeq ($(CONFIG_EFI_STUB),y)
+# ensure that the static EFI stub library will be pulled in, even if it is
+# never referenced explicitly from the startup code
+LDFLAGS_vmlinux += -u efi_pe_entry
+endif
 LDFLAGS_vmlinux += -T
 
 hostprogs	:= mkpiggy
......
@@ -26,8 +26,8 @@
  * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode()
  * is the first thing that runs after switching to long mode. Depending on
  * whether the EFI handover protocol or the compat entry point was used to
- * enter the kernel, it will either branch to the 64-bit EFI handover
- * entrypoint at offset 0x390 in the image, or to the 64-bit EFI PE/COFF
+ * enter the kernel, it will either branch to the common 64-bit EFI stub
+ * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF
  * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a
  * struct bootparams pointer as the third argument, so the presence of such a
  * pointer is used to disambiguate.
@@ -37,21 +37,23 @@
  *  | efi32_pe_entry   |---->|            |            |       +-----------+--+
  *  +------------------+     |            |     +------+----------------+  |
  *                           | startup_32 |---->| startup_64_mixed_mode |  |
- *  +------------------+     |            |     +------+----------------+  V
- *  | efi32_stub_entry |---->|            |            |     +------------------+
- *  +------------------+     +------------+            +---->| efi64_stub_entry |
- *                                                            +-------------+----+
- *                           +------------+     +----------+               |
- *                           | startup_64 |<----| efi_main |<--------------+
- *                           +------------+     +----------+
+ *  +------------------+     |            |     +------+----------------+  |
+ *  | efi32_stub_entry |---->|            |            |                   |
+ *  +------------------+     +------------+            |                   |
+ *                                                      V                   |
+ *                           +------------+     +----------------+         |
+ *                           | startup_64 |<----| efi_stub_entry |<--------+
+ *                           +------------+     +----------------+
  */
 SYM_FUNC_START(startup_64_mixed_mode)
 	lea	efi32_boot_args(%rip), %rdx
 	mov	0(%rdx), %edi
 	mov	4(%rdx), %esi
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
 	mov	8(%rdx), %edx		// saved bootparams pointer
 	test	%edx, %edx
-	jnz	efi64_stub_entry
+	jnz	efi_stub_entry
+#endif
 	/*
 	 * efi_pe_entry uses MS calling convention, which requires 32 bytes of
 	 * shadow space on the stack even if all arguments are passed in
@@ -138,6 +140,28 @@ SYM_FUNC_START(__efi64_thunk)
 SYM_FUNC_END(__efi64_thunk)
 
 	.code32
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+SYM_FUNC_START(efi32_stub_entry)
+	call	1f
+1:	popl	%ecx
+
+	/* Clear BSS */
+	xorl	%eax, %eax
+	leal	(_bss - 1b)(%ecx), %edi
+	leal	(_ebss - 1b)(%ecx), %ecx
+	subl	%edi, %ecx
+	shrl	$2, %ecx
+	cld
+	rep	stosl
+
+	add	$0x4, %esp		/* Discard return address */
+	popl	%ecx
+	popl	%edx
+	popl	%esi
+	jmp	efi32_entry
+SYM_FUNC_END(efi32_stub_entry)
+#endif
+
 /*
  * EFI service pointer must be in %edi.
  *
@@ -218,7 +242,7 @@ SYM_FUNC_END(efi_enter32)
  * stub may still exit and return to the firmware using the Exit() EFI boot
  * service.]
  */
-SYM_FUNC_START(efi32_entry)
+SYM_FUNC_START_LOCAL(efi32_entry)
 	call	1f
 1:	pop	%ebx
@@ -245,10 +269,6 @@ SYM_FUNC_START(efi32_entry)
 	jmp	startup_32
 SYM_FUNC_END(efi32_entry)
 
-#define ST32_boottime		60 // offsetof(efi_system_table_32_t, boottime)
-#define BS32_handle_protocol	88 // offsetof(efi_boot_services_32_t, handle_protocol)
-#define LI32_image_base		32 // offsetof(efi_loaded_image_32_t, image_base)
-
 /*
  * efi_status_t efi32_pe_entry(efi_handle_t image_handle,
  *			       efi_system_table_32_t *sys_table)
@@ -256,8 +276,6 @@ SYM_FUNC_END(efi32_entry)
 SYM_FUNC_START(efi32_pe_entry)
 	pushl	%ebp
 	movl	%esp, %ebp
-	pushl	%eax				// dummy push to allocate loaded_image
-
 	pushl	%ebx				// save callee-save registers
 	pushl	%edi
 
@@ -266,48 +284,8 @@ SYM_FUNC_START(efi32_pe_entry)
 	movl	$0x80000003, %eax		// EFI_UNSUPPORTED
 	jnz	2f
 
-	call	1f
-1:	pop	%ebx
-
-	/* Get the loaded image protocol pointer from the image handle */
-	leal	-4(%ebp), %eax
-	pushl	%eax				// &loaded_image
-	leal	(loaded_image_proto - 1b)(%ebx), %eax
-	pushl	%eax				// pass the GUID address
-	pushl	8(%ebp)				// pass the image handle
-
-	/*
-	 * Note the alignment of the stack frame.
-	 *   sys_table
-	 *   handle		<-- 16-byte aligned on entry by ABI
-	 *   return address
-	 *   frame pointer
-	 *   loaded_image	<-- local variable
-	 *   saved %ebx		<-- 16-byte aligned here
-	 *   saved %edi
-	 *   &loaded_image
-	 *   &loaded_image_proto
-	 *   handle		<-- 16-byte aligned for call to handle_protocol
-	 */
-
-	movl	12(%ebp), %eax			// sys_table
-	movl	ST32_boottime(%eax), %eax	// sys_table->boottime
-	call	*BS32_handle_protocol(%eax)	// sys_table->boottime->handle_protocol
-	addl	$12, %esp			// restore argument space
-	testl	%eax, %eax
-	jnz	2f
-
 	movl	8(%ebp), %ecx			// image_handle
 	movl	12(%ebp), %edx			// sys_table
-	movl	-4(%ebp), %esi			// loaded_image
-	movl	LI32_image_base(%esi), %esi	// loaded_image->image_base
-	leal	(startup_32 - 1b)(%ebx), %ebp	// runtime address of startup_32
-	/*
-	 * We need to set the image_offset variable here since startup_32() will
-	 * use it before we get to the 64-bit efi_pe_entry() in C code.
-	 */
-	subl	%esi, %ebp			// calculate image_offset
-	movl	%ebp, (image_offset - 1b)(%ebx)	// save image_offset
 	xorl	%esi, %esi
 	jmp	efi32_entry			// pass %ecx, %edx, %esi
 						// no other registers remain live
@@ -318,14 +296,13 @@ SYM_FUNC_START(efi32_pe_entry)
 	RET
 SYM_FUNC_END(efi32_pe_entry)
 
-	.section ".rodata"
-	/* EFI loaded image protocol GUID */
-	.balign 4
-SYM_DATA_START_LOCAL(loaded_image_proto)
-	.long	0x5b1b31a1
-	.word	0x9562, 0x11d2
-	.byte	0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b
-SYM_DATA_END(loaded_image_proto)
+#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
+	.org	efi32_stub_entry + 0x200
+	.code64
+SYM_FUNC_START_NOALIGN(efi64_stub_entry)
+	jmp	efi_handover_entry
+SYM_FUNC_END(efi64_stub_entry)
+#endif
 
 	.data
 	.balign	8
......
@@ -84,19 +84,6 @@ SYM_FUNC_START(startup_32)
 #ifdef CONFIG_RELOCATABLE
 	leal	startup_32@GOTOFF(%edx), %ebx
 
-#ifdef CONFIG_EFI_STUB
-/*
- * If we were loaded via the EFI LoadImage service, startup_32() will be at an
- * offset to the start of the space allocated for the image. efi_pe_entry() will
- * set up image_offset to tell us where the image actually starts, so that we
- * can use the full available buffer.
- *	image_offset = startup_32 - image_base
- * Otherwise image_offset will be zero and has no effect on the calculations.
- */
-	subl	image_offset@GOTOFF(%edx), %ebx
-#endif
-
 	movl	BP_kernel_alignment(%esi), %eax
 	decl	%eax
 	addl	%eax, %ebx
@@ -150,17 +137,6 @@ SYM_FUNC_START(startup_32)
 	jmp	*%eax
 SYM_FUNC_END(startup_32)
 
-#ifdef CONFIG_EFI_STUB
-SYM_FUNC_START(efi32_stub_entry)
-	add	$0x4, %esp
-	movl	8(%esp), %esi	/* save boot_params pointer */
-	call	efi_main
-	/* efi_main returns the possibly relocated address of startup_32 */
-	jmp	*%eax
-SYM_FUNC_END(efi32_stub_entry)
-SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry)
-#endif
-
 	.text
 SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
@@ -179,13 +155,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
  */
 				/* push arguments for extract_kernel: */
 
-	pushl	output_len@GOTOFF(%ebx)	/* decompressed length, end of relocs */
 	pushl	%ebp			/* output address */
-	pushl	input_len@GOTOFF(%ebx)	/* input_len */
-	leal	input_data@GOTOFF(%ebx), %eax
-	pushl	%eax			/* input_data */
-	leal	boot_heap@GOTOFF(%ebx), %eax
-	pushl	%eax			/* heap area */
 	pushl	%esi			/* real mode pointer */
 	call	extract_kernel		/* returns kernel entry point in %eax */
 	addl	$24, %esp
@@ -213,8 +183,6 @@ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
  */
 	.bss
 	.balign 4
-boot_heap:
-	.fill BOOT_HEAP_SIZE, 1, 0
 boot_stack:
 	.fill BOOT_STACK_SIZE, 1, 0
 boot_stack_end:
@@ -330,6 +330,33 @@ static size_t parse_elf(void *output)
 	return ehdr.e_entry - LOAD_PHYSICAL_ADDR;
 }
 
+const unsigned long kernel_total_size = VO__end - VO__text;
+
+static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4);
+
+extern unsigned char input_data[];
+extern unsigned int input_len, output_len;
+
+unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
+				void (*error)(char *x))
+{
+	unsigned long entry;
+
+	if (!free_mem_ptr) {
+		free_mem_ptr     = (unsigned long)boot_heap;
+		free_mem_end_ptr = (unsigned long)boot_heap + sizeof(boot_heap);
+	}
+
+	if (__decompress(input_data, input_len, NULL, NULL, outbuf, output_len,
+			 NULL, error) < 0)
+		return ULONG_MAX;
+
+	entry = parse_elf(outbuf);
+	handle_relocations(outbuf, output_len, virt_addr);
+
+	return entry;
+}
+
 /*
  * The compressed kernel image (ZO), has been moved so that its position
  * is against the end of the buffer used to hold the uncompressed kernel
@@ -347,14 +374,10 @@ static size_t parse_elf(void *output)
  *             |-------uncompressed kernel image---------|
  *
  */
-asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
-				  unsigned char *input_data,
-				  unsigned long input_len,
-				  unsigned char *output,
-				  unsigned long output_len)
+asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
 {
-	const unsigned long kernel_total_size = VO__end - VO__text;
 	unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
+	memptr heap = (memptr)boot_heap;
 	unsigned long needed_size;
 	size_t entry_offset;
 
@@ -412,7 +435,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 	 * entries. This ensures the full mapped area is usable RAM
 	 * and doesn't include any reserved areas.
 	 */
-	needed_size = max(output_len, kernel_total_size);
+	needed_size = max_t(unsigned long, output_len, kernel_total_size);
 #ifdef CONFIG_X86_64
 	needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN);
 #endif
@@ -443,7 +466,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 #ifdef CONFIG_X86_64
 	if (heap > 0x3fffffffffffUL)
 		error("Destination address too large");
-	if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
+	if (virt_addr + needed_size > KERNEL_IMAGE_SIZE)
 		error("Destination virtual address is beyond the kernel mapping area");
 #else
 	if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
@@ -461,10 +484,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
 		accept_memory(__pa(output), __pa(output) + needed_size);
 	}
 
-	__decompress(input_data, input_len, NULL, NULL, output, output_len,
-			NULL, error);
-	entry_offset = parse_elf(output);
-	handle_relocations(output, output_len, virt_addr);
+	entry_offset = decompress_kernel(output, virt_addr, error);
+
 	debug_putstr("done.\nBooting the kernel (entry_offset: 0x");
 	debug_puthex(entry_offset);
......
@@ -179,9 +179,7 @@ static inline int count_immovable_mem_regions(void) { return 0; }
 #endif
 
 /* ident_map_64.c */
-#ifdef CONFIG_X86_5LEVEL
 extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
-#endif
 extern void kernel_add_identity_map(unsigned long start, unsigned long end);
 
 /* Used by PAGE_KERN* macros: */
......
@@ -3,18 +3,16 @@
 
 #define TRAMPOLINE_32BIT_SIZE		(2 * PAGE_SIZE)
 
-#define TRAMPOLINE_32BIT_PGTABLE_OFFSET	0
-
 #define TRAMPOLINE_32BIT_CODE_OFFSET	PAGE_SIZE
-#define TRAMPOLINE_32BIT_CODE_SIZE	0x80
-
-#define TRAMPOLINE_32BIT_STACK_END	TRAMPOLINE_32BIT_SIZE
+#define TRAMPOLINE_32BIT_CODE_SIZE	0xA0
 
 #ifndef __ASSEMBLER__
 
 extern unsigned long *trampoline_32bit;
 
-extern void trampoline_32bit_src(void *return_ptr);
+extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
+
+extern const u16 trampoline_ljmp_imm_offset;
 
 #endif /* __ASSEMBLER__ */
 #endif /* BOOT_COMPRESSED_PAGETABLE_H */
@@ -16,11 +16,6 @@ unsigned int __section(".data") pgdir_shift = 39;
 unsigned int __section(".data") ptrs_per_p4d = 1;
 #endif
 
-struct paging_config {
-	unsigned long trampoline_start;
-	unsigned long l5_required;
-};
-
 /* Buffer to preserve trampoline memory */
 static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
 
@@ -29,7 +24,7 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
  * purposes.
  *
  * Avoid putting the pointer into .bss as it will be cleared between
- * paging_prepare() and extract_kernel().
+ * configure_5level_paging() and extract_kernel().
  */
 unsigned long *trampoline_32bit __section(".data");
 
@@ -106,12 +101,13 @@ static unsigned long find_trampoline_placement(void)
 	return bios_start - TRAMPOLINE_32BIT_SIZE;
 }
 
-struct paging_config paging_prepare(void *rmode)
+asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
 {
-	struct paging_config paging_config = {};
+	void (*toggle_la57)(void *cr3);
+	bool l5_required = false;
 
 	/* Initialize boot_params. Required for cmdline_find_option_bool(). */
-	boot_params = rmode;
+	boot_params = bp;
 
 	/*
 	 * Check if LA57 is desired and supported.
@@ -129,12 +125,22 @@ struct paging_config paging_prepare(void *rmode)
 	    !cmdline_find_option_bool("no5lvl") &&
 	    native_cpuid_eax(0) >= 7 &&
 	    (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
-		paging_config.l5_required = 1;
+		l5_required = true;
+
+		/* Initialize variables for 5-level paging */
+		__pgtable_l5_enabled = 1;
+		pgdir_shift = 48;
+		ptrs_per_p4d = 512;
 	}
 
-	paging_config.trampoline_start = find_trampoline_placement();
+	/*
+	 * The trampoline will not be used if the paging mode is already set to
+	 * the desired one.
+	 */
+	if (l5_required == !!(native_read_cr4() & X86_CR4_LA57))
+		return;
 
-	trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
+	trampoline_32bit = (unsigned long *)find_trampoline_placement();
 
 	/* Preserve trampoline memory */
 	memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
@@ -143,32 +149,32 @@ struct paging_config paging_prepare(void *rmode)
 	memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
 
 	/* Copy trampoline code in place */
-	memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
+	toggle_la57 = memcpy(trampoline_32bit +
+			TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
 			&trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
 
+	/*
+	 * Avoid the need for a stack in the 32-bit trampoline code, by using
+	 * LJMP rather than LRET to return back to long mode. LJMP takes an
+	 * immediate absolute address, which needs to be adjusted based on the
+	 * placement of the trampoline.
+	 */
+	*(u32 *)((u8 *)toggle_la57 + trampoline_ljmp_imm_offset) +=
+		(unsigned long)toggle_la57;
+
 	/*
 	 * The code below prepares page table in trampoline memory.
 	 *
 	 * The new page table will be used by trampoline code for switching
 	 * from 4- to 5-level paging or vice versa.
-	 *
-	 * If switching is not required, the page table is unused: trampoline
-	 * code wouldn't touch CR3.
-	 */
-
-	/*
-	 * We are not going to use the page table in trampoline memory if we
-	 * are already in the desired paging mode.
 	 */
-	if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
-		goto out;
 
-	if (paging_config.l5_required) {
+	if (l5_required) {
 		/*
 		 * For 4- to 5-level paging transition, set up current CR3 as
 		 * the first and the only entry in a new top-level page table.
 		 */
-		trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
+		*trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC;
 	} else {
 		unsigned long src;
 
@@ -181,38 +187,17 @@ struct paging_config paging_prepare(void *rmode)
 		 * may be above 4G.
 		 */
 		src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
-		memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
-		       (void *)src, PAGE_SIZE);
+		memcpy(trampoline_32bit, (void *)src, PAGE_SIZE);
 	}
 
-out:
-	return paging_config;
-}
-
-void cleanup_trampoline(void *pgtable)
-{
-	void *trampoline_pgtable;
-
-	trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
+	toggle_la57(trampoline_32bit);
 
 	/*
-	 * Move the top level page table out of trampoline memory,
-	 * if it's there.
+	 * Move the top level page table out of trampoline memory.
 	 */
-	if ((void *)__native_read_cr3() == trampoline_pgtable) {
-		memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
-		native_write_cr3((unsigned long)pgtable);
-	}
+	memcpy(pgtable, trampoline_32bit, PAGE_SIZE);
+	native_write_cr3((unsigned long)pgtable);
 
 	/* Restore trampoline memory */
 	memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
-
-	/* Initialize variables for 5-level paging */
-#ifdef CONFIG_X86_5LEVEL
-	if (__read_cr4() & X86_CR4_LA57) {
-		__pgtable_l5_enabled = 1;
-		pgdir_shift = 48;
-		ptrs_per_p4d = 512;
-	}
-#endif
 }
@@ -367,20 +367,25 @@ static void enforce_vmpl0(void)
  */
 #define SNP_FEATURES_PRESENT	(0)
 
+u64 snp_get_unsupported_features(u64 status)
+{
+	if (!(status & MSR_AMD64_SEV_SNP_ENABLED))
+		return 0;
+
+	return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
+}
+
 void snp_check_features(void)
 {
 	u64 unsupported;
 
-	if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
-		return;
-
 	/*
 	 * Terminate the boot if hypervisor has enabled any feature lacking
 	 * guest side implementation. Pass on the unsupported features mask through
 	 * EXIT_INFO_2 of the GHCB protocol so that those features can be reported
 	 * as part of the guest boot failure.
 	 */
-	unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
+	unsupported = snp_get_unsupported_features(sev_status);
 	if (unsupported) {
 		if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb()))
 			sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
@@ -390,35 +395,22 @@ void snp_check_features(void)
 	}
 }
 
-void sev_enable(struct boot_params *bp)
-{
-	unsigned int eax, ebx, ecx, edx;
-	struct msr m;
-	bool snp;
-
-	/*
-	 * bp->cc_blob_address should only be set by boot/compressed kernel.
-	 * Initialize it to 0 to ensure that uninitialized values from
-	 * buggy bootloaders aren't propagated.
-	 */
-	if (bp)
-		bp->cc_blob_address = 0;
-
-	/*
-	 * Do an initial SEV capability check before snp_init() which
-	 * loads the CPUID page and the same checks afterwards are done
-	 * without the hypervisor and are trustworthy.
-	 *
-	 * If the HV fakes SEV support, the guest will crash'n'burn
-	 * which is good enough.
-	 */
+/*
+ * sev_check_cpu_support - Check for SEV support in the CPU capabilities
+ *
+ * Returns < 0 if SEV is not supported, otherwise the position of the
+ * encryption bit in the page table descriptors.
+ */
+static int sev_check_cpu_support(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+
 	/* Check for the SME/SEV support leaf */
 	eax = 0x80000000;
 	ecx = 0;
 	native_cpuid(&eax, &ebx, &ecx, &edx);
 	if (eax < 0x8000001f)
-		return;
+		return -ENODEV;
 
 	/*
 	 * Check for the SME/SEV feature:
@@ -433,6 +425,35 @@ void sev_enable(struct boot_params *bp)
 	native_cpuid(&eax, &ebx, &ecx, &edx);
 	/* Check whether SEV is supported */
 	if (!(eax & BIT(1)))
+		return -ENODEV;
+
+	return ebx & 0x3f;
+}
+
+void sev_enable(struct boot_params *bp)
+{
+	struct msr m;
+	int bitpos;
+	bool snp;
+
+	/*
+	 * bp->cc_blob_address should only be set by boot/compressed kernel.
+	 * Initialize it to 0 to ensure that uninitialized values from
+	 * buggy bootloaders aren't propagated.
+	 */
+	if (bp)
+		bp->cc_blob_address = 0;
+
+	/*
+	 * Do an initial SEV capability check before snp_init() which
+	 * loads the CPUID page and the same checks afterwards are done
+	 * without the hypervisor and are trustworthy.
+	 *
+	 * If the HV fakes SEV support, the guest will crash'n'burn
+	 * which is good enough.
+	 */
+	if (sev_check_cpu_support() < 0)
 		return;
 
 	/*
@@ -443,26 +464,8 @@ void sev_enable(struct boot_params *bp)
 
 	/* Now repeat the checks with the SNP CPUID table. */
 
-	/* Recheck the SME/SEV support leaf */
-	eax = 0x80000000;
-	ecx = 0;
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-	if (eax < 0x8000001f)
-		return;
-
-	/*
-	 * Recheck for the SME/SEV feature:
-	 *   CPUID Fn8000_001F[EAX]
-	 *   - Bit 0 - Secure Memory Encryption support
-	 *   - Bit 1 - Secure Encrypted Virtualization support
-	 *   CPUID Fn8000_001F[EBX]
-	 *   - Bits 5:0 - Pagetable bit position used to indicate encryption
-	 */
-	eax = 0x8000001f;
-	ecx = 0;
-	native_cpuid(&eax, &ebx, &ecx, &edx);
-	/* Check whether SEV is supported */
-	if (!(eax & BIT(1))) {
+	bitpos = sev_check_cpu_support();
+	if (bitpos < 0) {
 		if (snp)
 			error("SEV-SNP support indicated by CC blob, but not CPUID.");
 		return;
@@ -494,7 +497,24 @@ void sev_enable(struct boot_params *bp)
 	if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
 		error("SEV-SNP supported indicated by CC blob, but not SEV status MSR.");
 
-	sme_me_mask = BIT_ULL(ebx & 0x3f);
+	sme_me_mask = BIT_ULL(bitpos);
+}
+
+/*
+ * sev_get_status - Retrieve the SEV status mask
+ *
+ * Returns 0 if the CPU is not SEV capable, otherwise the value of the
+ * AMD64_SEV MSR.
+ */
+u64 sev_get_status(void)
+{
+	struct msr m;
+
+	if (sev_check_cpu_support() < 0)
+		return 0;
+
+	boot_rdmsr(MSR_AMD64_SEV, &m);
+	return m.q;
 }
 
 /* Search for Confidential Computing blob in the EFI config table. */
......
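The two helpers added above are what lets the EFI stub run the SNP feature test while still inside the firmware (see "x86/efistub: Perform SNP feature test while running in the firmware" in the shortlog). The following is only a hedged sketch of such a caller; the function name and error handling are illustrative, not the actual x86-stub.c code.

/*
 * Hypothetical stub-side check: reject SEV-SNP features that lack guest
 * support before boot services are exited. Illustrative only.
 */
static efi_status_t example_snp_feature_check(void)
{
	u64 unsupported = snp_get_unsupported_features(sev_get_status());

	if (unsupported) {
		efi_err("Unsupported SEV-SNP features detected: 0x%llx\n",
			unsupported);
		return EFI_UNSUPPORTED;
	}
	return EFI_SUCCESS;
}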
@@ -62,4 +62,12 @@
 # define BOOT_STACK_SIZE	0x1000
 #endif
 
+#ifndef __ASSEMBLY__
+extern unsigned int output_len;
+extern const unsigned long kernel_total_size;
+
+unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
+				void (*error)(char *x));
+#endif
+
 #endif /* _ASM_X86_BOOT_H */
@@ -90,6 +90,8 @@ static inline void efi_fpu_end(void)
 }
 
 #ifdef CONFIG_X86_32
+#define EFI_X86_KERNEL_ALLOC_LIMIT	(SZ_512M - 1)
+
 #define arch_efi_call_virt_setup()					\
 ({									\
 	efi_fpu_begin();						\
@@ -103,8 +105,7 @@ static inline void efi_fpu_end(void)
 })
 
 #else /* !CONFIG_X86_32 */
-
-#define EFI_LOADER_SIGNATURE	"EL64"
+#define EFI_X86_KERNEL_ALLOC_LIMIT	EFI_ALLOC_LIMIT
 
 extern asmlinkage u64 __efi_call(void *fp, ...);
 
@@ -218,6 +219,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
 
 #ifdef CONFIG_EFI_MIXED
 
+#define EFI_ALLOC_LIMIT	(efi_is_64bit() ? ULONG_MAX : U32_MAX)
+
 #define ARCH_HAS_EFISTUB_WRAPPERS
 
 static inline bool efi_is_64bit(void)
......
@@ -164,6 +164,7 @@ static __always_inline void sev_es_nmi_complete(void)
 		__sev_es_nmi_complete();
 }
 extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
+extern void sev_enable(struct boot_params *bp);
 
 static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
 {
@@ -210,12 +211,15 @@ bool snp_init(struct boot_params *bp);
 void __init __noreturn snp_abort(void);
 int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
 void snp_accept_memory(phys_addr_t start, phys_addr_t end);
+u64 snp_get_unsupported_features(u64 status);
+u64 sev_get_status(void);
 #else
 static inline void sev_es_ist_enter(struct pt_regs *regs) { }
 static inline void sev_es_ist_exit(void) { }
 static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
 static inline void sev_es_nmi_complete(void) { }
 static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
+static inline void sev_enable(struct boot_params *bp) { }
 static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
 static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
 static inline void setup_ghcb(void) { }
@@ -235,6 +239,8 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
 }
 
 static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
+static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
+static inline u64 sev_get_status(void) { return 0; }
 #endif
 
 #endif
@@ -51,7 +51,9 @@ SYM_CODE_START_NOALIGN(startup_64)
 	 * for us. These identity mapped page tables map all of the
 	 * kernel pages and possibly all of memory.
 	 *
-	 * %rsi holds a physical pointer to real_mode_data.
+	 * %RSI holds the physical address of the boot_params structure
+	 * provided by the bootloader. Preserve it in %R15 so C function calls
+	 * will not clobber it.
 	 *
 	 * We come here either directly from a 64bit bootloader, or from
 	 * arch/x86/boot/compressed/head_64.S.
@@ -62,6 +64,7 @@ SYM_CODE_START_NOALIGN(startup_64)
 	 * compiled to run at we first fixup the physical addresses in our page
 	 * tables and then reload them.
 	 */
+	mov	%rsi, %r15
 
 	/* Set up the stack for verify_cpu() */
 	leaq	(__end_init_task - PTREGS_SIZE)(%rip), %rsp
@@ -75,9 +78,7 @@ SYM_CODE_START_NOALIGN(startup_64)
 	shrq	$32, %rdx
 	wrmsr
 
-	pushq	%rsi
 	call	startup_64_setup_env
-	popq	%rsi
 
 	/* Now switch to __KERNEL_CS so IRET works reliably */
 	pushq	$__KERNEL_CS
@@ -93,12 +94,10 @@ SYM_CODE_START_NOALIGN(startup_64)
 	 * Activate SEV/SME memory encryption if supported/enabled. This needs to
 	 * be done now, since this also includes setup of the SEV-SNP CPUID table,
 	 * which needs to be done before any CPUID instructions are executed in
-	 * subsequent code.
+	 * subsequent code. Pass the boot_params pointer as the first argument.
 	 */
-	movq	%rsi, %rdi
-	pushq	%rsi
+	movq	%r15, %rdi
 	call	sme_enable
-	popq	%rsi
 #endif
 
 	/* Sanitize CPU configuration */
@@ -111,9 +110,8 @@ SYM_CODE_START_NOALIGN(startup_64)
 	 * programmed into CR3.
 	 */
 	leaq	_text(%rip), %rdi
-	pushq	%rsi
+	movq	%r15, %rsi
 	call	__startup_64
-	popq	%rsi
 
 	/* Form the CR3 value being sure to include the CR3 modifier */
 	addq	$(early_top_pgt - __START_KERNEL_map), %rax
@@ -127,8 +125,6 @@ SYM_CODE_START(secondary_startup_64)
 	 * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
 	 * and someone has loaded a mapped page table.
 	 *
-	 * %rsi holds a physical pointer to real_mode_data.
-	 *
 	 * We come here either from startup_64 (using physical addresses)
 	 * or from trampoline.S (using virtual addresses).
 	 *
@@ -153,6 +149,9 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	UNWIND_HINT_END_OF_STACK
 	ANNOTATE_NOENDBR
 
+	/* Clear %R15 which holds the boot_params pointer on the boot CPU */
+	xorq	%r15, %r15
+
 	/*
 	 * Retrieve the modifier (SME encryption mask if SME is active) to be
 	 * added to the initial pgdir entry that will be programmed into CR3.
@@ -199,13 +198,9 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	 * hypervisor could lie about the C-bit position to perform a ROP
 	 * attack on the guest by writing to the unencrypted stack and wait for
 	 * the next RET instruction.
-	 * %rsi carries pointer to realmode data and is callee-clobbered. Save
-	 * and restore it.
 	 */
-	pushq	%rsi
 	movq	%rax, %rdi
 	call	sev_verify_cbit
-	popq	%rsi
 
 	/*
 	 * Switch to new page-table
@@ -365,9 +360,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	wrmsr
 
 	/* Setup and Load IDT */
-	pushq	%rsi
 	call	early_setup_idt
-	popq	%rsi
 
 	/* Check if nx is implemented */
 	movl	$0x80000001, %eax
@@ -403,9 +396,8 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
 	pushq	$0
 	popfq
 
-	/* rsi is pointer to real mode structure with interesting info.
-	   pass it to C */
-	movq	%rsi, %rdi
+	/* Pass the boot_params pointer as first argument */
+	movq	%r15, %rdi
 
 .Ljump_to_C_code:
 	/*
......
@@ -88,6 +88,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB)	+= efi-stub.o string.o intrinsics.o systable.o \
 lib-$(CONFIG_ARM)		+= arm32-stub.o
 lib-$(CONFIG_ARM64)		+= arm64.o arm64-stub.o smbios.o
 lib-$(CONFIG_X86)		+= x86-stub.o
+lib-$(CONFIG_X86_64)		+= x86-5lvl.o
 lib-$(CONFIG_RISCV)		+= riscv.o riscv-stub.o
 lib-$(CONFIG_LOONGARCH)	+= loongarch.o loongarch-stub.o
......
@@ -106,7 +106,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
 		 */
 		status = efi_random_alloc(*reserve_size, min_kimg_align,
 					  reserve_addr, phys_seed,
-					  EFI_LOADER_CODE);
+					  EFI_LOADER_CODE, EFI_ALLOC_LIMIT);
 		if (status != EFI_SUCCESS)
 			efi_warn("efi_random_alloc() failed: 0x%lx\n", status);
 	} else {
......
@@ -73,6 +73,8 @@ efi_status_t efi_parse_options(char const *cmdline)
 			efi_loglevel = CONSOLE_LOGLEVEL_QUIET;
 		} else if (!strcmp(param, "noinitrd")) {
 			efi_noinitrd = true;
+		} else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
+			efi_no5lvl = true;
 		} else if (!strcmp(param, "efi") && val) {
 			efi_nochunk = parse_option_str(val, "nochunk");
 			efi_novamap |= parse_option_str(val, "novamap");
......
@@ -33,6 +33,7 @@
 #define EFI_ALLOC_LIMIT		ULONG_MAX
 #endif
 
+extern bool efi_no5lvl;
 extern bool efi_nochunk;
 extern bool efi_nokaslr;
 extern int efi_loglevel;
@@ -955,7 +956,7 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out);
 
 efi_status_t efi_random_alloc(unsigned long size, unsigned long align,
 			      unsigned long *addr, unsigned long random_seed,
-			      int memory_type);
+			      int memory_type, unsigned long alloc_limit);
 
 efi_status_t efi_random_get_seed(void);
......
@@ -16,7 +16,8 @@
  */
 static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
 					 unsigned long size,
-					 unsigned long align_shift)
+					 unsigned long align_shift,
+					 u64 alloc_limit)
 {
 	unsigned long align = 1UL << align_shift;
 	u64 first_slot, last_slot, region_end;
@@ -29,7 +30,7 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
 		return 0;
 
 	region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1,
-			 (u64)EFI_ALLOC_LIMIT);
+			 alloc_limit);
 	if (region_end < size)
 		return 0;
@@ -54,7 +55,8 @@ efi_status_t efi_random_alloc(unsigned long size,
 			      unsigned long align,
 			      unsigned long *addr,
 			      unsigned long random_seed,
-			      int memory_type)
+			      int memory_type,
+			      unsigned long alloc_limit)
 {
 	unsigned long total_slots = 0, target_slot;
 	unsigned long total_mirrored_slots = 0;
@@ -76,7 +78,7 @@ efi_status_t efi_random_alloc(unsigned long size,
 		efi_memory_desc_t *md = (void *)map->map + map_offset;
 		unsigned long slots;
 
-		slots = get_entry_num_slots(md, size, ilog2(align));
+		slots = get_entry_num_slots(md, size, ilog2(align), alloc_limit);
 		MD_NUM_SLOTS(md) = slots;
 		total_slots += slots;
 		if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
......
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/efi.h>

#include <asm/boot.h>
#include <asm/desc.h>
#include <asm/efi.h>

#include "efistub.h"
#include "x86-stub.h"

bool efi_no5lvl;

static void (*la57_toggle)(void *cr3);

static const struct desc_struct gdt[] = {
	[GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
	[GDT_ENTRY_KERNEL_CS]   = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
};

/*
 * Enabling (or disabling) 5 level paging is tricky, because it can only be
 * done from 32-bit mode with paging disabled. This means not only that the
 * code itself must be running from 32-bit addressable physical memory, but
 * also that the root page table must be 32-bit addressable, as programming
 * a 64-bit value into CR3 when running in 32-bit mode is not supported.
 */
efi_status_t efi_setup_5level_paging(void)
{
	u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src;
	efi_status_t status;
	u8 *la57_code;

	if (!efi_is_64bit())
		return EFI_SUCCESS;

	/* check for 5 level paging support */
	if (native_cpuid_eax(0) < 7 ||
	    !(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
		return EFI_SUCCESS;

	/* allocate some 32-bit addressable memory for code and a page table */
	status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code,
				    U32_MAX);
	if (status != EFI_SUCCESS)
		return status;

	la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size);
	memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size);

	/*
	 * To avoid the need to allocate a 32-bit addressable stack, the
	 * trampoline uses a LJMP instruction to switch back to long mode.
	 * LJMP takes an absolute destination address, which needs to be
	 * fixed up at runtime.
	 */
	*(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code;

	efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE);

	return EFI_SUCCESS;
}

void efi_5level_switch(void)
{
	bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl;
	bool have_la57 = native_read_cr4() & X86_CR4_LA57;
	bool need_toggle = want_la57 ^ have_la57;
	u64 *pgt = (void *)la57_toggle + PAGE_SIZE;
	u64 *cr3 = (u64 *)__native_read_cr3();
	u64 *new_cr3;

	if (!la57_toggle || !need_toggle)
		return;

	if (!have_la57) {
		/*
		 * 5 level paging will be enabled, so a root level page needs
		 * to be allocated from the 32-bit addressable physical region,
		 * with its first entry referring to the existing hierarchy.
		 */
		new_cr3 = memset(pgt, 0, PAGE_SIZE);
		new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC;
	} else {
		/* take the new root table pointer from the current entry #0 */
		new_cr3 = (u64 *)(cr3[0] & PAGE_MASK);

		/* copy the new root table if it is not 32-bit addressable */
		if ((u64)new_cr3 > U32_MAX)
			new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE);
	}

	native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt });

	la57_toggle(new_cr3);
}
/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/efi.h>
extern void trampoline_32bit_src(void *, bool);
extern const u16 trampoline_ljmp_imm_offset;
void efi_adjust_memory_range_protection(unsigned long start,
unsigned long size);
#ifdef CONFIG_X86_64
efi_status_t efi_setup_5level_paging(void);
void efi_5level_switch(void);
#else
static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; }
static inline void efi_5level_switch(void) {}
#endif
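The two helpers declared above are intended to bracket the stub's work: the setup routine runs early, while boot services are still available so the 32-bit addressable trampoline and page can be allocated, and the switch routine runs at the very end, right before control passes to the decompressed kernel. The sketch below only illustrates that ordering; the surrounding function is hypothetical and not the actual x86-stub.c flow.

/* Hypothetical ordering sketch, not the real x86-stub.c code. */
static efi_status_t example_stub_flow(void)
{
	efi_status_t status;

	/* Early: boot services are up, so the trampoline can be allocated. */
	status = efi_setup_5level_paging();
	if (status != EFI_SUCCESS)
		return status;

	/* ... load and decompress the kernel, exit boot services ... */

	/* Late: switch paging levels right before jumping to the kernel. */
	efi_5level_switch();

	return EFI_SUCCESS;
}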
@@ -119,7 +119,7 @@ efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab)
 	}
 
 	status = efi_random_alloc(alloc_size, min_kimg_align, &image_base,
-				  seed, EFI_LOADER_CODE);
+				  seed, EFI_LOADER_CODE, EFI_ALLOC_LIMIT);
 	if (status != EFI_SUCCESS) {
 		efi_err("Failed to allocate memory\n");
 		goto free_cmdline;
......
@@ -48,7 +48,7 @@ MALLOC_VISIBLE void *malloc(int size)
 	if (!malloc_ptr)
 		malloc_ptr = free_mem_ptr;
 
-	malloc_ptr = (malloc_ptr + 3) & ~3;	/* Align */
+	malloc_ptr = (malloc_ptr + 7) & ~7;	/* Align */
 
 	p = (void *)malloc_ptr;
 	malloc_ptr += size;
......
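The changed line above is the standard power-of-two align-up idiom, now rounding the decompressor's malloc pointer to 8 bytes instead of 4 (per "decompress: Use 8 byte alignment" in the shortlog). A small standalone illustration of what (x + 7) & ~7 does; this is not kernel code:

#include <assert.h>
#include <stdint.h>

/* Round 'p' up to the next multiple of 8, as the changed line does. */
static uintptr_t align_up8(uintptr_t p)
{
	return (p + 7) & ~(uintptr_t)7;
}

int main(void)
{
	assert(align_up8(0)  == 0);
	assert(align_up8(1)  == 8);
	assert(align_up8(8)  == 8);
	assert(align_up8(13) == 16);
	return 0;
}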