Commit bd9e99f7 authored by Linus Torvalds

Merge tag 'x86_boot_for_v6.6_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 boot updates from Borislav Petkov:
 "Avoid the baremetal decompressor code when booting on an EFI machine.

   This is mandated by the current tightening of requirements on EFI
   executables when they are used in a secure boot scenario. More
   specifically, an EFI executable cannot have a single section with
   RWX permissions, which conflicts with the in-place kernel
   decompression that is done today.

   Instead, the preparatory work needed by the booting kernel image is
   now done in the EFI stub.

  Work by Ard Biesheuvel"
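
For context, the legacy path decompressed the kernel in place inside the loaded
image, which only works if the image carries a section that is writable and
executable at the same time. The series below instead lets the EFI stub allocate
a separate buffer through boot services and decompress into it. A rough sketch of
that idea, using the efi_allocate_pages() and decompress_kernel() helpers that
appear further down in this diff; boot_via_stub() itself is illustrative and not
part of the series:

        static efi_status_t boot_via_stub(unsigned long alloc_size,
                                          void (*error)(char *x))
        {
                unsigned long buf;
                efi_status_t status;

                /* Ask the firmware for a buffer large enough for the
                 * decompressed kernel; the PE image itself stays read-only. */
                status = efi_allocate_pages(alloc_size, &buf, ULONG_MAX);
                if (status != EFI_SUCCESS)
                        return status;

                /* Decompress and relocate outside of the loaded image. */
                if (decompress_kernel((unsigned char *)buf, LOAD_PHYSICAL_ADDR,
                                      error) == ULONG_MAX)
                        return EFI_LOAD_ERROR;

                return EFI_SUCCESS;
        }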

* tag 'x86_boot_for_v6.6_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (23 commits)
  x86/efistub: Avoid legacy decompressor when doing EFI boot
  x86/efistub: Perform SNP feature test while running in the firmware
  efi/libstub: Add limit argument to efi_random_alloc()
  x86/decompressor: Factor out kernel decompression and relocation
  x86/decompressor: Move global symbol references to C code
  decompress: Use 8 byte alignment
  x86/efistub: Prefer EFI memory attributes protocol over DXE services
  x86/efistub: Perform 4/5 level paging switch from the stub
  x86/decompressor: Merge trampoline cleanup with switching code
  x86/decompressor: Pass pgtable address to trampoline directly
  x86/decompressor: Only call the trampoline when changing paging levels
  x86/decompressor: Call trampoline directly from C code
  x86/decompressor: Avoid the need for a stack in the 32-bit trampoline
  x86/decompressor: Use standard calling convention for trampoline
  x86/decompressor: Call trampoline as a normal function
  x86/decompressor: Assign paging related global variables earlier
  x86/decompressor: Store boot_params pointer in callee save register
  x86/efistub: Clear BSS in EFI handover protocol entrypoint
  x86/decompressor: Avoid magic offsets for EFI handover entrypoint
  x86/efistub: Simplify and clean up handover entry code
  ...
parents 6f49693a a1b87d54
......@@ -1417,7 +1417,7 @@ execution context provided by the EFI firmware.
The function prototype for the handover entry point looks like this::
efi_main(void *handle, efi_system_table_t *table, struct boot_params *bp)
efi_stub_entry(void *handle, efi_system_table_t *table, struct boot_params *bp)
'handle' is the EFI image handle passed to the boot loader by the EFI
firmware, 'table' is the EFI system table - these are the first two
......
......@@ -74,6 +74,11 @@ LDFLAGS_vmlinux += -z noexecstack
ifeq ($(CONFIG_LD_IS_BFD),y)
LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments)
endif
ifeq ($(CONFIG_EFI_STUB),y)
# ensure that the static EFI stub library will be pulled in, even if it is
# never referenced explicitly from the startup code
LDFLAGS_vmlinux += -u efi_pe_entry
endif
LDFLAGS_vmlinux += -T
hostprogs := mkpiggy
......
......@@ -26,8 +26,8 @@
* When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode()
* is the first thing that runs after switching to long mode. Depending on
* whether the EFI handover protocol or the compat entry point was used to
* enter the kernel, it will either branch to the 64-bit EFI handover
* entrypoint at offset 0x390 in the image, or to the 64-bit EFI PE/COFF
* enter the kernel, it will either branch to the common 64-bit EFI stub
* entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF
* entrypoint efi_pe_entry(). In the former case, the bootloader must provide a
* struct bootparams pointer as the third argument, so the presence of such a
* pointer is used to disambiguate.
......@@ -37,21 +37,23 @@
* | efi32_pe_entry |---->| | | +-----------+--+
* +------------------+ | | +------+----------------+ |
* | startup_32 |---->| startup_64_mixed_mode | |
* +------------------+ | | +------+----------------+ V
* | efi32_stub_entry |---->| | | +------------------+
* +------------------+ +------------+ +---->| efi64_stub_entry |
* +-------------+----+
* +------------+ +----------+ |
* | startup_64 |<----| efi_main |<--------------+
* +------------+ +----------+
* +------------------+ | | +------+----------------+ |
* | efi32_stub_entry |---->| | | |
* +------------------+ +------------+ | |
* V |
* +------------+ +----------------+ |
* | startup_64 |<----| efi_stub_entry |<--------+
* +------------+ +----------------+
*/
SYM_FUNC_START(startup_64_mixed_mode)
lea efi32_boot_args(%rip), %rdx
mov 0(%rdx), %edi
mov 4(%rdx), %esi
#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
mov 8(%rdx), %edx // saved bootparams pointer
test %edx, %edx
jnz efi64_stub_entry
jnz efi_stub_entry
#endif
/*
* efi_pe_entry uses MS calling convention, which requires 32 bytes of
* shadow space on the stack even if all arguments are passed in
......@@ -138,6 +140,28 @@ SYM_FUNC_START(__efi64_thunk)
SYM_FUNC_END(__efi64_thunk)
.code32
#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
SYM_FUNC_START(efi32_stub_entry)
call 1f
1: popl %ecx
/* Clear BSS */
xorl %eax, %eax
leal (_bss - 1b)(%ecx), %edi
leal (_ebss - 1b)(%ecx), %ecx
subl %edi, %ecx
shrl $2, %ecx
cld
rep stosl
add $0x4, %esp /* Discard return address */
popl %ecx
popl %edx
popl %esi
jmp efi32_entry
SYM_FUNC_END(efi32_stub_entry)
#endif
/*
* EFI service pointer must be in %edi.
*
......@@ -218,7 +242,7 @@ SYM_FUNC_END(efi_enter32)
* stub may still exit and return to the firmware using the Exit() EFI boot
* service.]
*/
SYM_FUNC_START(efi32_entry)
SYM_FUNC_START_LOCAL(efi32_entry)
call 1f
1: pop %ebx
......@@ -245,10 +269,6 @@ SYM_FUNC_START(efi32_entry)
jmp startup_32
SYM_FUNC_END(efi32_entry)
#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime)
#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol)
#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base)
/*
* efi_status_t efi32_pe_entry(efi_handle_t image_handle,
* efi_system_table_32_t *sys_table)
......@@ -256,8 +276,6 @@ SYM_FUNC_END(efi32_entry)
SYM_FUNC_START(efi32_pe_entry)
pushl %ebp
movl %esp, %ebp
pushl %eax // dummy push to allocate loaded_image
pushl %ebx // save callee-save registers
pushl %edi
......@@ -266,48 +284,8 @@ SYM_FUNC_START(efi32_pe_entry)
movl $0x80000003, %eax // EFI_UNSUPPORTED
jnz 2f
call 1f
1: pop %ebx
/* Get the loaded image protocol pointer from the image handle */
leal -4(%ebp), %eax
pushl %eax // &loaded_image
leal (loaded_image_proto - 1b)(%ebx), %eax
pushl %eax // pass the GUID address
pushl 8(%ebp) // pass the image handle
/*
* Note the alignment of the stack frame.
* sys_table
* handle <-- 16-byte aligned on entry by ABI
* return address
* frame pointer
* loaded_image <-- local variable
* saved %ebx <-- 16-byte aligned here
* saved %edi
* &loaded_image
* &loaded_image_proto
* handle <-- 16-byte aligned for call to handle_protocol
*/
movl 12(%ebp), %eax // sys_table
movl ST32_boottime(%eax), %eax // sys_table->boottime
call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol
addl $12, %esp // restore argument space
testl %eax, %eax
jnz 2f
movl 8(%ebp), %ecx // image_handle
movl 12(%ebp), %edx // sys_table
movl -4(%ebp), %esi // loaded_image
movl LI32_image_base(%esi), %esi // loaded_image->image_base
leal (startup_32 - 1b)(%ebx), %ebp // runtime address of startup_32
/*
* We need to set the image_offset variable here since startup_32() will
* use it before we get to the 64-bit efi_pe_entry() in C code.
*/
subl %esi, %ebp // calculate image_offset
movl %ebp, (image_offset - 1b)(%ebx) // save image_offset
xorl %esi, %esi
jmp efi32_entry // pass %ecx, %edx, %esi
// no other registers remain live
......@@ -318,14 +296,13 @@ SYM_FUNC_START(efi32_pe_entry)
RET
SYM_FUNC_END(efi32_pe_entry)
.section ".rodata"
/* EFI loaded image protocol GUID */
.balign 4
SYM_DATA_START_LOCAL(loaded_image_proto)
.long 0x5b1b31a1
.word 0x9562, 0x11d2
.byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b
SYM_DATA_END(loaded_image_proto)
#ifdef CONFIG_EFI_HANDOVER_PROTOCOL
.org efi32_stub_entry + 0x200
.code64
SYM_FUNC_START_NOALIGN(efi64_stub_entry)
jmp efi_handover_entry
SYM_FUNC_END(efi64_stub_entry)
#endif
.data
.balign 8
......
......@@ -84,19 +84,6 @@ SYM_FUNC_START(startup_32)
#ifdef CONFIG_RELOCATABLE
leal startup_32@GOTOFF(%edx), %ebx
#ifdef CONFIG_EFI_STUB
/*
* If we were loaded via the EFI LoadImage service, startup_32() will be at an
* offset to the start of the space allocated for the image. efi_pe_entry() will
* set up image_offset to tell us where the image actually starts, so that we
* can use the full available buffer.
* image_offset = startup_32 - image_base
* Otherwise image_offset will be zero and has no effect on the calculations.
*/
subl image_offset@GOTOFF(%edx), %ebx
#endif
movl BP_kernel_alignment(%esi), %eax
decl %eax
addl %eax, %ebx
......@@ -150,17 +137,6 @@ SYM_FUNC_START(startup_32)
jmp *%eax
SYM_FUNC_END(startup_32)
#ifdef CONFIG_EFI_STUB
SYM_FUNC_START(efi32_stub_entry)
add $0x4, %esp
movl 8(%esp), %esi /* save boot_params pointer */
call efi_main
/* efi_main returns the possibly relocated address of startup_32 */
jmp *%eax
SYM_FUNC_END(efi32_stub_entry)
SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry)
#endif
.text
SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
......@@ -179,13 +155,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated)
*/
/* push arguments for extract_kernel: */
pushl output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */
pushl %ebp /* output address */
pushl input_len@GOTOFF(%ebx) /* input_len */
leal input_data@GOTOFF(%ebx), %eax
pushl %eax /* input_data */
leal boot_heap@GOTOFF(%ebx), %eax
pushl %eax /* heap area */
pushl %esi /* real mode pointer */
call extract_kernel /* returns kernel entry point in %eax */
addl $24, %esp
......@@ -213,8 +183,6 @@ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end)
*/
.bss
.balign 4
boot_heap:
.fill BOOT_HEAP_SIZE, 1, 0
boot_stack:
.fill BOOT_STACK_SIZE, 1, 0
boot_stack_end:
......@@ -330,6 +330,33 @@ static size_t parse_elf(void *output)
return ehdr.e_entry - LOAD_PHYSICAL_ADDR;
}
const unsigned long kernel_total_size = VO__end - VO__text;
static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4);
extern unsigned char input_data[];
extern unsigned int input_len, output_len;
unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
void (*error)(char *x))
{
unsigned long entry;
if (!free_mem_ptr) {
free_mem_ptr = (unsigned long)boot_heap;
free_mem_end_ptr = (unsigned long)boot_heap + sizeof(boot_heap);
}
if (__decompress(input_data, input_len, NULL, NULL, outbuf, output_len,
NULL, error) < 0)
return ULONG_MAX;
entry = parse_elf(outbuf);
handle_relocations(outbuf, output_len, virt_addr);
return entry;
}
/*
* The compressed kernel image (ZO), has been moved so that its position
* is against the end of the buffer used to hold the uncompressed kernel
......@@ -347,14 +374,10 @@ static size_t parse_elf(void *output)
* |-------uncompressed kernel image---------|
*
*/
asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
unsigned char *input_data,
unsigned long input_len,
unsigned char *output,
unsigned long output_len)
asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
{
const unsigned long kernel_total_size = VO__end - VO__text;
unsigned long virt_addr = LOAD_PHYSICAL_ADDR;
memptr heap = (memptr)boot_heap;
unsigned long needed_size;
size_t entry_offset;
......@@ -412,7 +435,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
* entries. This ensures the full mapped area is usable RAM
* and doesn't include any reserved areas.
*/
needed_size = max(output_len, kernel_total_size);
needed_size = max_t(unsigned long, output_len, kernel_total_size);
#ifdef CONFIG_X86_64
needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN);
#endif
......@@ -443,7 +466,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
#ifdef CONFIG_X86_64
if (heap > 0x3fffffffffffUL)
error("Destination address too large");
if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE)
if (virt_addr + needed_size > KERNEL_IMAGE_SIZE)
error("Destination virtual address is beyond the kernel mapping area");
#else
if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
......@@ -461,10 +484,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap,
accept_memory(__pa(output), __pa(output) + needed_size);
}
__decompress(input_data, input_len, NULL, NULL, output, output_len,
NULL, error);
entry_offset = parse_elf(output);
handle_relocations(output, output_len, virt_addr);
entry_offset = decompress_kernel(output, virt_addr, error);
debug_putstr("done.\nBooting the kernel (entry_offset: 0x");
debug_puthex(entry_offset);
......
......@@ -179,9 +179,7 @@ static inline int count_immovable_mem_regions(void) { return 0; }
#endif
/* ident_map_64.c */
#ifdef CONFIG_X86_5LEVEL
extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d;
#endif
extern void kernel_add_identity_map(unsigned long start, unsigned long end);
/* Used by PAGE_KERN* macros: */
......
......@@ -3,18 +3,16 @@
#define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE)
#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0
#define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE
#define TRAMPOLINE_32BIT_CODE_SIZE 0x80
#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE
#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0
#ifndef __ASSEMBLER__
extern unsigned long *trampoline_32bit;
extern void trampoline_32bit_src(void *return_ptr);
extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
extern const u16 trampoline_ljmp_imm_offset;
#endif /* __ASSEMBLER__ */
#endif /* BOOT_COMPRESSED_PAGETABLE_H */
......@@ -16,11 +16,6 @@ unsigned int __section(".data") pgdir_shift = 39;
unsigned int __section(".data") ptrs_per_p4d = 1;
#endif
struct paging_config {
unsigned long trampoline_start;
unsigned long l5_required;
};
/* Buffer to preserve trampoline memory */
static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
......@@ -29,7 +24,7 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
* purposes.
*
* Avoid putting the pointer into .bss as it will be cleared between
* paging_prepare() and extract_kernel().
* configure_5level_paging() and extract_kernel().
*/
unsigned long *trampoline_32bit __section(".data");
......@@ -106,12 +101,13 @@ static unsigned long find_trampoline_placement(void)
return bios_start - TRAMPOLINE_32BIT_SIZE;
}
struct paging_config paging_prepare(void *rmode)
asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable)
{
struct paging_config paging_config = {};
void (*toggle_la57)(void *cr3);
bool l5_required = false;
/* Initialize boot_params. Required for cmdline_find_option_bool(). */
boot_params = rmode;
boot_params = bp;
/*
* Check if LA57 is desired and supported.
......@@ -129,12 +125,22 @@ struct paging_config paging_prepare(void *rmode)
!cmdline_find_option_bool("no5lvl") &&
native_cpuid_eax(0) >= 7 &&
(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
paging_config.l5_required = 1;
l5_required = true;
/* Initialize variables for 5-level paging */
__pgtable_l5_enabled = 1;
pgdir_shift = 48;
ptrs_per_p4d = 512;
}
paging_config.trampoline_start = find_trampoline_placement();
/*
* The trampoline will not be used if the paging mode is already set to
* the desired one.
*/
if (l5_required == !!(native_read_cr4() & X86_CR4_LA57))
return;
trampoline_32bit = (unsigned long *)paging_config.trampoline_start;
trampoline_32bit = (unsigned long *)find_trampoline_placement();
/* Preserve trampoline memory */
memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE);
......@@ -143,32 +149,32 @@ struct paging_config paging_prepare(void *rmode)
memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
/* Copy trampoline code in place */
memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
toggle_la57 = memcpy(trampoline_32bit +
TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
&trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
/*
* Avoid the need for a stack in the 32-bit trampoline code, by using
* LJMP rather than LRET to return back to long mode. LJMP takes an
* immediate absolute address, which needs to be adjusted based on the
* placement of the trampoline.
*/
*(u32 *)((u8 *)toggle_la57 + trampoline_ljmp_imm_offset) +=
(unsigned long)toggle_la57;
/*
* The code below prepares page table in trampoline memory.
*
* The new page table will be used by trampoline code for switching
* from 4- to 5-level paging or vice versa.
*
* If switching is not required, the page table is unused: trampoline
* code wouldn't touch CR3.
*/
/*
* We are not going to use the page table in trampoline memory if we
* are already in the desired paging mode.
*/
if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57))
goto out;
if (paging_config.l5_required) {
if (l5_required) {
/*
* For 4- to 5-level paging transition, set up current CR3 as
* the first and the only entry in a new top-level page table.
*/
trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC;
*trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC;
} else {
unsigned long src;
......@@ -181,38 +187,17 @@ struct paging_config paging_prepare(void *rmode)
* may be above 4G.
*/
src = *(unsigned long *)__native_read_cr3() & PAGE_MASK;
memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long),
(void *)src, PAGE_SIZE);
memcpy(trampoline_32bit, (void *)src, PAGE_SIZE);
}
out:
return paging_config;
}
void cleanup_trampoline(void *pgtable)
{
void *trampoline_pgtable;
trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
toggle_la57(trampoline_32bit);
/*
* Move the top level page table out of trampoline memory,
* if it's there.
* Move the top level page table out of trampoline memory.
*/
if ((void *)__native_read_cr3() == trampoline_pgtable) {
memcpy(pgtable, trampoline_pgtable, PAGE_SIZE);
native_write_cr3((unsigned long)pgtable);
}
memcpy(pgtable, trampoline_32bit, PAGE_SIZE);
native_write_cr3((unsigned long)pgtable);
/* Restore trampoline memory */
memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE);
/* Initialize variables for 5-level paging */
#ifdef CONFIG_X86_5LEVEL
if (__read_cr4() & X86_CR4_LA57) {
__pgtable_l5_enabled = 1;
pgdir_shift = 48;
ptrs_per_p4d = 512;
}
#endif
}
......@@ -367,20 +367,25 @@ static void enforce_vmpl0(void)
*/
#define SNP_FEATURES_PRESENT (0)
u64 snp_get_unsupported_features(u64 status)
{
if (!(status & MSR_AMD64_SEV_SNP_ENABLED))
return 0;
return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
}
void snp_check_features(void)
{
u64 unsupported;
if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
return;
/*
* Terminate the boot if hypervisor has enabled any feature lacking
* guest side implementation. Pass on the unsupported features mask through
* EXIT_INFO_2 of the GHCB protocol so that those features can be reported
* as part of the guest boot failure.
*/
unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT;
unsupported = snp_get_unsupported_features(sev_status);
if (unsupported) {
if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb()))
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
......@@ -390,35 +395,22 @@ void snp_check_features(void)
}
}
void sev_enable(struct boot_params *bp)
/*
* sev_check_cpu_support - Check for SEV support in the CPU capabilities
*
* Returns < 0 if SEV is not supported, otherwise the position of the
* encryption bit in the page table descriptors.
*/
static int sev_check_cpu_support(void)
{
unsigned int eax, ebx, ecx, edx;
struct msr m;
bool snp;
/*
* bp->cc_blob_address should only be set by boot/compressed kernel.
* Initialize it to 0 to ensure that uninitialized values from
* buggy bootloaders aren't propagated.
*/
if (bp)
bp->cc_blob_address = 0;
/*
* Do an initial SEV capability check before snp_init() which
* loads the CPUID page and the same checks afterwards are done
* without the hypervisor and are trustworthy.
*
* If the HV fakes SEV support, the guest will crash'n'burn
* which is good enough.
*/
/* Check for the SME/SEV support leaf */
eax = 0x80000000;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
if (eax < 0x8000001f)
return;
return -ENODEV;
/*
* Check for the SME/SEV feature:
......@@ -433,6 +425,35 @@ void sev_enable(struct boot_params *bp)
native_cpuid(&eax, &ebx, &ecx, &edx);
/* Check whether SEV is supported */
if (!(eax & BIT(1)))
return -ENODEV;
return ebx & 0x3f;
}
void sev_enable(struct boot_params *bp)
{
struct msr m;
int bitpos;
bool snp;
/*
* bp->cc_blob_address should only be set by boot/compressed kernel.
* Initialize it to 0 to ensure that uninitialized values from
* buggy bootloaders aren't propagated.
*/
if (bp)
bp->cc_blob_address = 0;
/*
* Do an initial SEV capability check before snp_init() which
* loads the CPUID page and the same checks afterwards are done
* without the hypervisor and are trustworthy.
*
* If the HV fakes SEV support, the guest will crash'n'burn
* which is good enough.
*/
if (sev_check_cpu_support() < 0)
return;
/*
......@@ -443,26 +464,8 @@ void sev_enable(struct boot_params *bp)
/* Now repeat the checks with the SNP CPUID table. */
/* Recheck the SME/SEV support leaf */
eax = 0x80000000;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
if (eax < 0x8000001f)
return;
/*
* Recheck for the SME/SEV feature:
* CPUID Fn8000_001F[EAX]
* - Bit 0 - Secure Memory Encryption support
* - Bit 1 - Secure Encrypted Virtualization support
* CPUID Fn8000_001F[EBX]
* - Bits 5:0 - Pagetable bit position used to indicate encryption
*/
eax = 0x8000001f;
ecx = 0;
native_cpuid(&eax, &ebx, &ecx, &edx);
/* Check whether SEV is supported */
if (!(eax & BIT(1))) {
bitpos = sev_check_cpu_support();
if (bitpos < 0) {
if (snp)
error("SEV-SNP support indicated by CC blob, but not CPUID.");
return;
......@@ -494,7 +497,24 @@ void sev_enable(struct boot_params *bp)
if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED))
error("SEV-SNP supported indicated by CC blob, but not SEV status MSR.");
sme_me_mask = BIT_ULL(ebx & 0x3f);
sme_me_mask = BIT_ULL(bitpos);
}
/*
* sev_get_status - Retrieve the SEV status mask
*
* Returns 0 if the CPU is not SEV capable, otherwise the value of the
* AMD64_SEV MSR.
*/
u64 sev_get_status(void)
{
struct msr m;
if (sev_check_cpu_support() < 0)
return 0;
boot_rdmsr(MSR_AMD64_SEV, &m);
return m.q;
}
/* Search for Confidential Computing blob in the EFI config table. */
......
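
The two new helpers above, snp_get_unsupported_features() and sev_get_status(),
are what allow the SNP feature test to run from the EFI stub while the firmware
is still in charge ('x86/efistub: Perform SNP feature test while running in the
firmware'). A hedged sketch of how a stub-side caller can combine them; the exact
call site is not shown in this excerpt:

        u64 unsupported;

        /* Read the SEV status MSR (0 if the CPU is not SEV capable) and
         * check it against the features this kernel actually implements. */
        unsupported = snp_get_unsupported_features(sev_get_status());
        if (unsupported)
                efi_err("Unsupported SEV-SNP features: 0x%llx\n", unsupported);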
......@@ -62,4 +62,12 @@
# define BOOT_STACK_SIZE 0x1000
#endif
#ifndef __ASSEMBLY__
extern unsigned int output_len;
extern const unsigned long kernel_total_size;
unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
void (*error)(char *x));
#endif
#endif /* _ASM_X86_BOOT_H */
......@@ -90,6 +90,8 @@ static inline void efi_fpu_end(void)
}
#ifdef CONFIG_X86_32
#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1)
#define arch_efi_call_virt_setup() \
({ \
efi_fpu_begin(); \
......@@ -103,8 +105,7 @@ static inline void efi_fpu_end(void)
})
#else /* !CONFIG_X86_32 */
#define EFI_LOADER_SIGNATURE "EL64"
#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT
extern asmlinkage u64 __efi_call(void *fp, ...);
......@@ -218,6 +219,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size,
#ifdef CONFIG_EFI_MIXED
#define EFI_ALLOC_LIMIT (efi_is_64bit() ? ULONG_MAX : U32_MAX)
#define ARCH_HAS_EFISTUB_WRAPPERS
static inline bool efi_is_64bit(void)
......
......@@ -164,6 +164,7 @@ static __always_inline void sev_es_nmi_complete(void)
__sev_es_nmi_complete();
}
extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
extern void sev_enable(struct boot_params *bp);
static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
{
......@@ -210,12 +211,15 @@ bool snp_init(struct boot_params *bp);
void __init __noreturn snp_abort(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
static inline void sev_es_nmi_complete(void) { }
static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
static inline void sev_enable(struct boot_params *bp) { }
static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; }
static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; }
static inline void setup_ghcb(void) { }
......@@ -235,6 +239,8 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
}
static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
#endif
#endif
......@@ -51,7 +51,9 @@ SYM_CODE_START_NOALIGN(startup_64)
* for us. These identity mapped page tables map all of the
* kernel pages and possibly all of memory.
*
* %rsi holds a physical pointer to real_mode_data.
* %RSI holds the physical address of the boot_params structure
* provided by the bootloader. Preserve it in %R15 so C function calls
* will not clobber it.
*
* We come here either directly from a 64bit bootloader, or from
* arch/x86/boot/compressed/head_64.S.
......@@ -62,6 +64,7 @@ SYM_CODE_START_NOALIGN(startup_64)
* compiled to run at we first fixup the physical addresses in our page
* tables and then reload them.
*/
mov %rsi, %r15
/* Set up the stack for verify_cpu() */
leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp
......@@ -75,9 +78,7 @@ SYM_CODE_START_NOALIGN(startup_64)
shrq $32, %rdx
wrmsr
pushq %rsi
call startup_64_setup_env
popq %rsi
/* Now switch to __KERNEL_CS so IRET works reliably */
pushq $__KERNEL_CS
......@@ -93,12 +94,10 @@ SYM_CODE_START_NOALIGN(startup_64)
* Activate SEV/SME memory encryption if supported/enabled. This needs to
* be done now, since this also includes setup of the SEV-SNP CPUID table,
* which needs to be done before any CPUID instructions are executed in
* subsequent code.
* subsequent code. Pass the boot_params pointer as the first argument.
*/
movq %rsi, %rdi
pushq %rsi
movq %r15, %rdi
call sme_enable
popq %rsi
#endif
/* Sanitize CPU configuration */
......@@ -111,9 +110,8 @@ SYM_CODE_START_NOALIGN(startup_64)
* programmed into CR3.
*/
leaq _text(%rip), %rdi
pushq %rsi
movq %r15, %rsi
call __startup_64
popq %rsi
/* Form the CR3 value being sure to include the CR3 modifier */
addq $(early_top_pgt - __START_KERNEL_map), %rax
......@@ -127,8 +125,6 @@ SYM_CODE_START(secondary_startup_64)
* At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0,
* and someone has loaded a mapped page table.
*
* %rsi holds a physical pointer to real_mode_data.
*
* We come here either from startup_64 (using physical addresses)
* or from trampoline.S (using virtual addresses).
*
......@@ -153,6 +149,9 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
UNWIND_HINT_END_OF_STACK
ANNOTATE_NOENDBR
/* Clear %R15 which holds the boot_params pointer on the boot CPU */
xorq %r15, %r15
/*
* Retrieve the modifier (SME encryption mask if SME is active) to be
* added to the initial pgdir entry that will be programmed into CR3.
......@@ -199,13 +198,9 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
* hypervisor could lie about the C-bit position to perform a ROP
* attack on the guest by writing to the unencrypted stack and wait for
* the next RET instruction.
* %rsi carries pointer to realmode data and is callee-clobbered. Save
* and restore it.
*/
pushq %rsi
movq %rax, %rdi
call sev_verify_cbit
popq %rsi
/*
* Switch to new page-table
......@@ -365,9 +360,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
wrmsr
/* Setup and Load IDT */
pushq %rsi
call early_setup_idt
popq %rsi
/* Check if nx is implemented */
movl $0x80000001, %eax
......@@ -403,9 +396,8 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
pushq $0
popfq
/* rsi is pointer to real mode structure with interesting info.
pass it to C */
movq %rsi, %rdi
/* Pass the boot_params pointer as first argument */
movq %r15, %rdi
.Ljump_to_C_code:
/*
......
......@@ -88,6 +88,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o intrinsics.o systable.o \
lib-$(CONFIG_ARM) += arm32-stub.o
lib-$(CONFIG_ARM64) += arm64.o arm64-stub.o smbios.o
lib-$(CONFIG_X86) += x86-stub.o
lib-$(CONFIG_X86_64) += x86-5lvl.o
lib-$(CONFIG_RISCV) += riscv.o riscv-stub.o
lib-$(CONFIG_LOONGARCH) += loongarch.o loongarch-stub.o
......
......@@ -106,7 +106,7 @@ efi_status_t handle_kernel_image(unsigned long *image_addr,
*/
status = efi_random_alloc(*reserve_size, min_kimg_align,
reserve_addr, phys_seed,
EFI_LOADER_CODE);
EFI_LOADER_CODE, EFI_ALLOC_LIMIT);
if (status != EFI_SUCCESS)
efi_warn("efi_random_alloc() failed: 0x%lx\n", status);
} else {
......
......@@ -73,6 +73,8 @@ efi_status_t efi_parse_options(char const *cmdline)
efi_loglevel = CONSOLE_LOGLEVEL_QUIET;
} else if (!strcmp(param, "noinitrd")) {
efi_noinitrd = true;
} else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
efi_no5lvl = true;
} else if (!strcmp(param, "efi") && val) {
efi_nochunk = parse_option_str(val, "nochunk");
efi_novamap |= parse_option_str(val, "novamap");
......
......@@ -33,6 +33,7 @@
#define EFI_ALLOC_LIMIT ULONG_MAX
#endif
extern bool efi_no5lvl;
extern bool efi_nochunk;
extern bool efi_nokaslr;
extern int efi_loglevel;
......@@ -955,7 +956,7 @@ efi_status_t efi_get_random_bytes(unsigned long size, u8 *out);
efi_status_t efi_random_alloc(unsigned long size, unsigned long align,
unsigned long *addr, unsigned long random_seed,
int memory_type);
int memory_type, unsigned long alloc_limit);
efi_status_t efi_random_get_seed(void);
......
......@@ -16,7 +16,8 @@
*/
static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
unsigned long size,
unsigned long align_shift)
unsigned long align_shift,
u64 alloc_limit)
{
unsigned long align = 1UL << align_shift;
u64 first_slot, last_slot, region_end;
......@@ -29,7 +30,7 @@ static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
return 0;
region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1,
(u64)EFI_ALLOC_LIMIT);
alloc_limit);
if (region_end < size)
return 0;
......@@ -54,7 +55,8 @@ efi_status_t efi_random_alloc(unsigned long size,
unsigned long align,
unsigned long *addr,
unsigned long random_seed,
int memory_type)
int memory_type,
unsigned long alloc_limit)
{
unsigned long total_slots = 0, target_slot;
unsigned long total_mirrored_slots = 0;
......@@ -76,7 +78,7 @@ efi_status_t efi_random_alloc(unsigned long size,
efi_memory_desc_t *md = (void *)map->map + map_offset;
unsigned long slots;
slots = get_entry_num_slots(md, size, ilog2(align));
slots = get_entry_num_slots(md, size, ilog2(align), alloc_limit);
MD_NUM_SLOTS(md) = slots;
total_slots += slots;
if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
......
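
With the extra alloc_limit parameter, callers can now bound the randomized
placement, e.g. x86 can keep the kernel below the EFI_X86_KERNEL_ALLOC_LIMIT
defined earlier in this diff (512 MiB minus one on 32-bit). A hedged example of
such a call; the seed and alignment values are placeholders rather than the
exact call site from the patch:

        unsigned long addr;
        efi_status_t status;

        /* Place the kernel at a random, suitably aligned address that is
         * guaranteed to stay below the architecture's allocation limit. */
        status = efi_random_alloc(kernel_total_size, CONFIG_PHYSICAL_ALIGN,
                                  &addr, seed, EFI_LOADER_CODE,
                                  EFI_X86_KERNEL_ALLOC_LIMIT);
        if (status != EFI_SUCCESS)
                efi_warn("efi_random_alloc() failed: 0x%lx\n", status);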
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/efi.h>
#include <asm/boot.h>
#include <asm/desc.h>
#include <asm/efi.h>
#include "efistub.h"
#include "x86-stub.h"
bool efi_no5lvl;
static void (*la57_toggle)(void *cr3);
static const struct desc_struct gdt[] = {
[GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
[GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
};
/*
* Enabling (or disabling) 5 level paging is tricky, because it can only be
* done from 32-bit mode with paging disabled. This means not only that the
* code itself must be running from 32-bit addressable physical memory, but
* also that the root page table must be 32-bit addressable, as programming
* a 64-bit value into CR3 when running in 32-bit mode is not supported.
*/
efi_status_t efi_setup_5level_paging(void)
{
u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src;
efi_status_t status;
u8 *la57_code;
if (!efi_is_64bit())
return EFI_SUCCESS;
/* check for 5 level paging support */
if (native_cpuid_eax(0) < 7 ||
!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
return EFI_SUCCESS;
/* allocate some 32-bit addressable memory for code and a page table */
status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code,
U32_MAX);
if (status != EFI_SUCCESS)
return status;
la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size);
memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size);
/*
* To avoid the need to allocate a 32-bit addressable stack, the
* trampoline uses a LJMP instruction to switch back to long mode.
* LJMP takes an absolute destination address, which needs to be
* fixed up at runtime.
*/
*(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code;
efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE);
return EFI_SUCCESS;
}
void efi_5level_switch(void)
{
bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl;
bool have_la57 = native_read_cr4() & X86_CR4_LA57;
bool need_toggle = want_la57 ^ have_la57;
u64 *pgt = (void *)la57_toggle + PAGE_SIZE;
u64 *cr3 = (u64 *)__native_read_cr3();
u64 *new_cr3;
if (!la57_toggle || !need_toggle)
return;
if (!have_la57) {
/*
* 5 level paging will be enabled, so a root level page needs
* to be allocated from the 32-bit addressable physical region,
* with its first entry referring to the existing hierarchy.
*/
new_cr3 = memset(pgt, 0, PAGE_SIZE);
new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC;
} else {
/* take the new root table pointer from the current entry #0 */
new_cr3 = (u64 *)(cr3[0] & PAGE_MASK);
/* copy the new root table if it is not 32-bit addressable */
if ((u64)new_cr3 > U32_MAX)
new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE);
}
native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt });
la57_toggle(new_cr3);
}
/* SPDX-License-Identifier: GPL-2.0-only */
#include <linux/efi.h>
extern void trampoline_32bit_src(void *, bool);
extern const u16 trampoline_ljmp_imm_offset;
void efi_adjust_memory_range_protection(unsigned long start,
unsigned long size);
#ifdef CONFIG_X86_64
efi_status_t efi_setup_5level_paging(void);
void efi_5level_switch(void);
#else
static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; }
static inline void efi_5level_switch(void) {}
#endif
......@@ -119,7 +119,7 @@ efi_zboot_entry(efi_handle_t handle, efi_system_table_t *systab)
}
status = efi_random_alloc(alloc_size, min_kimg_align, &image_base,
seed, EFI_LOADER_CODE);
seed, EFI_LOADER_CODE, EFI_ALLOC_LIMIT);
if (status != EFI_SUCCESS) {
efi_err("Failed to allocate memory\n");
goto free_cmdline;
......
......@@ -48,7 +48,7 @@ MALLOC_VISIBLE void *malloc(int size)
if (!malloc_ptr)
malloc_ptr = free_mem_ptr;
malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */
malloc_ptr = (malloc_ptr + 7) & ~7; /* Align */
p = (void *)malloc_ptr;
malloc_ptr += size;
......