Commit b29f3771 authored by Linus Torvalds

Merge tag 'x86-boot-2024-03-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 boot updates from Ingo Molnar:

 - Continuing work by Ard Biesheuvel to improve the x86 early startup
   code, with the long-term goal to make it position independent:

      - Get rid of early accesses to global objects, either by moving
        them to the stack, deferring the access until later, or dropping
        the globals entirely

      - Move all code that runs early via the 1:1 mapping into
        .head.text, and move code that does not out of it, so that build
        time checks can be added later to ensure that no inadvertent
        absolute references were emitted into code that does not
        tolerate them

      - Remove fixup_pointer() and occurrences of __pa_symbol(), both
        of which rely on the compiler emitting absolute references,
        something that is not guaranteed (see the RIP_REL_REF() sketch
        after this list)

 - Improve the early console code

 - Add early console message about ignored NMIs, so that users are at
   least warned about their existence - even if we cannot do anything
   about them

 - Improve the kexec code's kernel load address handling

 - Enable more X86S (simplified x86) bits

 - Simplify early boot GDT handling

 - Micro-optimize the boot code a bit

 - Misc cleanups
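
As an illustration of the position-independence work above - a minimal
sketch of the RIP_REL_REF() accessor this series converges on (defined
in arch/x86/include/asm/asm.h; formatting simplified here). The LEA
with an explicit (%rip) operand computes the address relative to the
instruction pointer, so the access works even while running from the
early 1:1 mapping rather than the link-time address:

    static __always_inline __pure void *rip_rel_ptr(void *p)
    {
            asm("leaq %c1(%%rip), %0" : "=r" (p) : "i" (p));

            return p;
    }

    #define RIP_REL_REF(var)    (*(typeof(&(var)))rip_rel_ptr(&(var)))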

* tag 'x86-boot-2024-03-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (22 commits)
  x86/sev: Move early startup code into .head.text section
  x86/sme: Move early SME kernel encryption handling into .head.text
  x86/boot: Move mem_encrypt= parsing to the decompressor
  efi/libstub: Add generic support for parsing mem_encrypt=
  x86/startup_64: Simplify virtual switch on primary boot
  x86/startup_64: Simplify calculation of initial page table address
  x86/startup_64: Defer assignment of 5-level paging global variables
  x86/startup_64: Simplify CR4 handling in startup code
  x86/boot: Use 32-bit XOR to clear registers
  efi/x86: Set the PE/COFF header's NX compat flag unconditionally
  x86/boot/64: Load the final kernel GDT during early boot directly, remove startup_gdt[]
  x86/boot/64: Use RIP_REL_REF() to access early_top_pgt[]
  x86/boot/64: Use RIP_REL_REF() to access early page tables
  x86/boot/64: Use RIP_REL_REF() to access '__supported_pte_mask'
  x86/boot/64: Use RIP_REL_REF() to access early_dynamic_pgts[]
  x86/boot/64: Use RIP_REL_REF() to assign 'phys_base'
  x86/boot/64: Simplify global variable accesses in GDT/IDT programming
  x86/trampoline: Bypass compat mode in trampoline_start64() if not needed
  kexec: Allocate kernel above bzImage's pref_address
  x86/boot: Add a message about ignored early NMIs
  ...
parents e66c58f7 2e2bc42c
......@@ -878,7 +878,8 @@ Protocol: 2.10+
address if possible.
A non-relocatable kernel will unconditionally move itself to run
at this address.
at this address. A relocatable kernel will move itself to this address if it
is loaded below this address.
============ =======
Field name: init_size
......
......@@ -2111,11 +2111,11 @@ config PHYSICAL_START
help
This gives the physical address where the kernel is loaded.
If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then
bzImage will decompress itself to above physical address and
run from there. Otherwise, bzImage will run from the address where
it has been loaded by the boot loader and will ignore above physical
address.
If the kernel is not relocatable (CONFIG_RELOCATABLE=n) then bzImage
will decompress itself to the above physical address and run from there.
Otherwise, bzImage will run from the address where it has been loaded
by the boot loader. The only exception is if it is loaded below the
above physical address, in which case it will relocate itself there.
In normal kdump cases one does not have to set/change this option
as now bzImage can be compiled as a completely relocatable image
......
......@@ -389,5 +389,5 @@ void do_boot_page_fault(struct pt_regs *regs, unsigned long error_code)
void do_boot_nmi_trap(struct pt_regs *regs, unsigned long error_code)
{
/* Empty handler to ignore NMI during early boot */
spurious_nmi_count++;
}
......@@ -52,6 +52,7 @@ struct port_io_ops pio_ops;
memptr free_mem_ptr;
memptr free_mem_end_ptr;
int spurious_nmi_count;
static char *vidmem;
static int vidport;
......@@ -164,21 +165,34 @@ void __putstr(const char *s)
outb(0xff & (pos >> 1), vidport+1);
}
void __puthex(unsigned long value)
static noinline void __putnum(unsigned long value, unsigned int base,
int mindig)
{
char alpha[2] = "0";
int bits;
char buf[8*sizeof(value)+1];
char *p;
for (bits = sizeof(value) * 8 - 4; bits >= 0; bits -= 4) {
unsigned long digit = (value >> bits) & 0xf;
p = buf + sizeof(buf);
*--p = '\0';
if (digit < 0xA)
alpha[0] = '0' + digit;
else
alpha[0] = 'a' + (digit - 0xA);
while (mindig-- > 0 || value) {
unsigned char digit = value % base;
digit += (digit >= 10) ? ('a'-10) : '0';
*--p = digit;
__putstr(alpha);
value /= base;
}
__putstr(p);
}
void __puthex(unsigned long value)
{
__putnum(value, 16, sizeof(value)*2);
}
void __putdec(unsigned long value)
{
__putnum(value, 10, 1);
}
#ifdef CONFIG_X86_NEED_RELOCS
......@@ -357,6 +371,19 @@ unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr,
return entry;
}
/*
* Set the memory encryption xloadflag based on the mem_encrypt= command line
* parameter, if provided.
*/
static void parse_mem_encrypt(struct setup_header *hdr)
{
int on = cmdline_find_option_bool("mem_encrypt=on");
int off = cmdline_find_option_bool("mem_encrypt=off");
if (on > off)
hdr->xloadflags |= XLF_MEM_ENCRYPTION;
}
/*
* The compressed kernel image (ZO) has been moved so that its position
* is against the end of the buffer used to hold the uncompressed kernel
......@@ -387,6 +414,8 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
/* Clear flags intended solely for in-kernel use. */
boot_params_ptr->hdr.loadflags &= ~KASLR_FLAG;
parse_mem_encrypt(&boot_params_ptr->hdr);
sanitize_boot_params(boot_params_ptr);
if (boot_params_ptr->screen_info.orig_video_mode == 7) {
......@@ -493,6 +522,12 @@ asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output)
/* Disable exception handling before booting the kernel */
cleanup_exception_handling();
if (spurious_nmi_count) {
error_putstr("Spurious early NMIs ignored: ");
error_putdec(spurious_nmi_count);
error_putstr("\n");
}
return output + entry_offset;
}
......
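
As an aside, a standalone illustration of the __putnum() refactor shown
above - a hypothetical userspace harness (not part of the patch) that
mirrors the helper so the 'mindig' minimum-digits padding behavior can
be seen directly:

    #include <stdio.h>

    /* Mirror of the decompressor's __putnum(): emit 'value' in 'base',
       padded with leading zeroes to at least 'mindig' digits. */
    static void putnum(unsigned long value, unsigned int base, int mindig)
    {
            char buf[8 * sizeof(value) + 1];
            char *p = buf + sizeof(buf);

            *--p = '\0';
            while (mindig-- > 0 || value) {
                    unsigned char digit = value % base;

                    digit += (digit >= 10) ? ('a' - 10) : '0';
                    *--p = digit;
                    value /= base;
            }
            fputs(p, stdout);       /* the kernel version calls __putstr(p) */
    }

    int main(void)
    {
            putnum(255, 16, 2 * sizeof(unsigned long)); /* as __puthex(): 00000000000000ff */
            putc('\n', stdout);
            putnum(0, 10, 1);                           /* as __putdec(): 0 */
            putc('\n', stdout);
            return 0;
    }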
......@@ -59,12 +59,15 @@ extern char _head[], _end[];
/* misc.c */
extern memptr free_mem_ptr;
extern memptr free_mem_end_ptr;
extern int spurious_nmi_count;
void *malloc(int size);
void free(void *where);
void __putstr(const char *s);
void __puthex(unsigned long value);
void __putdec(unsigned long value);
#define error_putstr(__x) __putstr(__x)
#define error_puthex(__x) __puthex(__x)
#define error_putdec(__x) __putdec(__x)
#ifdef CONFIG_X86_VERBOSE_BOOTUP
......
......@@ -117,6 +117,9 @@ static bool fault_in_kernel_space(unsigned long address)
#undef __init
#define __init
#undef __head
#define __head
#define __BOOT_COMPRESSED
/* Basic instruction decoding support needed */
......
......@@ -111,11 +111,7 @@ extra_header_fields:
.long salign # SizeOfHeaders
.long 0 # CheckSum
.word IMAGE_SUBSYSTEM_EFI_APPLICATION # Subsystem (EFI application)
#ifdef CONFIG_EFI_DXE_MEM_ATTRIBUTES
.word IMAGE_DLL_CHARACTERISTICS_NX_COMPAT # DllCharacteristics
#else
.word 0 # DllCharacteristics
#endif
#ifdef CONFIG_X86_32
.long 0 # SizeOfStackReserve
.long 0 # SizeOfStackCommit
......
......@@ -46,6 +46,7 @@ struct gdt_page {
} __attribute__((aligned(PAGE_SIZE)));
DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
DECLARE_INIT_PER_CPU(gdt_page);
/* Provide the original GDT */
static inline struct desc_struct *get_cpu_gdt_rw(unsigned int cpu)
......
......@@ -47,8 +47,8 @@ void __init sme_unmap_bootdata(char *real_mode_data);
void __init sme_early_init(void);
void __init sme_encrypt_kernel(struct boot_params *bp);
void __init sme_enable(struct boot_params *bp);
void sme_encrypt_kernel(struct boot_params *bp);
void sme_enable(struct boot_params *bp);
int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size);
int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size);
......@@ -81,8 +81,8 @@ static inline void __init sme_unmap_bootdata(char *real_mode_data) { }
static inline void __init sme_early_init(void) { }
static inline void __init sme_encrypt_kernel(struct boot_params *bp) { }
static inline void __init sme_enable(struct boot_params *bp) { }
static inline void sme_encrypt_kernel(struct boot_params *bp) { }
static inline void sme_enable(struct boot_params *bp) { }
static inline void sev_es_init_vc_handling(void) { }
......
......@@ -21,9 +21,9 @@ typedef unsigned long pgprotval_t;
typedef struct { pteval_t pte; } pte_t;
typedef struct { pmdval_t pmd; } pmd_t;
#ifdef CONFIG_X86_5LEVEL
extern unsigned int __pgtable_l5_enabled;
#ifdef CONFIG_X86_5LEVEL
#ifdef USE_EARLY_PGTABLE_L5
/*
* cpu_feature_enabled() is not available in early boot code.
......
......@@ -48,7 +48,7 @@ extern unsigned long saved_video_mode;
extern void reserve_standard_io_resources(void);
extern void i386_reserve_resources(void);
extern unsigned long __startup_64(unsigned long physaddr, struct boot_params *bp);
extern void startup_64_setup_env(unsigned long physbase);
extern void startup_64_setup_gdt_idt(void);
extern void early_setup_idt(void);
extern void __init do_early_exception(struct pt_regs *regs, int trapnr);
......
......@@ -214,16 +214,16 @@ static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate)
struct snp_guest_request_ioctl;
void setup_ghcb(void);
void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr,
unsigned long npages);
void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op);
void snp_set_memory_shared(unsigned long vaddr, unsigned long npages);
void snp_set_memory_private(unsigned long vaddr, unsigned long npages);
void snp_set_wakeup_secondary_cpu(void);
bool snp_init(struct boot_params *bp);
void __init __noreturn snp_abort(void);
void __noreturn snp_abort(void);
int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio);
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
......
......@@ -24,6 +24,7 @@
#define XLF_EFI_KEXEC (1<<4)
#define XLF_5LEVEL (1<<5)
#define XLF_5LEVEL_ENABLED (1<<6)
#define XLF_MEM_ENCRYPTION (1<<7)
#ifndef __ASSEMBLY__
......
......@@ -40,7 +40,6 @@ L4_START_KERNEL = l4_index(__START_KERNEL_map)
L3_START_KERNEL = pud_index(__START_KERNEL_map)
.text
__HEAD
.code64
SYM_CODE_START_NOALIGN(startup_64)
......@@ -69,8 +68,6 @@ SYM_CODE_START_NOALIGN(startup_64)
/* Set up the stack for verify_cpu() */
leaq (__end_init_task - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE)(%rip), %rsp
leaq _text(%rip), %rdi
/* Setup GSBASE to allow stack canary access for C code */
movl $MSR_GS_BASE, %ecx
leaq INIT_PER_CPU_VAR(fixed_percpu_data)(%rip), %rdx
......@@ -78,7 +75,7 @@ SYM_CODE_START_NOALIGN(startup_64)
shrq $32, %rdx
wrmsr
call startup_64_setup_env
call startup_64_setup_gdt_idt
/* Now switch to __KERNEL_CS so IRET works reliably */
pushq $__KERNEL_CS
......@@ -114,13 +111,11 @@ SYM_CODE_START_NOALIGN(startup_64)
call __startup_64
/* Form the CR3 value being sure to include the CR3 modifier */
addq $(early_top_pgt - __START_KERNEL_map), %rax
leaq early_top_pgt(%rip), %rcx
addq %rcx, %rax
#ifdef CONFIG_AMD_MEM_ENCRYPT
mov %rax, %rdi
mov %rax, %r14
addq phys_base(%rip), %rdi
/*
* For SEV guests: Verify that the C-bit is correct. A malicious
......@@ -129,17 +124,23 @@ SYM_CODE_START_NOALIGN(startup_64)
* the next RET instruction.
*/
call sev_verify_cbit
#endif
/*
* Restore CR3 value without the phys_base which will be added
* below, before writing %cr3.
* Switch to early_top_pgt which still has the identity mappings
* present.
*/
mov %r14, %rax
#endif
movq %rax, %cr3
jmp 1f
/* Branch to the common startup code at its kernel virtual address */
ANNOTATE_RETPOLINE_SAFE
jmp *0f(%rip)
SYM_CODE_END(startup_64)
__INITRODATA
0: .quad common_startup_64
.text
SYM_CODE_START(secondary_startup_64)
UNWIND_HINT_END_OF_STACK
ANNOTATE_NOENDBR
......@@ -172,22 +173,39 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
ANNOTATE_NOENDBR
/* Clear %R15 which holds the boot_params pointer on the boot CPU */
xorq %r15, %r15
xorl %r15d, %r15d
/* Derive the runtime physical address of init_top_pgt[] */
movq phys_base(%rip), %rax
addq $(init_top_pgt - __START_KERNEL_map), %rax
/*
* Retrieve the modifier (SME encryption mask if SME is active) to be
* added to the initial pgdir entry that will be programmed into CR3.
*/
#ifdef CONFIG_AMD_MEM_ENCRYPT
movq sme_me_mask, %rax
#else
xorq %rax, %rax
addq sme_me_mask(%rip), %rax
#endif
/*
* Switch to the init_top_pgt here, away from the trampoline_pgd and
* unmap the identity mapped ranges.
*/
movq %rax, %cr3
/* Form the CR3 value being sure to include the CR3 modifier */
addq $(init_top_pgt - __START_KERNEL_map), %rax
1:
SYM_INNER_LABEL(common_startup_64, SYM_L_LOCAL)
UNWIND_HINT_END_OF_STACK
ANNOTATE_NOENDBR
/*
* Create a mask of CR4 bits to preserve. Omit PGE in order to flush
* global 1:1 translations from the TLBs.
*
* From the SDM:
* "If CR4.PGE is changing from 0 to 1, there were no global TLB
* entries before the execution; if CR4.PGE is changing from 1 to 0,
* there will be no global TLB entries after the execution."
*/
movl $(X86_CR4_PAE | X86_CR4_LA57), %edx
#ifdef CONFIG_X86_MCE
/*
* Preserve CR4.MCE if the kernel will enable #MC support.
......@@ -196,52 +214,20 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
* configured will crash the system regardless of the CR4.MCE value set
* here.
*/
movq %cr4, %rcx
andl $X86_CR4_MCE, %ecx
#else
movl $0, %ecx
orl $X86_CR4_MCE, %edx
#endif
movq %cr4, %rcx
andl %edx, %ecx
/* Enable PAE mode, PSE, PGE and LA57 */
orl $(X86_CR4_PAE | X86_CR4_PSE | X86_CR4_PGE), %ecx
#ifdef CONFIG_X86_5LEVEL
testb $1, __pgtable_l5_enabled(%rip)
jz 1f
orl $X86_CR4_LA57, %ecx
1:
#endif
/* Even if ignored in long mode, set PSE uniformly on all logical CPUs. */
btsl $X86_CR4_PSE_BIT, %ecx
movq %rcx, %cr4
/* Setup early boot stage 4-/5-level pagetables. */
addq phys_base(%rip), %rax
/*
* Switch to new page-table
*
* For the boot CPU this switches to early_top_pgt which still has the
* identity mappings present. The secondary CPUs will switch to the
* init_top_pgt here, away from the trampoline_pgd and unmap the
* identity mapped ranges.
* Set CR4.PGE to re-enable global translations.
*/
movq %rax, %cr3
/*
* Do a global TLB flush after the CR3 switch to make sure the TLB
* entries from the identity mapping are flushed.
*/
movq %cr4, %rcx
movq %rcx, %rax
xorq $X86_CR4_PGE, %rcx
btsl $X86_CR4_PGE_BIT, %ecx
movq %rcx, %cr4
movq %rax, %cr4
/* Ensure I am executing from virtual addresses */
movq $1f, %rax
ANNOTATE_RETPOLINE_SAFE
jmp *%rax
1:
UNWIND_HINT_END_OF_STACK
ANNOTATE_NOENDBR // above
#ifdef CONFIG_SMP
/*
......@@ -298,7 +284,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
.Llookup_AP:
/* EAX contains the APIC ID of the current CPU */
xorq %rcx, %rcx
xorl %ecx, %ecx
leaq cpuid_to_apicid(%rip), %rbx
.Lfind_cpunr:
......@@ -429,39 +415,10 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL)
movq %r15, %rdi
.Ljump_to_C_code:
/*
* Jump to run C code and to be on a real kernel address.
* Since we are running on identity-mapped space we have to jump
* to the full 64bit address, this is only possible as indirect
* jump. In addition we need to ensure %cs is set so we make this
* a far return.
*
* Note: do not change to far jump indirect with 64bit offset.
*
* AMD does not support far jump indirect with 64bit offset.
* AMD64 Architecture Programmer's Manual, Volume 3: states only
* JMP FAR mem16:16 FF /5 Far jump indirect,
* with the target specified by a far pointer in memory.
* JMP FAR mem16:32 FF /5 Far jump indirect,
* with the target specified by a far pointer in memory.
*
* Intel64 does support 64bit offset.
* Software Developer Manual Vol 2: states:
* FF /5 JMP m16:16 Jump far, absolute indirect,
* address given in m16:16
* FF /5 JMP m16:32 Jump far, absolute indirect,
* address given in m16:32.
* REX.W + FF /5 JMP m16:64 Jump far, absolute indirect,
* address given in m16:64.
*/
pushq $.Lafter_lret # put return address on stack for unwinder
xorl %ebp, %ebp # clear frame pointer
movq initial_code(%rip), %rax
pushq $__KERNEL_CS # set correct cs
pushq %rax # target address in negative space
lretq
.Lafter_lret:
ANNOTATE_NOENDBR
ANNOTATE_RETPOLINE_SAFE
callq *initial_code(%rip)
ud2
SYM_CODE_END(secondary_startup_64)
#include "verify_cpu.S"
......@@ -656,7 +613,8 @@ SYM_CODE_END(vc_no_ghcb)
.balign 4
SYM_DATA_START_PTI_ALIGNED(early_top_pgt)
.fill 512,8,0
.fill 511,8,0
.quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE_NOENC
.fill PTI_USER_PGD_FILL,8,0
SYM_DATA_END(early_top_pgt)
......
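
For readability, the net effect of the reworked CR4/CR3 sequence in
common_startup_64 above, rendered as a hypothetical C helper (a sketch,
not code that exists in the tree; native_read_cr4() and friends are the
kernel's usual accessors):

    static void switch_early_pagetables(unsigned long pgd)
    {
            /* Bits to preserve; PGE is deliberately left out */
            unsigned long keep = X86_CR4_PAE | X86_CR4_LA57;
            unsigned long cr4;

            if (IS_ENABLED(CONFIG_X86_MCE))
                    keep |= X86_CR4_MCE;    /* clearing MCE can fault */

            cr4 = native_read_cr4() & keep;
            cr4 |= X86_CR4_PSE;     /* set PSE uniformly on all logical CPUs */
            native_write_cr4(cr4);  /* PGE off: no global TLB entries remain */

            native_write_cr3(pgd);  /* switch to the new page tables */

            /* Setting PGE re-enables global translations */
            native_write_cr4(cr4 | X86_CR4_PGE);
    }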
......@@ -503,7 +503,10 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
kbuf.bufsz = kernel_len - kern16_size;
kbuf.memsz = PAGE_ALIGN(header->init_size);
kbuf.buf_align = header->kernel_alignment;
kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
if (header->pref_address < MIN_KERNEL_LOAD_ADDR)
kbuf.buf_min = MIN_KERNEL_LOAD_ADDR;
else
kbuf.buf_min = header->pref_address;
kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
ret = kexec_add_buffer(&kbuf);
if (ret)
......
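
The buf_min logic above is a simple lower clamp; a sketch of the
equivalent one-liner, using the kernel's type-safe max_t() macro:

    kbuf.buf_min = max_t(unsigned long, MIN_KERNEL_LOAD_ADDR,
                         header->pref_address);

Per the boot protocol change earlier in this series, a relocatable
kernel loaded below pref_address moves itself there first, so
allocating at or above pref_address spares that extra copy.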
......@@ -95,7 +95,8 @@ static bool __init sev_es_check_cpu_features(void)
return true;
}
static void __noreturn sev_es_terminate(unsigned int set, unsigned int reason)
static void __head __noreturn
sev_es_terminate(unsigned int set, unsigned int reason)
{
u64 val = GHCB_MSR_TERM_REQ;
......@@ -332,13 +333,7 @@ static int sev_cpuid_hv(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid
*/
static const struct snp_cpuid_table *snp_cpuid_get_table(void)
{
void *ptr;
asm ("lea cpuid_table_copy(%%rip), %0"
: "=r" (ptr)
: "p" (&cpuid_table_copy));
return ptr;
return &RIP_REL_REF(cpuid_table_copy);
}
/*
......@@ -397,7 +392,7 @@ static u32 snp_cpuid_calc_xsave_size(u64 xfeatures_en, bool compacted)
return xsave_size;
}
static bool
static bool __head
snp_cpuid_get_validated_func(struct cpuid_leaf *leaf)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
......@@ -534,7 +529,8 @@ static int snp_cpuid_postprocess(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
* Returns -EOPNOTSUPP if feature not enabled. Any other non-zero return value
* should be treated as fatal by caller.
*/
static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
static int __head
snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_leaf *leaf)
{
const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table();
......@@ -576,7 +572,7 @@ static int snp_cpuid(struct ghcb *ghcb, struct es_em_ctxt *ctxt, struct cpuid_le
* page yet, so it only supports the MSR based communication with the
* hypervisor and only the CPUID exit-code.
*/
void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
void __head do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
{
unsigned int subfn = lower_bits(regs->cx, 32);
unsigned int fn = lower_bits(regs->ax, 32);
......@@ -1027,7 +1023,8 @@ struct cc_setup_data {
* Search for a Confidential Computing blob passed in as a setup_data entry
* via the Linux Boot Protocol.
*/
static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
static __head
struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
{
struct cc_setup_data *sd = NULL;
struct setup_data *hdr;
......@@ -1054,7 +1051,7 @@ static struct cc_blob_sev_info *find_cc_blob_setup_data(struct boot_params *bp)
* mapping needs to be updated in sync with all the changes to virtual memory
* layout and related mapping facilities throughout the boot process.
*/
static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
static void __head setup_cpuid_table(const struct cc_blob_sev_info *cc_info)
{
const struct snp_cpuid_table *cpuid_table_fw, *cpuid_table;
int i;
......
......@@ -25,6 +25,7 @@
#include <linux/psp-sev.h>
#include <uapi/linux/sev-guest.h>
#include <asm/init.h>
#include <asm/cpu_entry_area.h>
#include <asm/stacktrace.h>
#include <asm/sev.h>
......@@ -701,8 +702,9 @@ static u64 __init get_jump_table_addr(void)
return ret;
}
static void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
unsigned long npages, enum psc_op op)
static void __head
early_set_pages_state(unsigned long vaddr, unsigned long paddr,
unsigned long npages, enum psc_op op)
{
unsigned long paddr_end;
u64 val;
......@@ -758,7 +760,7 @@ static void early_set_pages_state(unsigned long vaddr, unsigned long paddr,
sev_es_terminate(SEV_TERM_SET_LINUX, GHCB_TERM_PSC);
}
void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
void __head early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr,
unsigned long npages)
{
/*
......@@ -2081,7 +2083,7 @@ bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
*
* Scan for the blob in that order.
*/
static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
static __head struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
{
struct cc_blob_sev_info *cc_info;
......@@ -2107,7 +2109,7 @@ static __init struct cc_blob_sev_info *find_cc_blob(struct boot_params *bp)
return cc_info;
}
bool __init snp_init(struct boot_params *bp)
bool __head snp_init(struct boot_params *bp)
{
struct cc_blob_sev_info *cc_info;
......@@ -2129,7 +2131,7 @@ bool __init snp_init(struct boot_params *bp)
return true;
}
void __init __noreturn snp_abort(void)
void __head __noreturn snp_abort(void)
{
sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED);
}
......
......@@ -77,7 +77,7 @@ SYM_FUNC_START(sev_verify_cbit)
* The check failed: prevent any forward progress to block ROP
* attacks, invalidate the stack and go into a hlt loop.
*/
xorq %rsp, %rsp
xorl %esp, %esp
subq $0x1000, %rsp
2: hlt
jmp 2b
......
......@@ -14,19 +14,6 @@ ifdef CONFIG_KCSAN
CFLAGS_REMOVE_delay.o = $(CC_FLAGS_FTRACE)
endif
# Early boot use of cmdline; don't instrument it
ifdef CONFIG_AMD_MEM_ENCRYPT
KCOV_INSTRUMENT_cmdline.o := n
KASAN_SANITIZE_cmdline.o := n
KCSAN_SANITIZE_cmdline.o := n
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_cmdline.o = -pg
endif
CFLAGS_cmdline.o := -fno-stack-protector -fno-jump-tables
endif
inat_tables_script = $(srctree)/arch/x86/tools/gen-insn-attr-x86.awk
inat_tables_maps = $(srctree)/arch/x86/lib/x86-opcode-map.txt
quiet_cmd_inat_tables = GEN $@
......
......@@ -41,9 +41,9 @@
#include <linux/mem_encrypt.h>
#include <linux/cc_platform.h>
#include <asm/init.h>
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/cmdline.h>
#include <asm/coco.h>
#include <asm/sev.h>
......@@ -95,10 +95,7 @@ struct sme_populate_pgd_data {
*/
static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch");
static char sme_cmdline_arg[] __initdata = "mem_encrypt";
static char sme_cmdline_on[] __initdata = "on";
static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
static void __head sme_clear_pgd(struct sme_populate_pgd_data *ppd)
{
unsigned long pgd_start, pgd_end, pgd_size;
pgd_t *pgd_p;
......@@ -113,7 +110,7 @@ static void __init sme_clear_pgd(struct sme_populate_pgd_data *ppd)
memset(pgd_p, 0, pgd_size);
}
static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
static pud_t __head *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
{
pgd_t *pgd;
p4d_t *p4d;
......@@ -150,7 +147,7 @@ static pud_t __init *sme_prepare_pgd(struct sme_populate_pgd_data *ppd)
return pud;
}
static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
static void __head sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
{
pud_t *pud;
pmd_t *pmd;
......@@ -166,7 +163,7 @@ static void __init sme_populate_pgd_large(struct sme_populate_pgd_data *ppd)
set_pmd(pmd, __pmd(ppd->paddr | ppd->pmd_flags));
}
static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
static void __head sme_populate_pgd(struct sme_populate_pgd_data *ppd)
{
pud_t *pud;
pmd_t *pmd;
......@@ -192,7 +189,7 @@ static void __init sme_populate_pgd(struct sme_populate_pgd_data *ppd)
set_pte(pte, __pte(ppd->paddr | ppd->pte_flags));
}
static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
static void __head __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
{
while (ppd->vaddr < ppd->vaddr_end) {
sme_populate_pgd_large(ppd);
......@@ -202,7 +199,7 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
}
}
static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
static void __head __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
{
while (ppd->vaddr < ppd->vaddr_end) {
sme_populate_pgd(ppd);
......@@ -212,7 +209,7 @@ static void __init __sme_map_range_pte(struct sme_populate_pgd_data *ppd)
}
}
static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
static void __head __sme_map_range(struct sme_populate_pgd_data *ppd,
pmdval_t pmd_flags, pteval_t pte_flags)
{
unsigned long vaddr_end;
......@@ -236,22 +233,22 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
__sme_map_range_pte(ppd);
}
static void __init sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
static void __head sme_map_range_encrypted(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_ENC, PTE_FLAGS_ENC);
}
static void __init sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
static void __head sme_map_range_decrypted(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_DEC, PTE_FLAGS_DEC);
}
static void __init sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
static void __head sme_map_range_decrypted_wp(struct sme_populate_pgd_data *ppd)
{
__sme_map_range(ppd, PMD_FLAGS_DEC_WP, PTE_FLAGS_DEC_WP);
}
static unsigned long __init sme_pgtable_calc(unsigned long len)
static unsigned long __head sme_pgtable_calc(unsigned long len)
{
unsigned long entries = 0, tables = 0;
......@@ -288,7 +285,7 @@ static unsigned long __init sme_pgtable_calc(unsigned long len)
return entries + tables;
}
void __init sme_encrypt_kernel(struct boot_params *bp)
void __head sme_encrypt_kernel(struct boot_params *bp)
{
unsigned long workarea_start, workarea_end, workarea_len;
unsigned long execute_start, execute_end, execute_len;
......@@ -323,9 +320,8 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
* memory from being cached.
*/
/* Physical addresses give us the identity mapped virtual addresses */
kernel_start = __pa_symbol(_text);
kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE);
kernel_start = (unsigned long)RIP_REL_REF(_text);
kernel_end = ALIGN((unsigned long)RIP_REL_REF(_end), PMD_SIZE);
kernel_len = kernel_end - kernel_start;
initrd_start = 0;
......@@ -342,14 +338,6 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
}
#endif
/*
* We're running identity mapped, so we must obtain the address to the
* SME encryption workarea using rip-relative addressing.
*/
asm ("lea sme_workarea(%%rip), %0"
: "=r" (workarea_start)
: "p" (sme_workarea));
/*
* Calculate required number of workarea bytes needed:
* executable encryption area size:
......@@ -359,7 +347,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
* pagetable structures for the encryption of the kernel
* pagetable structures for workarea (in case not currently mapped)
*/
execute_start = workarea_start;
execute_start = workarea_start = (unsigned long)RIP_REL_REF(sme_workarea);
execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE;
execute_len = execute_end - execute_start;
......@@ -502,13 +490,11 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
native_write_cr3(__native_read_cr3());
}
void __init sme_enable(struct boot_params *bp)
void __head sme_enable(struct boot_params *bp)
{
const char *cmdline_ptr, *cmdline_arg, *cmdline_on;
unsigned int eax, ebx, ecx, edx;
unsigned long feature_mask;
unsigned long me_mask;
char buffer[16];
bool snp;
u64 msr;
......@@ -551,6 +537,9 @@ void __init sme_enable(struct boot_params *bp)
/* Check if memory encryption is enabled */
if (feature_mask == AMD_SME_BIT) {
if (!(bp->hdr.xloadflags & XLF_MEM_ENCRYPTION))
return;
/*
* No SME if Hypervisor bit is set. This check is here to
* prevent a guest from trying to enable SME. For running as a
......@@ -570,31 +559,8 @@ void __init sme_enable(struct boot_params *bp)
msr = __rdmsr(MSR_AMD64_SYSCFG);
if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
return;
} else {
/* SEV state cannot be controlled by a command line option */
goto out;
}
/*
* Fixups have not been applied to phys_base yet and we're running
* identity mapped, so we must obtain the address to the SME command
* line argument data using rip-relative addressing.
*/
asm ("lea sme_cmdline_arg(%%rip), %0"
: "=r" (cmdline_arg)
: "p" (sme_cmdline_arg));
asm ("lea sme_cmdline_on(%%rip), %0"
: "=r" (cmdline_on)
: "p" (sme_cmdline_on));
cmdline_ptr = (const char *)((u64)bp->hdr.cmd_line_ptr |
((u64)bp->ext_cmd_line_ptr << 32));
if (cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer)) < 0 ||
strncmp(buffer, cmdline_on, sizeof(buffer)))
return;
out:
RIP_REL_REF(sme_me_mask) = me_mask;
physical_mask &= ~me_mask;
cc_vendor = CC_VENDOR_AMD;
......
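
Tying the mem_encrypt= pieces together, a condensed sketch of the new
flow across the hunks above (illustrative, not a literal copy of any
single file):

    /* Boot stub side (decompressor misc.c, or the EFI stub via
       efi_parse_options()): parse the command line once, early, and
       record the verdict in an xloadflags bit. */
    if (cmdline_find_option_bool("mem_encrypt=on") >
        cmdline_find_option_bool("mem_encrypt=off"))
            hdr->xloadflags |= XLF_MEM_ENCRYPTION;

    /* Kernel proper, sme_enable(): no more early command line parsing
       and none of its RIP-relative contortions. */
    if (feature_mask == AMD_SME_BIT &&
        !(bp->hdr.xloadflags & XLF_MEM_ENCRYPTION))
            return;     /* SME stays off unless explicitly requested */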
......@@ -37,13 +37,15 @@
.text
.code16
.macro LOCK_AND_LOAD_REALMODE_ESP lock_pa=0
.macro LOCK_AND_LOAD_REALMODE_ESP lock_pa=0 lock_rip=0
/*
* Make sure only one CPU fiddles with the realmode stack
*/
.Llock_rm\@:
.if \lock_pa
lock btsl $0, pa_tr_lock
.elseif \lock_rip
lock btsl $0, tr_lock(%rip)
.else
lock btsl $0, tr_lock
.endif
......@@ -220,6 +222,35 @@ SYM_CODE_START(trampoline_start64)
lidt tr_idt(%rip)
lgdt tr_gdt64(%rip)
/* Check if paging mode has to be changed */
movq %cr4, %rax
xorl tr_cr4(%rip), %eax
testl $X86_CR4_LA57, %eax
jnz .L_switch_paging
/* Paging mode is correct, proceed in 64-bit mode */
LOCK_AND_LOAD_REALMODE_ESP lock_rip=1
movw $__KERNEL_DS, %dx
movl %edx, %ss
addl $pa_real_mode_base, %esp
movl %edx, %ds
movl %edx, %es
movl %edx, %fs
movl %edx, %gs
movl $pa_trampoline_pgd, %eax
movq %rax, %cr3
pushq $__KERNEL_CS
pushq tr_start(%rip)
lretq
.L_switch_paging:
/*
* To switch between 4- and 5-level paging modes, it is necessary
* to disable paging, and this must be done in compatibility mode.
*/
ljmpl *tr_compat(%rip)
SYM_CODE_END(trampoline_start64)
......
......@@ -24,6 +24,8 @@ static bool efi_noinitrd;
static bool efi_nosoftreserve;
static bool efi_disable_pci_dma = IS_ENABLED(CONFIG_EFI_DISABLE_PCI_DMA);
int efi_mem_encrypt;
bool __pure __efi_soft_reserve_enabled(void)
{
return !efi_nosoftreserve;
......@@ -75,6 +77,12 @@ efi_status_t efi_parse_options(char const *cmdline)
efi_noinitrd = true;
} else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
efi_no5lvl = true;
} else if (IS_ENABLED(CONFIG_ARCH_HAS_MEM_ENCRYPT) &&
!strcmp(param, "mem_encrypt") && val) {
if (parse_option_str(val, "on"))
efi_mem_encrypt = 1;
else if (parse_option_str(val, "off"))
efi_mem_encrypt = -1;
} else if (!strcmp(param, "efi") && val) {
efi_nochunk = parse_option_str(val, "nochunk");
efi_novamap |= parse_option_str(val, "novamap");
......
......@@ -37,8 +37,8 @@ extern bool efi_no5lvl;
extern bool efi_nochunk;
extern bool efi_nokaslr;
extern int efi_loglevel;
extern int efi_mem_encrypt;
extern bool efi_novamap;
extern const efi_system_table_t *efi_system_table;
typedef union efi_dxe_services_table efi_dxe_services_table_t;
......
......@@ -884,6 +884,9 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
}
}
if (efi_mem_encrypt > 0)
hdr->xloadflags |= XLF_MEM_ENCRYPTION;
status = efi_decompress_kernel(&kernel_entry);
if (status != EFI_SUCCESS) {
efi_err("Failed to decompress kernel\n");
......