Commit 291f3632 authored by Matt Fleming's avatar Matt Fleming Committed by H. Peter Anvin

x86, efi: EFI boot stub support

There is currently a large divide between kernel development and the
development of EFI boot loaders. The idea behind this patch is to give
the kernel developers full control over the EFI boot process. As
H. Peter Anvin put it,

"The 'kernel carries its own stub' approach been very successful in
dealing with BIOS, and would make a lot of sense to me for EFI as
well."

This patch introduces an EFI boot stub that allows an x86 bzImage to
be loaded and executed by EFI firmware. The bzImage appears to the
firmware as an EFI application. Luckily there are enough free bits
within the bzImage header so that it can masquerade as an EFI
application, thereby coercing the EFI firmware into loading it and
jumping to its entry point. The beauty of this masquerading approach
is that both BIOS and EFI boot loaders can still load and run the same
bzImage, thereby allowing a single kernel image to work in any boot
environment.

The EFI boot stub supports multiple initrds, but they must exist on
the same partition as the bzImage. Command-line arguments for the
kernel can be appended after the bzImage name when run from the EFI
shell, e.g.

Shell> bzImage console=ttyS0 root=/dev/sdb initrd=initrd.img

v7:
 - Fix checkpatch warnings.

v6:

 - Try to allocate initrd memory just below hdr->inird_addr_max.

v5:

 - load_options_size is UTF-16, which needs dividing by 2 to convert
   to the corresponding ASCII size.

v4:

 - Don't read more than image->load_options_size

v3:

 - Fix following warnings when compiling CONFIG_EFI_STUB=n

   arch/x86/boot/tools/build.c: In function ‘main’:
   arch/x86/boot/tools/build.c:138:24: warning: unused variable ‘pe_header’
   arch/x86/boot/tools/build.c:138:15: warning: unused variable ‘file_sz’

 - As reported by Matthew Garrett, some Apple machines have GOPs that
   don't have hardware attached. We need to weed these out by
   searching for ones that handle the PCIIO protocol.

 - Don't allocate memory if no initrds are on cmdline
 - Don't trust image->load_options_size

Maarten Lankhorst noted:
 - Don't strip first argument when booted from efibootmgr
 - Don't allocate too much memory for cmdline
 - Don't update cmdline_size, the kernel considers it read-only
 - Don't accept '\n' for initrd names

v2:

 - File alignment was too large, was 8192 should be 512. Reported by
   Maarten Lankhorst on LKML.
 - Added UGA support for graphics
 - Use VIDEO_TYPE_EFI instead of hard-coded number.
 - Move linelength assignment until after we've assigned depth
 - Dynamically fill out AddressOfEntryPoint in tools/build.c
 - Don't use magic number for GDT/TSS stuff. Requested by Andi Kleen
 - The bzImage may need to be relocated as it may have been loaded at
   a high address address by the firmware. This was required to get my
   macbook booting because the firmware loaded it at 0x7cxxxxxx, which
   triggers this error in decompress_kernel(),

	if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
		error("Destination address too large");

Cc: Mike Waychison <mikew@google.com>
Cc: Matthew Garrett <mjg@redhat.com>
Tested-by: default avatarHenrik Rydberg <rydberg@euromail.se>
Signed-off-by: default avatarMatt Fleming <matt.fleming@intel.com>
Link: http://lkml.kernel.org/r/1321383097.2657.9.camel@mfleming-mobl1.ger.corp.intel.comSigned-off-by: default avatarH. Peter Anvin <hpa@linux.intel.com>
parent 55839d51
......@@ -1478,6 +1478,13 @@ config EFI
resultant kernel should continue to boot on existing non-EFI
platforms.
config EFI_STUB
bool "EFI stub support"
depends on EFI
---help---
This kernel feature allows a bzImage to be loaded directly
by EFI firmware without the use of a bootloader.
config SECCOMP
def_bool y
prompt "Enable seccomp to safely compute untrusted bytecode"
......
......@@ -23,7 +23,15 @@ LDFLAGS_vmlinux := -T
hostprogs-y := mkpiggy
$(obj)/vmlinux: $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o $(obj)/piggy.o FORCE
VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
$(obj)/piggy.o
ifeq ($(CONFIG_EFI_STUB), y)
VMLINUX_OBJS += $(obj)/eboot.o $(obj)/efi_stub_$(BITS).o
endif
$(obj)/vmlinux: $(VMLINUX_OBJS) FORCE
$(call if_changed,ld)
@:
......
This diff is collapsed.
#ifndef BOOT_COMPRESSED_EBOOT_H
#define BOOT_COMPRESSED_EBOOT_H
#define SEG_TYPE_DATA (0 << 3)
#define SEG_TYPE_READ_WRITE (1 << 1)
#define SEG_TYPE_CODE (1 << 3)
#define SEG_TYPE_EXEC_READ (1 << 1)
#define SEG_TYPE_TSS ((1 << 3) | (1 << 0))
#define SEG_OP_SIZE_32BIT (1 << 0)
#define SEG_GRANULARITY_4KB (1 << 0)
#define DESC_TYPE_CODE_DATA (1 << 0)
#define EFI_PAGE_SIZE (1UL << EFI_PAGE_SHIFT)
#define PIXEL_RGB_RESERVED_8BIT_PER_COLOR 0
#define PIXEL_BGR_RESERVED_8BIT_PER_COLOR 1
#define PIXEL_BIT_MASK 2
#define PIXEL_BLT_ONLY 3
#define PIXEL_FORMAT_MAX 4
struct efi_pixel_bitmask {
u32 red_mask;
u32 green_mask;
u32 blue_mask;
u32 reserved_mask;
};
struct efi_graphics_output_mode_info {
u32 version;
u32 horizontal_resolution;
u32 vertical_resolution;
int pixel_format;
struct efi_pixel_bitmask pixel_information;
u32 pixels_per_scan_line;
} __packed;
struct efi_graphics_output_protocol_mode {
u32 max_mode;
u32 mode;
unsigned long info;
unsigned long size_of_info;
u64 frame_buffer_base;
unsigned long frame_buffer_size;
} __packed;
struct efi_graphics_output_protocol {
void *query_mode;
unsigned long set_mode;
unsigned long blt;
struct efi_graphics_output_protocol_mode *mode;
};
struct efi_uga_draw_protocol {
void *get_mode;
void *set_mode;
void *blt;
};
#endif /* BOOT_COMPRESSED_EBOOT_H */
/*
* EFI call stub for IA32.
*
* This stub allows us to make EFI calls in physical mode with interrupts
* turned off. Note that this implementation is different from the one in
* arch/x86/platform/efi/efi_stub_32.S because we're _already_ in physical
* mode at this point.
*/
#include <linux/linkage.h>
#include <asm/page_types.h>
/*
* efi_call_phys(void *, ...) is a function with variable parameters.
* All the callers of this function assure that all the parameters are 4-bytes.
*/
/*
* In gcc calling convention, EBX, ESP, EBP, ESI and EDI are all callee save.
* So we'd better save all of them at the beginning of this function and restore
* at the end no matter how many we use, because we can not assure EFI runtime
* service functions will comply with gcc calling convention, too.
*/
.text
ENTRY(efi_call_phys)
/*
* 0. The function can only be called in Linux kernel. So CS has been
* set to 0x0010, DS and SS have been set to 0x0018. In EFI, I found
* the values of these registers are the same. And, the corresponding
* GDT entries are identical. So I will do nothing about segment reg
* and GDT, but change GDT base register in prelog and epilog.
*/
/*
* 1. Because we haven't been relocated by this point we need to
* use relative addressing.
*/
call 1f
1: popl %edx
subl $1b, %edx
/*
* 2. Now on the top of stack is the return
* address in the caller of efi_call_phys(), then parameter 1,
* parameter 2, ..., param n. To make things easy, we save the return
* address of efi_call_phys in a global variable.
*/
popl %ecx
movl %ecx, saved_return_addr(%edx)
/* get the function pointer into ECX*/
popl %ecx
movl %ecx, efi_rt_function_ptr(%edx)
/*
* 3. Call the physical function.
*/
call *%ecx
/*
* 4. Balance the stack. And because EAX contain the return value,
* we'd better not clobber it. We need to calculate our address
* again because %ecx and %edx are not preserved across EFI function
* calls.
*/
call 1f
1: popl %edx
subl $1b, %edx
movl efi_rt_function_ptr(%edx), %ecx
pushl %ecx
/*
* 10. Push the saved return address onto the stack and return.
*/
movl saved_return_addr(%edx), %ecx
pushl %ecx
ret
ENDPROC(efi_call_phys)
.previous
.data
saved_return_addr:
.long 0
efi_rt_function_ptr:
.long 0
#include "../../platform/efi/efi_stub_64.S"
......@@ -32,6 +32,28 @@
__HEAD
ENTRY(startup_32)
#ifdef CONFIG_EFI_STUB
/*
* We don't need the return address, so set up the stack so
* efi_main() can find its arugments.
*/
add $0x4, %esp
call efi_main
cmpl $0, %eax
je preferred_addr
movl %eax, %esi
call 1f
1:
popl %eax
subl $1b, %eax
subl BP_pref_address(%esi), %eax
add BP_code32_start(%esi), %eax
leal preferred_addr(%eax), %eax
jmp *%eax
preferred_addr:
#endif
cld
/*
* Test KEEP_SEGMENTS flag to see if the bootloader is asking
......
......@@ -199,6 +199,26 @@ ENTRY(startup_64)
* an identity mapped page table being provied that maps our
* entire text+data+bss and hopefully all of memory.
*/
#ifdef CONFIG_EFI_STUB
pushq %rsi
mov %rcx, %rdi
mov %rdx, %rsi
call efi_main
popq %rsi
cmpq $0,%rax
je preferred_addr
movq %rax,%rsi
call 1f
1:
popq %rax
subq $1b, %rax
subq BP_pref_address(%rsi), %rax
add BP_code32_start(%esi), %eax
leaq preferred_addr(%rax), %rax
jmp *%rax
preferred_addr:
#endif
/* Setup data segments. */
xorl %eax, %eax
......
#include "misc.h"
int memcmp(const void *s1, const void *s2, size_t len)
{
u8 diff;
asm("repe; cmpsb; setnz %0"
: "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len));
return diff;
}
#include "../string.c"
......@@ -45,6 +45,11 @@ SYSSEG = 0x1000 /* historical load address >> 4 */
.global bootsect_start
bootsect_start:
#ifdef CONFIG_EFI_STUB
# "MZ", MS-DOS header
.byte 0x4d
.byte 0x5a
#endif
# Normalize the start address
ljmp $BOOTSEG, $start2
......@@ -79,6 +84,14 @@ bs_die:
# invoke the BIOS reset code...
ljmp $0xf000,$0xfff0
#ifdef CONFIG_EFI_STUB
.org 0x3c
#
# Offset to the PE header.
#
.long pe_header
#endif /* CONFIG_EFI_STUB */
.section ".bsdata", "a"
bugger_off_msg:
.ascii "Direct booting from floppy is no longer supported.\r\n"
......@@ -87,6 +100,141 @@ bugger_off_msg:
.ascii "Remove disk and press any key to reboot . . .\r\n"
.byte 0
#ifdef CONFIG_EFI_STUB
pe_header:
.ascii "PE"
.word 0
coff_header:
#ifdef CONFIG_X86_32
.word 0x14c # i386
#else
.word 0x8664 # x86-64
#endif
.word 2 # nr_sections
.long 0 # TimeDateStamp
.long 0 # PointerToSymbolTable
.long 1 # NumberOfSymbols
.word section_table - optional_header # SizeOfOptionalHeader
#ifdef CONFIG_X86_32
.word 0x306 # Characteristics.
# IMAGE_FILE_32BIT_MACHINE |
# IMAGE_FILE_DEBUG_STRIPPED |
# IMAGE_FILE_EXECUTABLE_IMAGE |
# IMAGE_FILE_LINE_NUMS_STRIPPED
#else
.word 0x206 # Characteristics
# IMAGE_FILE_DEBUG_STRIPPED |
# IMAGE_FILE_EXECUTABLE_IMAGE |
# IMAGE_FILE_LINE_NUMS_STRIPPED
#endif
optional_header:
#ifdef CONFIG_X86_32
.word 0x10b # PE32 format
#else
.word 0x20b # PE32+ format
#endif
.byte 0x02 # MajorLinkerVersion
.byte 0x14 # MinorLinkerVersion
# Filled in by build.c
.long 0 # SizeOfCode
.long 0 # SizeOfInitializedData
.long 0 # SizeOfUninitializedData
# Filled in by build.c
.long 0x0000 # AddressOfEntryPoint
.long 0x0000 # BaseOfCode
#ifdef CONFIG_X86_32
.long 0 # data
#endif
extra_header_fields:
#ifdef CONFIG_X86_32
.long 0 # ImageBase
#else
.quad 0 # ImageBase
#endif
.long 0x1000 # SectionAlignment
.long 0x200 # FileAlignment
.word 0 # MajorOperatingSystemVersion
.word 0 # MinorOperatingSystemVersion
.word 0 # MajorImageVersion
.word 0 # MinorImageVersion
.word 0 # MajorSubsystemVersion
.word 0 # MinorSubsystemVersion
.long 0 # Win32VersionValue
#
# The size of the bzImage is written in tools/build.c
#
.long 0 # SizeOfImage
.long 0x200 # SizeOfHeaders
.long 0 # CheckSum
.word 0xa # Subsystem (EFI application)
.word 0 # DllCharacteristics
#ifdef CONFIG_X86_32
.long 0 # SizeOfStackReserve
.long 0 # SizeOfStackCommit
.long 0 # SizeOfHeapReserve
.long 0 # SizeOfHeapCommit
#else
.quad 0 # SizeOfStackReserve
.quad 0 # SizeOfStackCommit
.quad 0 # SizeOfHeapReserve
.quad 0 # SizeOfHeapCommit
#endif
.long 0 # LoaderFlags
.long 0x1 # NumberOfRvaAndSizes
.quad 0 # ExportTable
.quad 0 # ImportTable
.quad 0 # ResourceTable
.quad 0 # ExceptionTable
.quad 0 # CertificationTable
.quad 0 # BaseRelocationTable
# Section table
section_table:
.ascii ".text"
.byte 0
.byte 0
.byte 0
.long 0
.long 0x0 # startup_{32,64}
.long 0 # Size of initialized data
# on disk
.long 0x0 # startup_{32,64}
.long 0 # PointerToRelocations
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
.word 0 # NumberOfLineNumbers
.long 0x60500020 # Characteristics (section flags)
#
# The EFI application loader requires a relocation section
# because EFI applications are relocatable and not having
# this section seems to confuse it. But since we don't need
# the loader to fixup any relocs for us just fill it with a
# single dummy reloc.
#
.ascii ".reloc"
.byte 0
.byte 0
.long reloc_end - reloc_start
.long reloc_start
.long reloc_end - reloc_start # SizeOfRawData
.long reloc_start # PointerToRawData
.long 0 # PointerToRelocations
.long 0 # PointerToLineNumbers
.word 0 # NumberOfRelocations
.word 0 # NumberOfLineNumbers
.long 0x42100040 # Characteristics (section flags)
#endif /* CONFIG_EFI_STUB */
# Kernel attributes; used by setup. This is part 1 of the
# header, from the old boot sector.
......@@ -318,3 +466,13 @@ die:
setup_corrupt:
.byte 7
.string "No setup signature found...\n"
.data
dummy: .long 0
.section .reloc
reloc_start:
.long dummy - reloc_start
.long 10
.word 0
reloc_end:
......@@ -111,3 +111,38 @@ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int bas
return result;
}
/**
* strlen - Find the length of a string
* @s: The string to be sized
*/
size_t strlen(const char *s)
{
const char *sc;
for (sc = s; *sc != '\0'; ++sc)
/* nothing */;
return sc - s;
}
/**
* strstr - Find the first substring in a %NUL terminated string
* @s1: The string to be searched
* @s2: The string to search for
*/
char *strstr(const char *s1, const char *s2)
{
size_t l1, l2;
l2 = strlen(s2);
if (!l2)
return (char *)s1;
l1 = strlen(s1);
while (l1 >= l2) {
l1--;
if (!memcmp(s1, s2, l2))
return (char *)s1;
s1++;
}
return NULL;
}
......@@ -135,6 +135,9 @@ static void usage(void)
int main(int argc, char ** argv)
{
#ifdef CONFIG_EFI_STUB
unsigned int file_sz, pe_header;
#endif
unsigned int i, sz, setup_sectors;
int c;
u32 sys_size;
......@@ -194,6 +197,42 @@ int main(int argc, char ** argv)
buf[0x1f6] = sys_size >> 16;
buf[0x1f7] = sys_size >> 24;
#ifdef CONFIG_EFI_STUB
file_sz = sz + i + ((sys_size * 16) - sz);
pe_header = *(unsigned int *)&buf[0x3c];
/* Size of code */
*(unsigned int *)&buf[pe_header + 0x1c] = file_sz;
/* Size of image */
*(unsigned int *)&buf[pe_header + 0x50] = file_sz;
#ifdef CONFIG_X86_32
/* Address of entry point */
*(unsigned int *)&buf[pe_header + 0x28] = i;
/* .text size */
*(unsigned int *)&buf[pe_header + 0xb0] = file_sz;
/* .text size of initialised data */
*(unsigned int *)&buf[pe_header + 0xb8] = file_sz;
#else
/*
* Address of entry point. startup_32 is at the beginning and
* the 64-bit entry point (startup_64) is always 512 bytes
* after.
*/
*(unsigned int *)&buf[pe_header + 0x28] = i + 512;
/* .text size */
*(unsigned int *)&buf[pe_header + 0xc0] = file_sz;
/* .text size of initialised data */
*(unsigned int *)&buf[pe_header + 0xc8] = file_sz;
#endif /* CONFIG_X86_32 */
#endif /* CONFIG_EFI_STUB */
crc = partial_crc32(buf, i, crc);
if (fwrite(buf, 1, i, stdout) != i)
die("Writing setup failed");
......
......@@ -67,4 +67,6 @@ void common(void) {
OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch);
OFFSET(BP_version, boot_params, hdr.version);
OFFSET(BP_kernel_alignment, boot_params, hdr.kernel_alignment);
OFFSET(BP_pref_address, boot_params, hdr.pref_address);
OFFSET(BP_code32_start, boot_params, hdr.code32_start);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment