Commit 310c33dc authored by Palmer Dabbelt

Merge patch series "Introduce 64b relocatable kernel"

Alexandre Ghiti <alexghiti@rivosinc.com> says:

After multiple attempts, this patchset is now based on the fact that the
64b kernel mapping was moved outside the linear mapping.

The first patch allows building relocatable kernels, but the option is not
selected by default. That patch is a requirement for KASLR.
The second and third patches take advantage of an already existing powerpc
script that checks relocations at compile time, and use it for riscv.

* b4-shazam-merge:
  riscv: Use --emit-relocs in order to move .rela.dyn in init
  riscv: Check relocations at compile time
  powerpc: Move script to check relocations at compile time in scripts/
  riscv: Introduce CONFIG_RELOCATABLE
  riscv: Move .rela.dyn outside of init to avoid empty relocations
  riscv: Prepare EFI header for relocatable kernels

Link: https://lore.kernel.org/r/20230329045329.64565-1-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
parents 2667e367 559d1e45
......@@ -15,21 +15,8 @@ if [ $# -lt 3 ]; then
exit 1
fi
# Have Kbuild supply the path to objdump and nm so we handle cross compilation.
objdump="$1"
nm="$2"
vmlinux="$3"
# Remove from the bad relocations those that match an undefined weak symbol
# which will result in an absolute relocation to 0.
# Weak unresolved symbols are of that form in nm output:
# " w _binary__btf_vmlinux_bin_end"
undef_weak_symbols=$($nm "$vmlinux" | awk '$1 ~ /w/ { print $2 }')
bad_relocs=$(
$objdump -R "$vmlinux" |
# Only look at relocation lines.
grep -E '\<R_' |
${srctree}/scripts/relocs_check.sh "$@" |
# These relocations are okay
# On PPC64:
# R_PPC64_RELATIVE, R_PPC64_NONE
......@@ -44,8 +31,7 @@ R_PPC_ADDR16_LO
R_PPC_ADDR16_HI
R_PPC_ADDR16_HA
R_PPC_RELATIVE
R_PPC_NONE' |
([ "$undef_weak_symbols" ] && grep -F -w -v "$undef_weak_symbols" || cat)
R_PPC_NONE'
)
if [ -z "$bad_relocs" ]; then
......
......@@ -603,6 +603,20 @@ config COMPAT
If you want to execute 32-bit userspace applications, say Y.
# RELOCATABLE: opt-in PIE build of the kernel. The entry has no "default"
# line, and it is only offered when MMU and 64BIT are set and XIP_KERNEL is
# not.
config RELOCATABLE
bool "Build a relocatable kernel"
depends on MMU && 64BIT && !XIP_KERNEL
help
This builds a kernel as a Position Independent Executable (PIE),
which retains all relocation metadata required to relocate the
kernel binary at runtime to a different virtual address than the
address it was linked at.
Since RISCV uses the RELA relocation format, this requires a
relocation pass at runtime even if the kernel is loaded at the
same address it was linked at.
If unsure, say N.
endmenu # "Kernel features"
menu "Boot options"
......
......@@ -7,9 +7,12 @@
#
# objcopy is asked for raw binary output (-O binary).
OBJCOPYFLAGS := -O binary
# Initialised empty; the CONFIG_RELOCATABLE block below appends to it.
LDFLAGS_vmlinux :=
# Relocatable kernel: every C file is compiled with -fPIE and vmlinux is
# linked with the extra flags listed here, --emit-relocs among them.
# NOTE(review): the exact effect of each linker flag should be confirmed
# against the ld documentation.
ifeq ($(CONFIG_RELOCATABLE),y)
LDFLAGS_vmlinux += -shared -Bsymbolic -z notext -z norelro --emit-relocs
KBUILD_CFLAGS += -fPIE
endif
ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
LDFLAGS_vmlinux := --no-relax
LDFLAGS_vmlinux += --no-relax
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
ifeq ($(CONFIG_RISCV_ISA_C),y)
CC_FLAGS_FTRACE := -fpatchable-function-entry=4
......
# SPDX-License-Identifier: GPL-2.0
# ===========================================================================
# Post-link riscv pass
# ===========================================================================
#
# Check that vmlinux relocations look sane.
#
# With CONFIG_RELOCATABLE set, the vmlinux target runs three steps in order:
#   1. relocs_check      - run arch/riscv/tools/relocs_check.sh on vmlinux
#   2. cp_vmlinux_relocs - copy vmlinux to vmlinux.relocs
#   3. relocs_strip      - remove the .rel*/.rela* sections from vmlinux
# The order matters: the copy must be taken before the sections are removed.

PHONY := __archpost
__archpost:

# The leading '-' keeps make quiet when auto.conf does not exist.
-include include/config/auto.conf
include $(srctree)/scripts/Kbuild.include

quiet_cmd_relocs_check = CHKREL $@
      cmd_relocs_check = \
        $(CONFIG_SHELL) $(srctree)/arch/riscv/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@"

ifdef CONFIG_RELOCATABLE
quiet_cmd_cp_vmlinux_relocs = CPREL vmlinux.relocs
      cmd_cp_vmlinux_relocs = cp vmlinux vmlinux.relocs

quiet_cmd_relocs_strip = STRIPREL $@
      cmd_relocs_strip = $(OBJCOPY) --remove-section='.rel.*' \
                                    --remove-section='.rel__*' \
                                    --remove-section='.rela.*' \
                                    --remove-section='.rela__*' $@
endif

# `@true` prevents complaint when there is nothing to be done

vmlinux: FORCE
	@true
ifdef CONFIG_RELOCATABLE
	$(call if_changed,relocs_check)
	$(call if_changed,cp_vmlinux_relocs)
	$(call if_changed,relocs_strip)
endif

%.ko: FORCE
	@true

# vmlinux.relocs is created by this file, so it is removed here as well.
# The removal is unconditional so that a copy left over from an earlier
# CONFIG_RELOCATABLE build is also cleaned up.
clean:
	@rm -f vmlinux.relocs

PHONY += FORCE clean

FORCE:

.PHONY: $(PHONY)
......@@ -33,7 +33,14 @@ $(obj)/xipImage: vmlinux FORCE
endif
# $(obj)/Image is produced by the objcopy command from either vmlinux.relocs
# (CONFIG_RELOCATABLE) or vmlinux (otherwise). Only the rule header is
# conditional; the single recipe line after "endif" serves both variants.
ifdef CONFIG_RELOCATABLE
# This rule never creates vmlinux.relocs. It only prints a hint when the file
# is missing, and the trailing "|| true" keeps the recipe from failing in
# either case.
vmlinux.relocs: vmlinux
@ (! [ -f vmlinux.relocs ] && echo "vmlinux.relocs can't be found, please remove vmlinux and try again") || true
$(obj)/Image: vmlinux.relocs FORCE
else
$(obj)/Image: vmlinux FORCE
endif
$(call if_changed,objcopy)
$(obj)/Image.gz: $(obj)/Image FORCE
......
......@@ -56,4 +56,7 @@ bool kernel_page_present(struct page *page);
#define SECTION_ALIGN L1_CACHE_BYTES
#endif /* CONFIG_STRICT_KERNEL_RWX */
#define PECOFF_SECTION_ALIGNMENT 0x1000
#define PECOFF_FILE_ALIGNMENT 0x200
#endif /* _ASM_RISCV_SET_MEMORY_H */
......@@ -5,8 +5,8 @@
#include <linux/arch_topology.h>
/* Replace task scheduler's default frequency-invariant accounting */
#define arch_scale_freq_tick topology_scale_freq_tick
#define arch_set_freq_scale topology_set_freq_scale
#define arch_scale_freq_tick topology_scale_freq_tick
#define arch_set_freq_scale topology_set_freq_scale
#define arch_scale_freq_capacity topology_get_freq_scale
#define arch_scale_freq_invariant topology_scale_freq_invariant
......@@ -17,4 +17,5 @@
#define arch_update_cpu_topology topology_update_cpu_topology
#include <asm-generic/topology.h>
#endif /* _ASM_RISCV_TOPOLOGY_H */
......@@ -6,6 +6,7 @@
#include <linux/pe.h>
#include <linux/sizes.h>
#include <asm/set_memory.h>
.macro __EFI_PE_HEADER
.long PE_MAGIC
......@@ -33,7 +34,11 @@ optional_header:
.byte 0x02 // MajorLinkerVersion
.byte 0x14 // MinorLinkerVersion
.long __pecoff_text_end - efi_header_end // SizeOfCode
.long __pecoff_data_virt_size // SizeOfInitializedData
#ifdef __clang__
.long __pecoff_data_virt_size // SizeOfInitializedData
#else
.long __pecoff_data_virt_end - __pecoff_text_end // SizeOfInitializedData
#endif
.long 0 // SizeOfUninitializedData
.long __efistub_efi_pe_entry - _start // AddressOfEntryPoint
.long efi_header_end - _start // BaseOfCode
......@@ -91,9 +96,17 @@ section_table:
IMAGE_SCN_MEM_EXECUTE // Characteristics
.ascii ".data\0\0\0"
.long __pecoff_data_virt_size // VirtualSize
#ifdef __clang__
.long __pecoff_data_virt_size // VirtualSize
#else
.long __pecoff_data_virt_end - __pecoff_text_end // VirtualSize
#endif
.long __pecoff_text_end - _start // VirtualAddress
.long __pecoff_data_raw_size // SizeOfRawData
#ifdef __clang__
.long __pecoff_data_raw_size // SizeOfRawData
#else
.long __pecoff_data_raw_end - __pecoff_text_end // SizeOfRawData
#endif
.long __pecoff_text_end - _start // PointerToRawData
.long 0 // PointerToRelocations
......
......@@ -27,9 +27,6 @@ ENTRY(_start)
jiffies = jiffies_64;
PECOFF_SECTION_ALIGNMENT = 0x1000;
PECOFF_FILE_ALIGNMENT = 0x200;
SECTIONS
{
/* Beginning of code and text segment */
......@@ -99,10 +96,6 @@ SECTIONS
*(.rel.dyn*)
}
.rela.dyn : {
*(.rela*)
}
__init_data_end = .;
. = ALIGN(8);
......@@ -129,9 +122,27 @@ SECTIONS
*(.sdata*)
}
.rela.dyn : ALIGN(8) {
__rela_dyn_start = .;
*(.rela .rela*)
__rela_dyn_end = .;
}
#ifdef CONFIG_RELOCATABLE
.data.rel : { *(.data.rel*) }
.got : { *(.got*) }
.plt : { *(.plt) }
.dynamic : { *(.dynamic) }
.dynsym : { *(.dynsym) }
.dynstr : { *(.dynstr) }
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }
#endif
#ifdef CONFIG_EFI
.pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); }
__pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end);
__pecoff_data_raw_end = ABSOLUTE(.);
#endif
/* End of data section */
......@@ -142,6 +153,7 @@ SECTIONS
#ifdef CONFIG_EFI
. = ALIGN(PECOFF_SECTION_ALIGNMENT);
__pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
__pecoff_data_virt_end = ABSOLUTE(.);
#endif
_end = .;
......
# SPDX-License-Identifier: GPL-2.0-only
# init.o is always compiled with the medany code model; relocatable kernels
# additionally compile it with -fno-pie.
# NOTE(review): presumably because init.o contains the early code that runs
# before the kernel's relocations have been applied - confirm against init.c.
CFLAGS_init.o := -mcmodel=medany
ifdef CONFIG_RELOCATABLE
CFLAGS_init.o += -fno-pie
endif
ifdef CONFIG_FTRACE
CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
......
......@@ -20,6 +20,9 @@
#include <linux/dma-map-ops.h>
#include <linux/crash_dump.h>
#include <linux/hugetlb.h>
#ifdef CONFIG_RELOCATABLE
#include <linux/elf.h>
#endif
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
......@@ -146,7 +149,7 @@ static void __init print_vm_layout(void)
print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
#endif
print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR,
print_ml("kernel", (unsigned long)kernel_map.virt_addr,
(unsigned long)ADDRESS_SPACE_END);
}
}
......@@ -831,6 +834,44 @@ static __init void set_satp_mode(void)
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
#endif
#ifdef CONFIG_RELOCATABLE
extern unsigned long __rela_dyn_start, __rela_dyn_end;

/*
 * Walk the Elf64_Rela entries located between __rela_dyn_start and
 * __rela_dyn_end and apply every R_RISCV_RELATIVE one. Entries of any
 * other type are left untouched.
 */
static void __init relocate_kernel(void)
{
	Elf64_Rela *entry = (Elf64_Rela *)&__rela_dyn_start;
	Elf64_Rela *const table_end = (Elf64_Rela *)&__rela_dyn_end;
	/*
	 * Offset between the virtual address the kernel was linked at and
	 * the virtual address it is relocated to.
	 */
	const uintptr_t link_to_reloc = kernel_map.virt_addr - KERNEL_LINK_ADDR;
	/*
	 * Offset between the virtual address the kernel was linked at and
	 * its physical address.
	 */
	const uintptr_t link_to_phys = KERNEL_LINK_ADDR - kernel_map.phys_addr;

	while (entry < table_end) {
		if (entry->r_info == R_RISCV_RELATIVE) {
			/* Location to patch, reached through its physical address. */
			Elf64_Addr *slot = (Elf64_Addr *)(entry->r_offset - link_to_phys);
			Elf64_Addr value = entry->r_addend;

			/*
			 * Values below KERNEL_LINK_ADDR are stored as is: vdso
			 * symbols like rt_sigreturn are linked from address 0
			 * in vmlinux and are used as an offset from
			 * mm->context.vdso in the VDSO_OFFSET macro, so they
			 * must not be shifted.
			 */
			if (value >= KERNEL_LINK_ADDR)
				value += link_to_reloc;

			*slot = value;
		}
		entry++;
	}
}
#endif /* CONFIG_RELOCATABLE */
#ifdef CONFIG_XIP_KERNEL
static void __init create_kernel_page_table(pgd_t *pgdir,
__always_unused bool early)
......@@ -1029,6 +1070,17 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
#endif
#ifdef CONFIG_RELOCATABLE
/*
* Early page table uses only one PUD, which makes it possible
* to map PUD_SIZE aligned on PUD_SIZE: if the relocation offset
* makes the kernel cross over a PUD_SIZE boundary, raise a bug
* since a part of the kernel would not get mapped.
*/
BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size);
relocate_kernel();
#endif
apply_early_boot_alternatives();
pt_ops_set_early();
......
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-or-later
# Based on powerpc relocs_check.sh

# Warn about "suspicious" relocations in a vmlinux.
#
# Arguments: path to objdump, path to nm, path to vmlinux. All of them are
# handed unchanged to ${srctree}/scripts/relocs_check.sh, whose output is the
# list of candidate relocation lines.

[ $# -ge 3 ] || {
	echo "$0 [path to objdump] [path to nm] [path to vmlinux]" 1>&2
	exit 1
}

# R_RISCV_RELATIVE relocations are okay; whatever remains is reported.
leftover=$(${srctree}/scripts/relocs_check.sh "$@" | grep -F -w -v 'R_RISCV_RELATIVE')

# Nothing left over: finish quietly.
[ -n "$leftover" ] || exit 0

count=$(echo "$leftover" | wc -l)
echo "WARNING: $count bad relocations"
echo "$leftover"
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-or-later

# Print the relocation lines of a vmlinux, minus the ones known to be
# legitimate, so that an arch specific script can look for suspicious
# relocations in what is left.
#
# Arguments: $1 objdump command, $2 nm command, $3 vmlinux file.

objdump_cmd="$1"
nm_cmd="$2"
image="$3"

# Undefined weak symbols result in an absolute relocation to 0 and must not be
# reported. They show up in nm output as:
# " w _binary__btf_vmlinux_bin_end"
weak_undefined=$($nm_cmd "$image" | awk '$1 ~ /w/ { print $2 }')

# Filter stdin: drop lines that mention one of the undefined weak symbols, or
# pass everything through when there is no such symbol.
drop_weak_undefined() {
	if [ "$weak_undefined" ]; then
		# "|| cat" also runs whenever grep exits non-zero, for example
		# when every line was filtered out.
		grep -F -w -v "$weak_undefined" || cat
	else
		cat
	fi
}

# Keep only the relocation lines of the objdump output, then filter them.
$objdump_cmd -R "$image" | grep -E '\<R_' | drop_weak_undefined
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment