Commit 9326011e authored by Ingo Molnar's avatar Ingo Molnar

Merge branch 'x86/cleanups' into x86/cpu, to pick up dependent changes

Signed-off-by: default avatarIngo Molnar <mingo@kernel.org>
parents 77e5517c 32b1cbe3
...@@ -45,14 +45,13 @@ ...@@ -45,14 +45,13 @@
* We do the locked ops that don't return the old value as * We do the locked ops that don't return the old value as
* a mask operation on a byte. * a mask operation on a byte.
*/ */
#define IS_IMMEDIATE(nr) (__builtin_constant_p(nr))
#define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3)) #define CONST_MASK_ADDR(nr, addr) WBYTE_ADDR((void *)(addr) + ((nr)>>3))
#define CONST_MASK(nr) (1 << ((nr) & 7)) #define CONST_MASK(nr) (1 << ((nr) & 7))
static __always_inline void static __always_inline void
arch_set_bit(long nr, volatile unsigned long *addr) arch_set_bit(long nr, volatile unsigned long *addr)
{ {
if (IS_IMMEDIATE(nr)) { if (__builtin_constant_p(nr)) {
asm volatile(LOCK_PREFIX "orb %1,%0" asm volatile(LOCK_PREFIX "orb %1,%0"
: CONST_MASK_ADDR(nr, addr) : CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)CONST_MASK(nr)) : "iq" ((u8)CONST_MASK(nr))
...@@ -72,7 +71,7 @@ arch___set_bit(long nr, volatile unsigned long *addr) ...@@ -72,7 +71,7 @@ arch___set_bit(long nr, volatile unsigned long *addr)
static __always_inline void static __always_inline void
arch_clear_bit(long nr, volatile unsigned long *addr) arch_clear_bit(long nr, volatile unsigned long *addr)
{ {
if (IS_IMMEDIATE(nr)) { if (__builtin_constant_p(nr)) {
asm volatile(LOCK_PREFIX "andb %1,%0" asm volatile(LOCK_PREFIX "andb %1,%0"
: CONST_MASK_ADDR(nr, addr) : CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)~CONST_MASK(nr))); : "iq" ((u8)~CONST_MASK(nr)));
...@@ -123,7 +122,7 @@ arch___change_bit(long nr, volatile unsigned long *addr) ...@@ -123,7 +122,7 @@ arch___change_bit(long nr, volatile unsigned long *addr)
static __always_inline void static __always_inline void
arch_change_bit(long nr, volatile unsigned long *addr) arch_change_bit(long nr, volatile unsigned long *addr)
{ {
if (IS_IMMEDIATE(nr)) { if (__builtin_constant_p(nr)) {
asm volatile(LOCK_PREFIX "xorb %1,%0" asm volatile(LOCK_PREFIX "xorb %1,%0"
: CONST_MASK_ADDR(nr, addr) : CONST_MASK_ADDR(nr, addr)
: "iq" ((u8)CONST_MASK(nr))); : "iq" ((u8)CONST_MASK(nr)));
......
...@@ -61,6 +61,13 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; ...@@ -61,6 +61,13 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
#define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \ #define CHECK_BIT_IN_MASK_WORD(maskname, word, bit) \
(((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word )) (((bit)>>5)==(word) && (1UL<<((bit)&31) & maskname##word ))
/*
* {REQUIRED,DISABLED}_MASK_CHECK below may seem duplicated with the
* following BUILD_BUG_ON_ZERO() check but when NCAPINTS gets changed, all
* header macros which use NCAPINTS need to be changed. The duplicated macro
* use causes the compiler to issue errors for all headers so that all usage
* sites can be corrected.
*/
#define REQUIRED_MASK_BIT_SET(feature_bit) \ #define REQUIRED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \ ( CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 0, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 1, feature_bit) || \
......
...@@ -20,7 +20,6 @@ struct real_mode_header { ...@@ -20,7 +20,6 @@ struct real_mode_header {
u32 ro_end; u32 ro_end;
/* SMP trampoline */ /* SMP trampoline */
u32 trampoline_start; u32 trampoline_start;
u32 trampoline_status;
u32 trampoline_header; u32 trampoline_header;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
u32 trampoline_pgd; u32 trampoline_pgd;
......
...@@ -45,8 +45,8 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len); ...@@ -45,8 +45,8 @@ extern void text_poke_early(void *addr, const void *opcode, size_t len);
* no thread can be preempted in the instructions being modified (no iret to an * no thread can be preempted in the instructions being modified (no iret to an
* invalid instruction possible) or if the instructions are changed from a * invalid instruction possible) or if the instructions are changed from a
* consistent state to another consistent state atomically. * consistent state to another consistent state atomically.
* On the local CPU you need to be protected again NMI or MCE handlers seeing an * On the local CPU you need to be protected against NMI or MCE handlers seeing
* inconsistent instruction while you patch. * an inconsistent instruction while you patch.
*/ */
extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke(void *addr, const void *opcode, size_t len);
extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
......
...@@ -713,7 +713,7 @@ void __init alternative_instructions(void) ...@@ -713,7 +713,7 @@ void __init alternative_instructions(void)
* Don't stop machine check exceptions while patching. * Don't stop machine check exceptions while patching.
* MCEs only happen when something got corrupted and in this * MCEs only happen when something got corrupted and in this
* case we must do something about the corruption. * case we must do something about the corruption.
* Ignoring it is worse than a unlikely patching race. * Ignoring it is worse than an unlikely patching race.
* Also machine checks tend to be broadcast and if one CPU * Also machine checks tend to be broadcast and if one CPU
* goes into machine check the others follow quickly, so we don't * goes into machine check the others follow quickly, so we don't
* expect a machine check to cause undue problems during to code * expect a machine check to cause undue problems during to code
...@@ -753,8 +753,8 @@ void __init alternative_instructions(void) ...@@ -753,8 +753,8 @@ void __init alternative_instructions(void)
* When you use this code to patch more than one byte of an instruction * When you use this code to patch more than one byte of an instruction
* you need to make sure that other CPUs cannot execute this code in parallel. * you need to make sure that other CPUs cannot execute this code in parallel.
* Also no thread must be currently preempted in the middle of these * Also no thread must be currently preempted in the middle of these
* instructions. And on the local CPU you need to be protected again NMI or MCE * instructions. And on the local CPU you need to be protected against NMI or
* handlers seeing an inconsistent instruction while you patch. * MCE handlers seeing an inconsistent instruction while you patch.
*/ */
void __init_or_module text_poke_early(void *addr, const void *opcode, void __init_or_module text_poke_early(void *addr, const void *opcode,
size_t len) size_t len)
......
...@@ -265,9 +265,9 @@ static void early_init_intel(struct cpuinfo_x86 *c) ...@@ -265,9 +265,9 @@ static void early_init_intel(struct cpuinfo_x86 *c)
/* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
if (c->x86 == 6) { if (c->x86 == 6) {
switch (c->x86_model) { switch (c->x86_model) {
case 0x27: /* Penwell */ case INTEL_FAM6_ATOM_SALTWELL_MID:
case 0x35: /* Cloverview */ case INTEL_FAM6_ATOM_SALTWELL_TABLET:
case 0x4a: /* Merrifield */ case INTEL_FAM6_ATOM_SILVERMONT_MID:
set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
break; break;
default: default:
......
...@@ -225,8 +225,6 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem) ...@@ -225,8 +225,6 @@ static int elf_header_exclude_ranges(struct crash_mem *cmem)
if (crashk_low_res.end) { if (crashk_low_res.end) {
ret = crash_exclude_mem_range(cmem, crashk_low_res.start, ret = crash_exclude_mem_range(cmem, crashk_low_res.start,
crashk_low_res.end); crashk_low_res.end);
if (ret)
return ret;
} }
return ret; return ret;
......
...@@ -403,7 +403,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, ...@@ -403,7 +403,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op,
(u8 *)op->kp.addr + op->optinsn.size); (u8 *)op->kp.addr + op->optinsn.size);
len += RELATIVEJUMP_SIZE; len += RELATIVEJUMP_SIZE;
/* We have to use text_poke for instuction buffer because it is RO */ /* We have to use text_poke() for instruction buffer because it is RO */
text_poke(slot, buf, len); text_poke(slot, buf, len);
ret = 0; ret = 0;
out: out:
......
...@@ -90,8 +90,6 @@ static void ich_force_hpet_resume(void) ...@@ -90,8 +90,6 @@ static void ich_force_hpet_resume(void)
BUG(); BUG();
else else
printk(KERN_DEBUG "Force enabled HPET at resume\n"); printk(KERN_DEBUG "Force enabled HPET at resume\n");
return;
} }
static void ich_force_enable_hpet(struct pci_dev *dev) static void ich_force_enable_hpet(struct pci_dev *dev)
...@@ -448,7 +446,6 @@ static void nvidia_force_enable_hpet(struct pci_dev *dev) ...@@ -448,7 +446,6 @@ static void nvidia_force_enable_hpet(struct pci_dev *dev)
dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n", dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at 0x%lx\n",
force_hpet_address); force_hpet_address);
cached_dev = dev; cached_dev = dev;
return;
} }
/* ISA Bridges */ /* ISA Bridges */
...@@ -513,7 +510,6 @@ static void e6xx_force_enable_hpet(struct pci_dev *dev) ...@@ -513,7 +510,6 @@ static void e6xx_force_enable_hpet(struct pci_dev *dev)
force_hpet_resume_type = NONE_FORCE_HPET_RESUME; force_hpet_resume_type = NONE_FORCE_HPET_RESUME;
dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at " dev_printk(KERN_DEBUG, &dev->dev, "Force enabled HPET at "
"0x%lx\n", force_hpet_address); "0x%lx\n", force_hpet_address);
return;
} }
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E6XX_CU, DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E6XX_CU,
e6xx_force_enable_hpet); e6xx_force_enable_hpet);
......
...@@ -1023,8 +1023,6 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle) ...@@ -1023,8 +1023,6 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
int *cpu0_nmi_registered) int *cpu0_nmi_registered)
{ {
volatile u32 *trampoline_status =
(volatile u32 *) __va(real_mode_header->trampoline_status);
/* start_ip had better be page-aligned! */ /* start_ip had better be page-aligned! */
unsigned long start_ip = real_mode_header->trampoline_start; unsigned long start_ip = real_mode_header->trampoline_start;
...@@ -1116,9 +1114,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle, ...@@ -1116,9 +1114,6 @@ static int do_boot_cpu(int apicid, int cpu, struct task_struct *idle,
} }
} }
/* mark "stuck" area as not stuck */
*trampoline_status = 0;
if (x86_platform.legacy.warm_reset) { if (x86_platform.legacy.warm_reset) {
/* /*
* Cleanup possible dangling ends... * Cleanup possible dangling ends...
......
...@@ -19,7 +19,6 @@ GLOBAL(real_mode_header) ...@@ -19,7 +19,6 @@ GLOBAL(real_mode_header)
.long pa_ro_end .long pa_ro_end
/* SMP trampoline */ /* SMP trampoline */
.long pa_trampoline_start .long pa_trampoline_start
.long pa_trampoline_status
.long pa_trampoline_header .long pa_trampoline_header
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
.long pa_trampoline_pgd; .long pa_trampoline_pgd;
......
...@@ -41,9 +41,6 @@ ENTRY(trampoline_start) ...@@ -41,9 +41,6 @@ ENTRY(trampoline_start)
movl tr_start, %eax # where we need to go movl tr_start, %eax # where we need to go
movl $0xA5A5A5A5, trampoline_status
# write marker for master knows we're running
/* /*
* GDT tables in non default location kernel can be beyond 16MB and * GDT tables in non default location kernel can be beyond 16MB and
* lgdt will not be able to load the address as in real mode default * lgdt will not be able to load the address as in real mode default
......
...@@ -49,9 +49,6 @@ ENTRY(trampoline_start) ...@@ -49,9 +49,6 @@ ENTRY(trampoline_start)
mov %ax, %es mov %ax, %es
mov %ax, %ss mov %ax, %ss
movl $0xA5A5A5A5, trampoline_status
# write marker for master knows we're running
# Setup stack # Setup stack
movl $rm_stack_end, %esp movl $rm_stack_end, %esp
......
...@@ -2,7 +2,3 @@ ...@@ -2,7 +2,3 @@
.section ".rodata","a" .section ".rodata","a"
.balign 16 .balign 16
tr_idt: .fill 1, 6, 0 tr_idt: .fill 1, 6, 0
.bss
.balign 4
GLOBAL(trampoline_status) .space 4
...@@ -181,7 +181,7 @@ struct prctl_mm_map { ...@@ -181,7 +181,7 @@ struct prctl_mm_map {
#define PR_GET_THP_DISABLE 42 #define PR_GET_THP_DISABLE 42
/* /*
* Tell the kernel to start/stop helping userspace manage bounds tables. * No longer implemented, but left here to ensure the numbers stay reserved:
*/ */
#define PR_MPX_ENABLE_MANAGEMENT 43 #define PR_MPX_ENABLE_MANAGEMENT 43
#define PR_MPX_DISABLE_MANAGEMENT 44 #define PR_MPX_DISABLE_MANAGEMENT 44
......
...@@ -103,12 +103,6 @@ ...@@ -103,12 +103,6 @@
#ifndef SET_TSC_CTL #ifndef SET_TSC_CTL
# define SET_TSC_CTL(a) (-EINVAL) # define SET_TSC_CTL(a) (-EINVAL)
#endif #endif
#ifndef MPX_ENABLE_MANAGEMENT
# define MPX_ENABLE_MANAGEMENT() (-EINVAL)
#endif
#ifndef MPX_DISABLE_MANAGEMENT
# define MPX_DISABLE_MANAGEMENT() (-EINVAL)
#endif
#ifndef GET_FP_MODE #ifndef GET_FP_MODE
# define GET_FP_MODE(a) (-EINVAL) # define GET_FP_MODE(a) (-EINVAL)
#endif #endif
...@@ -2456,15 +2450,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, ...@@ -2456,15 +2450,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
up_write(&me->mm->mmap_sem); up_write(&me->mm->mmap_sem);
break; break;
case PR_MPX_ENABLE_MANAGEMENT: case PR_MPX_ENABLE_MANAGEMENT:
if (arg2 || arg3 || arg4 || arg5)
return -EINVAL;
error = MPX_ENABLE_MANAGEMENT();
break;
case PR_MPX_DISABLE_MANAGEMENT: case PR_MPX_DISABLE_MANAGEMENT:
if (arg2 || arg3 || arg4 || arg5) /* No longer implemented: */
return -EINVAL; return -EINVAL;
error = MPX_DISABLE_MANAGEMENT();
break;
case PR_SET_FP_MODE: case PR_SET_FP_MODE:
error = SET_FP_MODE(me, arg2); error = SET_FP_MODE(me, arg2);
break; break;
......
...@@ -21,10 +21,6 @@ KCOV_INSTRUMENT_dynamic_debug.o := n ...@@ -21,10 +21,6 @@ KCOV_INSTRUMENT_dynamic_debug.o := n
ifdef CONFIG_AMD_MEM_ENCRYPT ifdef CONFIG_AMD_MEM_ENCRYPT
KASAN_SANITIZE_string.o := n KASAN_SANITIZE_string.o := n
ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_string.o = -pg
endif
CFLAGS_string.o := $(call cc-option, -fno-stack-protector) CFLAGS_string.o := $(call cc-option, -fno-stack-protector)
endif endif
......
...@@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) ...@@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie) CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ check_initial_reg_state sigreturn iopl ioperm \
protection_keys test_vdso test_vsyscall mov_ss_trap \ protection_keys test_vdso test_vsyscall mov_ss_trap \
syscall_arg_fault syscall_arg_fault
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MPX_DEBUG_H
#define _MPX_DEBUG_H

/*
 * Verbosity threshold for the dprintfN() helpers below.  Messages whose
 * level is greater than DEBUG_LEVEL are suppressed.  Define DEBUG_LEVEL
 * before including this header to override the default of 0.
 */
#ifndef DEBUG_LEVEL
#define DEBUG_LEVEL 0
#endif

/*
 * Print with printf() when @level <= DEBUG_LEVEL.  The level argument is
 * parenthesized so that an expression such as "a | b" is compared as a
 * whole instead of being split by operator precedence.
 */
#define dprintf_level(level, args...) do { if ((level) <= DEBUG_LEVEL) printf(args); } while (0)
#define dprintf1(args...) dprintf_level(1, args)
#define dprintf2(args...) dprintf_level(2, args)
#define dprintf3(args...) dprintf_level(3, args)
#define dprintf4(args...) dprintf_level(4, args)
#define dprintf5(args...) dprintf_level(5, args)
#endif /* _MPX_DEBUG_H */
// SPDX-License-Identifier: GPL-2.0
/*
* Written by Dave Hansen <dave.hansen@intel.com>
*/
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <string.h>
#include <fcntl.h>
#include "mpx-debug.h"
#include "mpx-mm.h"
#include "mpx-hw.h"
unsigned long bounds_dir_global;
/* Abort the program, reporting the file, function and line of the caller. */
#define mpx_dig_abort() __mpx_dig_abort(__FILE__, __func__, __LINE__)

/*
 * Emit the abort location on stderr and then on stdout, and terminate
 * the process with abort().  Never returns.
 */
static inline void __mpx_dig_abort(const char *file, const char *func, int line)
{
	FILE *sinks[2] = { stderr, stdout };
	int k;

	for (k = 0; k < 2; k++)
		fprintf(sinks[k], "MPX dig abort @ %s::%d in %s()\n",
			file, line, func);
	abort();
}
/*
* run like this (BDIR finds the probable bounds directory):
*
* BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \
* | head -1 | awk -F- '{print $1}')";
* ./mpx-dig $pid 0x$BDIR
*
* NOTE:
* assumes that the only 2097152-kb VMA is the bounds dir
*/
/*
 * Count the pages in [ptr, ptr + size_bytes) that mincore() reports as
 * resident.
 *
 * @ptr:        start of the region handed to mincore()
 * @size_bytes: length of the region in bytes
 *
 * Returns the number of resident pages.  Aborts the program if the status
 * vector cannot be allocated or if mincore() fails.
 */
long nr_incore(void *ptr, unsigned long size_bytes)
{
	/*
	 * mincore() writes one status byte per page and counts a trailing
	 * partial page, so the vector length must be rounded up.
	 */
	unsigned long nr_pages = (size_bytes + PAGE_SIZE - 1) / PAGE_SIZE;
	unsigned char *vec;
	unsigned long i;
	long ret = 0;
	int incore_ret;

	/* malloc(0) may legitimately return NULL; never request zero bytes */
	vec = malloc(nr_pages ? nr_pages : 1);
	if (!vec)
		mpx_dig_abort();

	incore_ret = mincore(ptr, size_bytes, vec);
	if (incore_ret) {
		printf("mincore ret: %d\n", incore_ret);
		perror("mincore");
		mpx_dig_abort();
	}

	/* only the least significant bit of each status byte is defined */
	for (i = 0; i < nr_pages; i++)
		ret += vec[i] & 1;

	free(vec);
	return ret;
}
/*
 * Open /proc/<pid>/<file> read-only.
 *
 * Returns the descriptor from open(); on failure the (negative) result is
 * returned after the attempted path has been reported with perror().
 * The path is built in a static buffer, so the function is not reentrant.
 */
int open_proc(int pid, char *file)
{
	static char path[100];
	int fd;

	snprintf(path, sizeof(path), "/proc/%d/%s", pid, file);
	fd = open(path, O_RDONLY);
	if (fd >= 0)
		return fd;

	perror(path);
	return fd;
}
/* One address range: start is inclusive, end is exclusive (see vaddr_in_range()). */
struct vaddr_range {
unsigned long start;
unsigned long end;
};
/* Array of ranges filled from /proc/<pid>/maps by __pid_load_vaddrs(). */
struct vaddr_range *ranges;
/* Number of elements the 'ranges' array has room for. */
int nr_ranges_allocated;
/* Number of elements of 'ranges' that currently hold parsed data. */
int nr_ranges_populated;
/* Index of the range that satisfied the most recent lookup; -1 when unset. */
int last_range = -1;
/*
 * Parse /proc/<pid>/maps into the global 'ranges' array.
 *
 * Resets nr_ranges_populated and last_range.  Returns 0 when every line
 * was stored, or -E2BIG when 'ranges' is too small to hold all mappings
 * (the caller is expected to grow the array and retry).  Aborts the
 * program if the file cannot be opened, read or parsed.
 */
int __pid_load_vaddrs(int pid)
{
	int ret = 0;
	int proc_maps_fd = open_proc(pid, "maps");
	char linebuf[10000];
	unsigned long start;
	unsigned long end;
	char rest[1000];
	FILE *f = fdopen(proc_maps_fd, "r");

	if (!f)
		mpx_dig_abort();
	nr_ranges_populated = 0;

	/* fgets() returning NULL ends the loop on both EOF and read errors */
	while (fgets(linebuf, sizeof(linebuf), f)) {
		int parsed;

		/* the field width keeps %s from overrunning rest[1000] */
		parsed = sscanf(linebuf, "%lx-%lx%999s", &start, &end, rest);
		if (parsed != 3)
			mpx_dig_abort();

		dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest);
		if (nr_ranges_populated >= nr_ranges_allocated) {
			ret = -E2BIG;
			break;
		}
		ranges[nr_ranges_populated].start = start;
		ranges[nr_ranges_populated].end = end;
		nr_ranges_populated++;
	}
	/* a NULL from fgets() that was not end-of-file is a hard error */
	if (ferror(f))
		mpx_dig_abort();

	last_range = -1;
	/*
	 * fclose() also closes the descriptor that fdopen() wrapped, so
	 * proc_maps_fd must not be passed to close() a second time.
	 */
	fclose(f);
	return ret;
}
/*
 * Load the mappings of @pid into 'ranges', growing the array as needed.
 *
 * The array starts with room for 4 ranges and is doubled each time
 * __pid_load_vaddrs() reports -E2BIG.  Returns the final result of
 * __pid_load_vaddrs() (0 on success).  Aborts the program when memory
 * cannot be allocated.
 */
int pid_load_vaddrs(int pid)
{
	int ret;

	dprintf2("%s(%d)\n", __func__, pid);
	if (!ranges) {
		nr_ranges_allocated = 4;
		ranges = malloc(nr_ranges_allocated * sizeof(ranges[0]));
		dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid,
			 nr_ranges_allocated, ranges);
		/* explicit check: assert() disappears when NDEBUG is defined */
		if (!ranges)
			mpx_dig_abort();
	}

	for (;;) {
		struct vaddr_range *grown;

		ret = __pid_load_vaddrs(pid);
		/* only "array too small" is worth retrying */
		if (ret != -E2BIG)
			break;

		dprintf2("%s(%d) need to realloc\n", __func__, pid);
		nr_ranges_allocated *= 2;
		/* keep the old pointer until realloc() is known to have worked */
		grown = realloc(ranges, nr_ranges_allocated * sizeof(ranges[0]));
		if (!grown)
			mpx_dig_abort();
		ranges = grown;
		dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__,
			 pid, nr_ranges_allocated, ranges);
		dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated);
	}

	dprintf2("%s(%d) done\n", __func__, pid);
	return ret;
}
/*
 * Return 1 when @vaddr lies inside @r, treating r->start as inclusive
 * and r->end as exclusive; return 0 otherwise.
 */
static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r)
{
	return (vaddr >= r->start && vaddr < r->end) ? 1 : 0;
}
/*
 * Return 1 when @vaddr falls inside one of the populated entries of
 * 'ranges', 0 otherwise.
 *
 * The index of the matching range is remembered in last_range so that
 * repeated lookups in the same mapping skip the linear scan.
 */
static inline int vaddr_mapped_by_range(unsigned long vaddr)
{
	int i;

	/* index 0 is a valid cached hit; only -1 means "nothing cached" */
	if (last_range >= 0 && vaddr_in_range(vaddr, &ranges[last_range]))
		return 1;

	for (i = 0; i < nr_ranges_populated; i++) {
		/* skip ranges that do NOT contain the address */
		if (!vaddr_in_range(vaddr, &ranges[i]))
			continue;
		last_range = i;
		return 1;
	}
	return 0;
}
/* Size of one bounds-table entry as walked by dump_table(): four unsigned longs. */
const int bt_entry_size_bytes = sizeof(unsigned long) * 4;
/*
 * Return a pointer through which the bounds table at @table_vaddr can be
 * read.
 *
 * Without MPX_DIG_STANDALONE the address is simply cast to a pointer and
 * returned.  With MPX_DIG_STANDALONE the table is copied into a static
 * buffer with lseek()/read(), and the program aborts on a failed seek or
 * a short read.
 *
 * NOTE(review): the standalone branch uses 'fd', which is not declared in
 * this function or anywhere visible here - confirm that branch builds.
 */
void *read_bounds_table_into_buf(unsigned long table_vaddr)
{
#ifdef MPX_DIG_STANDALONE
static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET);
if (seek_ret != table_vaddr)
mpx_dig_abort();
int read_ret = read(fd, &bt_buf, sizeof(bt_buf));
if (read_ret != sizeof(bt_buf))
mpx_dig_abort();
return &bt_buf;
#else
return (void *)table_vaddr;
#endif
}
/*
 * Walk one bounds table and verify that every non-empty entry describes
 * an address that is currently mapped.
 *
 * @table_vaddr:           address of the bounds table
 * @base_controlled_vaddr: first virtual address covered by this table
 * @bde_vaddr:             address of the directory entry that points at
 *                         the table (used for diagnostics only)
 *
 * Returns the number of non-empty entries.  Aborts the program when an
 * entry refers to an address with no mapping or the table is unreadable.
 */
int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr,
	       unsigned long bde_vaddr)
{
	/* an entry is bt_entry_size_bytes long, i.e. this many unsigned longs */
	const int words_per_entry = bt_entry_size_bytes / sizeof(unsigned long);
	unsigned long offset_inside_bt;
	int nr_entries = 0;
	char *bt_buf;

	dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n",
		 __func__, base_controlled_vaddr, bde_vaddr);

	bt_buf = read_bounds_table_into_buf(table_vaddr);

	dprintf4("%s() read done\n", __func__);

	/* validate the buffer once, before any pointer is derived from it */
	if (!bt_buf) {
		printf("null bt_buf\n");
		mpx_dig_abort();
	}

	for (offset_inside_bt = 0;
	     offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES;
	     offset_inside_bt += bt_entry_size_bytes) {
		unsigned long bt_entry_index;
		unsigned long bt_entry_controls;
		unsigned long this_bt_entry_for_vaddr;
		unsigned long *bt_entry_buf;
		int i;

		dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__,
			 offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES);
		bt_entry_buf = (void *)&bt_buf[offset_inside_bt];
		dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__,
			 bt_entry_buf);

		/* an all-zero entry is unused */
		if (!bt_entry_buf[0] &&
		    !bt_entry_buf[1] &&
		    !bt_entry_buf[2] &&
		    !bt_entry_buf[3])
			continue;

		nr_entries++;

		/* each entry covers one pointer-sized slot of address space */
		bt_entry_index = offset_inside_bt/bt_entry_size_bytes;
		bt_entry_controls = sizeof(void *);
		this_bt_entry_for_vaddr =
			base_controlled_vaddr + bt_entry_index*bt_entry_controls;
		/*
		 * We sign extend vaddr bits 48->63 which effectively
		 * creates a hole in the virtual address space.
		 * This calculation corrects for the hole.
		 */
		if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL)
			this_bt_entry_for_vaddr |= 0xffff800000000000;

		if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) {
			printf("bt_entry_buf: %p\n", bt_entry_buf);
			printf("there is a bte for %lx but no mapping\n",
			       this_bt_entry_for_vaddr);
			printf(" bde vaddr: %016lx\n", bde_vaddr);
			printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr);
			printf(" table_vaddr: %016lx\n", table_vaddr);
			printf(" entry vaddr: %016lx @ offset %lx\n",
			       table_vaddr + offset_inside_bt, offset_inside_bt);
			mpx_dig_abort();
		}
		if (DEBUG_LEVEL < 4)
			continue;

		/*
		 * Dump the words of this entry.  The index counts unsigned
		 * longs, not bytes, so it stays inside the current entry.
		 */
		printf("table entry[%lx]: ", offset_inside_bt);
		for (i = 0; i < words_per_entry; i++)
			printf("0x%016lx ", bt_entry_buf[i]);
		printf("\n");
	}
	dprintf4("%s() done\n", __func__);
	return nr_entries;
}
/*
 * Scan a chunk of the bounds directory and check every populated entry.
 *
 * @buf:               the directory chunk to scan
 * @len_bytes:         number of bytes of @buf to scan
 * @bd_offset_bytes:   byte offset of @buf[0] within the whole directory
 * @nr_populated_bdes: counter incremented once per non-zero directory entry
 *
 * For each non-zero entry the enable bit is masked off, the first and last
 * byte of the table it points at are checked against the loaded address
 * ranges (aborting if either is unmapped), and dump_table() is run on it.
 * Returns the sum of the dump_table() results for this chunk.
 */
int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes,
int *nr_populated_bdes)
{
unsigned long i;
int total_entries = 0;
dprintf3("%s(%p, %x, %lx, ...) buf end: %p\n", __func__, buf,
len_bytes, bd_offset_bytes, buf + len_bytes);
/* i is a byte offset into buf; one directory entry is an unsigned long */
for (i = 0; i < len_bytes; i += sizeof(unsigned long)) {
/* index of this entry within the whole directory, not just this chunk */
unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long);
unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i];
unsigned long bounds_dir_entry;
unsigned long bd_for_vaddr;
unsigned long bt_start;
unsigned long bt_tail;
int nr_entries;
dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i,
bounds_dir_entry_ptr);
bounds_dir_entry = *bounds_dir_entry_ptr;
if (!bounds_dir_entry) {
dprintf4("no bounds dir at index 0x%lx / 0x%lx "
"start at offset:%lx %lx\n", bd_index, bd_index,
bd_offset_bytes, i);
continue;
}
/* NOTE(review): the value printed as "index" here is the byte offset i */
dprintf3("found bounds_dir_entry: 0x%lx @ "
"index 0x%lx buf ptr: %p\n", bounds_dir_entry, i,
&buf[i]);
/* mask off the enable bit: */
bounds_dir_entry &= ~0x1;
(*nr_populated_bdes)++;
dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes);
dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes);
/* first and last byte of the table this entry points at */
bt_start = bounds_dir_entry;
bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1;
if (!vaddr_mapped_by_range(bt_start)) {
printf("bounds directory 0x%lx points to nowhere\n",
bounds_dir_entry);
mpx_dig_abort();
}
if (!vaddr_mapped_by_range(bt_tail)) {
printf("bounds directory end 0x%lx points to nowhere\n",
bt_tail);
mpx_dig_abort();
}
/*
* Each bounds directory entry controls 1MB of virtual address
* space. This variable is the virtual address in the process
* of the beginning of the area controlled by this bounds_dir.
*/
bd_for_vaddr = bd_index * (1UL<<20);
nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr,
bounds_dir_global+bd_offset_bytes+i);
total_entries += nr_entries;
dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries "
"total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n",
bd_index, buf+i,
bounds_dir_entry, nr_entries, total_entries,
bd_for_vaddr, bd_for_vaddr + (1UL<<20));
}
dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes,
bd_offset_bytes);
return total_entries;
}
/* Descriptor used to read another process's memory; -1 until it is opened. */
int proc_pid_mem_fd = -1;

/*
 * Copy part of the bounds directory into @buffer by reading
 * proc_pid_mem_fd.
 *
 * @byte_offset_inside_bounds_dir: offset, from bounds_dir_global, to read at
 * @buffer_size_bytes:             number of bytes to read
 * @buffer:                        destination for the data
 *
 * Returns @buffer.  Aborts the program if the seek does not land on the
 * requested address or if fewer bytes than requested are read.
 */
void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir,
				long buffer_size_bytes, void *buffer)
{
	unsigned long target = bounds_dir_global + byte_offset_inside_bounds_dir;
	off_t landed;
	int nr_read;

	landed = lseek(proc_pid_mem_fd, target, SEEK_SET);
	if (landed != target)
		mpx_dig_abort();

	nr_read = read(proc_pid_mem_fd, buffer, buffer_size_bytes);
	/* there shouldn't practically be short reads of /proc/$pid/mem */
	if (nr_read != buffer_size_bytes)
		mpx_dig_abort();

	return buffer;
}
/*
 * Locate a chunk of the caller's own bounds directory and report whether
 * it is worth scanning.
 *
 * @byte_offset_inside_bounds_dir: offset, from bounds_dir_global, of the chunk
 * @buffer_size_bytes:             length of the chunk in bytes
 * @buffer:                        not used by this variant
 *
 * Returns a pointer straight into the bounds directory, or NULL when the
 * mincore() status bytes for the chunk sum to zero.  Aborts the program if
 * mincore() fails.
 */
void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir,
			       long buffer_size_bytes, void *buffer)
{
	unsigned char resident_map[buffer_size_bytes / PAGE_SIZE];
	size_t nr_slots = sizeof(resident_map);
	char *dig_bounds_dir_ptr =
		(void *)(bounds_dir_global + byte_offset_inside_bounds_dir);
	int resident_total = 0;
	int incore_ret;
	size_t slot;

	dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr);

	/*
	 * use mincore() to quickly find the areas of the bounds directory
	 * that have memory and thus will be worth scanning.
	 */
	incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes,
			     &resident_map[0]);
	if (incore_ret) {
		printf("mincore ret: %d\n", incore_ret);
		perror("mincore");
		mpx_dig_abort();
	}

	for (slot = 0; slot < nr_slots; slot++)
		resident_total += resident_map[slot];

	dprintf4("%s() total incore: %d\n", __func__, resident_total);
	if (resident_total == 0)
		return NULL;

	dprintf3("%s() total incore: %d\n", __func__, resident_total);
	return dig_bounds_dir_ptr;
}
/*
 * Walk the bounds directory in chunks of sizeof(bounds_dir_buf) bytes and
 * run search_bd_buf() on every chunk that the fill helper returns.
 *
 * Prints a one-line summary and returns total_entries + nr_populated_bdes.
 *
 * NOTE(review): when @pid is not the calling process the function aborts
 * right away, so the fill_bounds_dir_buf_other() call below looks
 * unreachable, and 'inspect_self' is never assigned on that path -
 * confirm whether remote inspection is meant to work.
 */
int inspect_pid(int pid)
{
/* counts completed digs across calls; shown in the summary line */
static int dig_nr;
long offset_inside_bounds_dir;
char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)];
char *dig_bounds_dir_ptr;
int total_entries = 0;
int nr_populated_bdes = 0;
int inspect_self;
if (getpid() == pid) {
dprintf4("inspecting self\n");
inspect_self = 1;
} else {
dprintf4("inspecting pid %d\n", pid);
mpx_dig_abort();
}
/*
 * NOTE(review): this loop steps through the bounds *directory* but is
 * bounded by MPX_BOUNDS_TABLE_SIZE_BYTES - confirm whether
 * MPX_BOUNDS_DIR_SIZE_BYTES was intended.
 */
for (offset_inside_bounds_dir = 0;
offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES;
offset_inside_bounds_dir += sizeof(bounds_dir_buf)) {
/* incremented for every chunk skipped; not read anywhere in this function */
static int bufs_skipped;
int this_entries;
if (inspect_self) {
dig_bounds_dir_ptr =
fill_bounds_dir_buf_self(offset_inside_bounds_dir,
sizeof(bounds_dir_buf),
&bounds_dir_buf[0]);
} else {
dig_bounds_dir_ptr =
fill_bounds_dir_buf_other(offset_inside_bounds_dir,
sizeof(bounds_dir_buf),
&bounds_dir_buf[0]);
}
/* a NULL chunk pointer means there is nothing to scan here */
if (!dig_bounds_dir_ptr) {
bufs_skipped++;
continue;
}
this_entries = search_bd_buf(dig_bounds_dir_ptr,
sizeof(bounds_dir_buf),
offset_inside_bounds_dir,
&nr_populated_bdes);
total_entries += this_entries;
}
printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr,
total_entries, nr_populated_bdes);
return total_entries + nr_populated_bdes;
}
#ifdef MPX_DIG_REMOTE
/*
 * Standalone entry point: mpx-dig <pid> <bounds-dir-address-in-hex>
 *
 * Parses the two arguments, opens /proc/<pid>/mem and hands the pid to
 * inspect_pid().  Returns 1 with a usage message when arguments are
 * missing; aborts when an argument does not parse or the mem file cannot
 * be opened.
 */
int main(int argc, char **argv)
{
	int err;
	int pid;

	printf("mpx-dig starting...\n");

	/* argv[1] and argv[2] are read below, so both must be present */
	if (argc < 3) {
		fprintf(stderr, "usage: %s <pid> <bounds-dir-vaddr-hex>\n",
			argc > 0 ? argv[0] : "mpx-dig");
		return 1;
	}

	err = sscanf(argv[1], "%d", &pid);
	printf("parsing: '%s', err: %d\n", argv[1], err);
	if (err != 1)
		mpx_dig_abort();

	err = sscanf(argv[2], "%lx", &bounds_dir_global);
	printf("parsing: '%s': %d\n", argv[2], err);
	if (err != 1)
		mpx_dig_abort();

	proc_pid_mem_fd = open_proc(pid, "mem");
	if (proc_pid_mem_fd < 0)
		mpx_dig_abort();

	inspect_pid(pid);
	return 0;
}
#endif
/*
 * Inspect the calling process: reload its address ranges, record
 * @bounds_dir as the directory to walk, and return the result of
 * inspect_pid() for the current pid.
 */
long inspect_me(struct mpx_bounds_dir *bounds_dir)
{
	int self = getpid();

	pid_load_vaddrs(self);
	bounds_dir_global = (unsigned long)bounds_dir;
	dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir);

	return inspect_pid(self);
}
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MPX_HW_H
#define _MPX_HW_H
#include <assert.h>
/* Describe the MPX Hardware Layout in here */
#define NR_MPX_BOUNDS_REGISTERS 4
#ifdef __i386__
#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 16 /* 4 * 32-bits */
#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 14) /* 16k */
#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 4
#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 22) /* 4MB */
#define MPX_BOUNDS_TABLE_BOTTOM_BIT 2
#define MPX_BOUNDS_TABLE_TOP_BIT 11
#define MPX_BOUNDS_DIR_BOTTOM_BIT 12
#define MPX_BOUNDS_DIR_TOP_BIT 31
#else
/*
* Linear Address of "pointer" (LAp)
* 0 -> 2: ignored
* 3 -> 19: index in to bounds table
* 20 -> 47: index in to bounds directory
* 48 -> 63: ignored
*/
#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 32
#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 22) /* 4MB */
#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 8
#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 31) /* 2GB */
#define MPX_BOUNDS_TABLE_BOTTOM_BIT 3
#define MPX_BOUNDS_TABLE_TOP_BIT 19
#define MPX_BOUNDS_DIR_BOTTOM_BIT 20
#define MPX_BOUNDS_DIR_TOP_BIT 47
#endif
#define MPX_BOUNDS_DIR_NR_ENTRIES \
(MPX_BOUNDS_DIR_SIZE_BYTES/MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES)
#define MPX_BOUNDS_TABLE_NR_ENTRIES \
(MPX_BOUNDS_TABLE_SIZE_BYTES/MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES)
#define MPX_BOUNDS_TABLE_ENTRY_VALID_BIT 0x1
/*
 * One bounds directory entry, MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES long.
 * 'x' gives the entry its size; 'contents' is a zero-length array that
 * overlays the same bytes so they can be viewed as a pointer.
 */
struct mpx_bd_entry {
union {
char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
void *contents[0];
};
} __attribute__((packed));
/*
 * One bounds table entry, MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES long.
 * 'x' gives the entry its size; 'contents' is a zero-length array that
 * overlays the same bytes so they can be viewed as unsigned longs.
 */
struct mpx_bt_entry {
union {
char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
unsigned long contents[0];
};
} __attribute__((packed));
/* The whole bounds directory: MPX_BOUNDS_DIR_NR_ENTRIES directory entries. */
struct mpx_bounds_dir {
struct mpx_bd_entry entries[MPX_BOUNDS_DIR_NR_ENTRIES];
} __attribute__((packed));
/* One bounds table: MPX_BOUNDS_TABLE_NR_ENTRIES table entries. */
struct mpx_bounds_table {
struct mpx_bt_entry entries[MPX_BOUNDS_TABLE_NR_ENTRIES];
} __attribute__((packed));
/*
 * Extract the bit field [bottombit, topbit] from @val.
 *
 * Both ends are inclusive, matching the *_BOTTOM_BIT / *_TOP_BIT pairs in
 * this header (for example 3 -> 19 is a 17-bit bounds table index).  The
 * field is returned shifted down so that its lowest bit is bit 0.
 */
static inline unsigned long GET_BITS(unsigned long val, int bottombit, int topbit)
{
	/* an inclusive range N..M spans M - N + 1 bits */
	int total_nr_bits = topbit - bottombit + 1;
	unsigned long mask;

	/* shifting by the full width of the type is undefined: keep all bits */
	if (total_nr_bits >= (int)(sizeof(unsigned long) * 8))
		mask = ~0UL;
	else
		mask = (1UL << total_nr_bits) - 1;

	return (val >> bottombit) & mask;
}
/*
 * Return the bounds table index encoded in @vaddr: the field that
 * GET_BITS() selects between MPX_BOUNDS_TABLE_BOTTOM_BIT and
 * MPX_BOUNDS_TABLE_TOP_BIT.
 */
static inline unsigned long __vaddr_bounds_table_index(void *vaddr)
{
	unsigned long addr_bits = (unsigned long)vaddr;

	return GET_BITS(addr_bits, MPX_BOUNDS_TABLE_BOTTOM_BIT,
			MPX_BOUNDS_TABLE_TOP_BIT);
}
/*
 * Return the bounds directory index encoded in @vaddr: the field that
 * GET_BITS() selects between MPX_BOUNDS_DIR_BOTTOM_BIT and
 * MPX_BOUNDS_DIR_TOP_BIT.
 */
static inline unsigned long __vaddr_bounds_directory_index(void *vaddr)
{
	unsigned long addr_bits = (unsigned long)vaddr;

	return GET_BITS(addr_bits, MPX_BOUNDS_DIR_BOTTOM_BIT,
			MPX_BOUNDS_DIR_TOP_BIT);
}
/*
 * Return the entry of @bounds_dir selected by the directory index that
 * __vaddr_bounds_directory_index() computes for @vaddr.
 */
static inline struct mpx_bd_entry *mpx_vaddr_to_bd_entry(void *vaddr,
		struct mpx_bounds_dir *bounds_dir)
{
	return bounds_dir->entries + __vaddr_bounds_directory_index(vaddr);
}
/*
 * Return non-zero when the value stored in @bounds_dir_entry has
 * MPX_BOUNDS_TABLE_ENTRY_VALID_BIT set.
 */
static inline int bd_entry_valid(struct mpx_bd_entry *bounds_dir_entry)
{
	/*
	 * Read the stored value via contents[0].  The bare array name
	 * 'contents' decays to the address of the entry itself, which says
	 * nothing about the valid bit.
	 */
	unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents[0];

	return (__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
}
/*
 * Convert a bounds directory entry into a pointer to the bounds table it
 * refers to.  Asserts that the entry's valid bit is set, then strips that
 * bit from the stored value to obtain the table address.
 */
static inline struct mpx_bounds_table *
__bd_entry_to_bounds_table(struct mpx_bd_entry *bounds_dir_entry)
{
	/*
	 * Read the stored value via contents[0]; the bare array name
	 * 'contents' would yield the address of the entry itself.
	 */
	unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents[0];

	assert(__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
	__bd_entry &= ~MPX_BOUNDS_TABLE_ENTRY_VALID_BIT;
	return (struct mpx_bounds_table *)__bd_entry;
}
/*
 * Resolve @vaddr to its bounds table entry: look up the directory entry
 * in @bounds_dir, convert it to a table pointer with
 * __bd_entry_to_bounds_table(), and index that table with
 * __vaddr_bounds_table_index().
 */
static inline struct mpx_bt_entry *
mpx_vaddr_to_bt_entry(void *vaddr, struct mpx_bounds_dir *bounds_dir)
{
	struct mpx_bd_entry *dir_slot;
	struct mpx_bounds_table *table;
	unsigned long slot_nr;

	dir_slot = mpx_vaddr_to_bd_entry(vaddr, bounds_dir);
	table = __bd_entry_to_bounds_table(dir_slot);
	slot_nr = __vaddr_bounds_table_index(vaddr);

	return table->entries + slot_nr;
}
#endif /* _MPX_HW_H */
// SPDX-License-Identifier: GPL-2.0-only
/*
* mpx-mini-test.c: routines to test Intel MPX (Memory Protection eXtensions)
*
* Written by:
* "Ren, Qiaowei" <qiaowei.ren@intel.com>
* "Wei, Gang" <gang.wei@intel.com>
* "Hansen, Dave" <dave.hansen@intel.com>
*/
/*
* 2014-12-05: Dave Hansen: fixed all of the compiler warnings, and made sure
* it works on 32-bit.
*/
/*
 * Test tunables.  NOTE(review): their users are outside this view;
 * presumably these are intervals, counted in allocations, between
 * bounds-table inspections and between full zaps - confirm at the use sites.
 */
int inspect_every_this_many_mallocs = 100;
int zap_all_every_this_many_mallocs = 1000;
#define _GNU_SOURCE
#define _LARGEFILE64_SOURCE
#include <string.h>
#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>
#include <signal.h>
#include <assert.h>
#include <stdlib.h>
#include <ucontext.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include "mpx-hw.h"
#include "mpx-debug.h"
#include "mpx-mm.h"
/*
 * Fallback for C libraries whose headers do not already provide
 * __always_inline.  The attribute needs both closing parentheses;
 * with only one, every use of the macro is a syntax error.
 */
#ifndef __always_inline
#define __always_inline inline __attribute__((always_inline))
#endif
#ifndef TEST_DURATION_SECS
#define TEST_DURATION_SECS 3
#endif
/*
 * Format "<prefix><int_to_write>" and push it to 'file' with a single
 * write().  The file is opened O_RDWR without O_CREAT, so it has to
 * exist already.  Every failure trips an assert().
 */
void write_int_to(char *prefix, char *file, int int_to_write)
{
	char text[100];
	int nr_chars;
	int ret;
	int fd;

	fd = open(file, O_RDWR);
	assert(fd >= 0);

	nr_chars = snprintf(text, sizeof(text), "%s%d", prefix, int_to_write);
	/* catch both a formatting error and truncated output */
	assert(nr_chars >= 0);
	assert(nr_chars < sizeof(text));

	ret = write(fd, text, nr_chars);
	assert(ret == nr_chars);

	ret = close(fd);
	assert(!ret);
}
/* Write "<prefix><pid of this process>" to 'file' via write_int_to(). */
void write_pid_to(char *prefix, char *file)
{
	int self = getpid();

	write_int_to(prefix, file, self);
}
/*
 * Write this process's pid to the ftrace set_ftrace_pid file and then
 * write 0 to the trace file.  write_int_to() asserts if either file
 * cannot be opened or written.
 */
void trace_me(void)
{
/* tracing events dir */
#define TED "/sys/kernel/debug/tracing/events/"
	int self = getpid();

	/*
	 * Per-event filtering, currently disabled:
	 *
	write_pid_to("common_pid=", TED "signal/filter");
	write_pid_to("common_pid=", TED "exceptions/filter");
	write_int_to("", TED "signal/enable", 1);
	write_int_to("", TED "exceptions/enable", 1);
	*/
	write_int_to("", "/sys/kernel/debug/tracing/set_ftrace_pid", self);
	write_int_to("", "/sys/kernel/debug/tracing/trace", 0);
}
/*
 * test_failed(): report the source location of the call on stderr and
 * abort() the process.
 */
#define test_failed() __test_failed(__FILE__, __LINE__)
static void __test_failed(char *f, int l)
{
	const char *src_file = f;
	const int src_line = l;

	fprintf(stderr, "abort @ %s::%d\n", src_file, src_line);
	abort();
}
/* Error Printf: printf-style formatting, output goes to stderr */
#define eprintf(args...) fprintf(stderr, args)
#ifdef __i386__
/* i386 directory size is 4MB */
/* index of the instruction pointer in uc_mcontext.gregs[], used by handler() */
#define REG_IP_IDX REG_EIP
/* empty here; pasted in front of the hand-encoded XSAVE/XRSTOR opcode bytes */
#define REX_PREFIX
/* byte offset added to uc_mcontext.fpregs in read_mpx_status_sig() */
#define XSAVE_OFFSET_IN_FPMEM sizeof(struct _libc_fpstate)
/*
 * __cpuid() is from the Linux Kernel:
 *
 * Executes CPUID with *eax and *ecx as inputs and stores the resulting
 * eax/ebx/ecx/edx through the four pointers.  In this variant %ebx is
 * pushed and popped by hand around the instruction and its value is
 * copied out through operand %1 rather than being named as an output
 * register.
 */
static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
/* ecx is often an input as well as an output. */
asm volatile(
"push %%ebx;"
"cpuid;"
"mov %%ebx, %1;"
"pop %%ebx"
: "=a" (*eax),
"=g" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (*eax), "2" (*ecx));
}
#else /* __i386__ */
/* index of the instruction pointer in uc_mcontext.gregs[], used by handler() */
#define REG_IP_IDX REG_RIP
/* extra leading byte (0x48) for the hand-encoded XSAVE/XRSTOR opcode bytes */
#define REX_PREFIX "0x48, "
/* byte offset added to uc_mcontext.fpregs in read_mpx_status_sig() */
#define XSAVE_OFFSET_IN_FPMEM 0
/*
 * __cpuid() is from the Linux Kernel:
 *
 * Executes CPUID with *eax and *ecx as inputs and stores the resulting
 * eax/ebx/ecx/edx through the four pointers.
 */
static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
unsigned int *ecx, unsigned int *edx)
{
/* ecx is often an input as well as an output. */
asm volatile(
"cpuid;"
: "=a" (*eax),
"=b" (*ebx),
"=c" (*ecx),
"=d" (*edx)
: "0" (*eax), "2" (*ecx));
}
#endif /* !__i386__ */
/*
 * Header that sits right after the 512-byte fpu_sse area in
 * struct xsave_struct.  Eight packed uint64_t words, 64 bytes in total.
 * enable_mpx() writes xstate_bv before it restores the buffer.
 */
struct xsave_hdr_struct {
uint64_t xstate_bv;
uint64_t reserved1[2];
uint64_t reserved2[5];
} __attribute__((packed));
/*
 * Saved bounds registers: eight 64-bit words.  compare_context() and
 * __print_context() read them as four (lower, upper) pairs and
 * complement the upper word before using it.
 */
struct bndregs_struct {
uint64_t bndregs[8];
} __attribute__((packed));
/*
 * Saved MPX config/status pair.  enable_mpx() stores the bounds
 * directory address with bit 0 set into cfg_reg_u; handler() looks at
 * the low two bits of status_reg to classify a #BR.
 */
struct bndcsr_struct {
uint64_t cfg_reg_u;
uint64_t status_reg;
} __attribute__((packed));
/*
 * Packed image of the XSAVE area as this test uses it.  With the member
 * sizes below the byte offsets work out to:
 *   fpu_sse 0, xsave_hdr 512, ymm 576, lwp 832, bndregs 960, bndcsr 1024
 * which agrees with MPX_HEADER_OFFSET (512), MPX_BOUNDS_OFFSET (960) and
 * MPX_CONFIG_OFFSET (1024) defined later in this file.
 */
struct xsave_struct {
uint8_t fpu_sse[512];
struct xsave_hdr_struct xsave_hdr;
uint8_t ymm[256];
uint8_t lwp[128];
struct bndregs_struct bndregs;
struct bndcsr_struct bndcsr;
} __attribute__((packed));
/* 64-byte aligned scratch xsave area used by enable_mpx() and read_mpx_status_sig() */
uint8_t __attribute__((__aligned__(64))) buffer[4096];
/* 'buffer' viewed as a struct xsave_struct */
struct xsave_struct *xsave_buf = (struct xsave_struct *)buffer;
/* 64-byte aligned xsave area that carries the bounds registers between test helpers */
uint8_t __attribute__((__aligned__(64))) test_buffer[4096];
/* 'test_buffer' viewed as a struct xsave_struct */
struct xsave_struct *xsave_test_buf = (struct xsave_struct *)test_buffer;
/* incremented by handler() for every #BR it resumes from; printed by mpx_cleanup() */
uint64_t num_bnd_chk;
/*
 * Run the hand-encoded XRSTOR instruction (0f ae 2f, preceded by
 * REX_PREFIX) on the area at 'fx'.  'mask' is split into its low half
 * (passed in eax) and high half (passed in edx); 'fx' is passed in
 * the "D" register.
 */
static __always_inline void xrstor_state(struct xsave_struct *fx, uint64_t mask)
{
uint32_t lmask = mask;
uint32_t hmask = mask >> 32;
asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
}
/*
 * Run the hand-encoded XSAVE instruction (0f ae 27, preceded by
 * REX_PREFIX) on the area at '_fx'.  'mask' is split into eax (low half)
 * and edx (high half).  The body is the same as xsave_state() further
 * down in this file.
 */
static __always_inline void xsave_state_1(void *_fx, uint64_t mask)
{
uint32_t lmask = mask;
uint32_t hmask = mask >> 32;
unsigned char *fx = _fx;
asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
}
/*
 * Execute XGETBV with 'index' in ecx and return the 64-bit result,
 * assembled from edx (high half) and eax (low half).
 */
static inline uint64_t xgetbv(uint32_t index)
{
uint32_t eax, edx;
asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
: "=a" (eax), "=d" (edx)
: "c" (index));
return eax + ((uint64_t)edx << 32);
}
/*
 * Copy the xsave image that the signal frame carries (found
 * XSAVE_OFFSET_IN_FPMEM bytes past uc_mcontext.fpregs) into the global
 * 'buffer' and return the bndcsr status word from that copy.
 */
static uint64_t read_mpx_status_sig(ucontext_t *uctxt)
{
	uint8_t *saved_xstate =
		(uint8_t *)uctxt->uc_mcontext.fpregs + XSAVE_OFFSET_IN_FPMEM;

	memset(buffer, 0, sizeof(buffer));
	memcpy(buffer, saved_xstate, sizeof(struct xsave_struct));

	return xsave_buf->bndcsr.status_reg;
}
#include <pthread.h>
/*
 * Return the address of the instruction that follows the MPX instruction
 * at 'addr', so that the signal handler can resume past a faulting
 * bounds check.
 *
 * Layout understood here: at most one legacy prefix (0xf2, 0xf3 or 0x66),
 * an optional REX prefix, the 0x0f escape byte, one opcode byte, a ModRM
 * byte and whatever SIB / displacement bytes that ModRM byte calls for.
 *
 * Returns 'addr' unchanged when the 0x0f escape byte is not where it is
 * expected.  The opcode byte itself is not inspected.
 */
static uint8_t *get_next_inst_ip(uint8_t *addr)
{
	uint8_t *ip = addr;
	uint8_t modrm;
	uint8_t mod;
	uint8_t rm;

	/* determine the prefix. */
	switch (*ip) {
	case 0xf2:
	case 0xf3:
	case 0x66:
		ip++;
		break;
	default:
		break;
	}

	/*
	 * look for rex prefix: REX bytes are exactly 0x40-0x4f.  Testing
	 * bit 6 alone would also swallow unrelated bytes such as 0x50-0x7f
	 * or 0xc0-0xff and then misparse what follows.
	 */
	if ((*ip & 0xf0) == 0x40)
		ip++;

	/* Make sure we have a MPX instruction. */
	if (*ip++ != 0x0f)
		return addr;

	/* Skip the op code byte. */
	ip++;

	/* Get the modrm byte and break it down into parts. */
	modrm = *ip++;
	rm = modrm & 7;
	mod = (modrm >> 6);

	/* Register operand: nothing follows the ModRM byte. */
	if (mod == 3)
		return ip;

	if (rm == 4) {
		/* SIB addressing; base 5 with mod 0 means disp32, no base */
		uint8_t sib = *ip++;

		if (mod == 0 && (sib & 7) == 5)
			ip += 4;
	} else if (mod == 0 && rm == 5) {
		/* DISP32 addressing, no base */
		ip += 4;
	}

	/* mod 1 carries a disp8, mod 2 a disp32 */
	if (mod == 1)
		ip += 1;
	else if (mod == 2)
		ip += 4;

	return ip;
}
#ifdef si_lower
/* Lower bound reported in the siginfo, read through si_lower. */
static inline void *__si_bounds_lower(siginfo_t *si)
{
	void *lower_bound = si->si_lower;

	return lower_bound;
}
/* Upper bound reported in the siginfo, read through si_upper. */
static inline void *__si_bounds_upper(siginfo_t *si)
{
	void *upper_bound = si->si_upper;

	return upper_bound;
}
#else
/*
* This deals with old version of _sigfault in some distros:
*
old _sigfault:
struct {
void *si_addr;
} _sigfault;
new _sigfault:
struct {
void __user *_addr;
int _trapno;
short _addr_lsb;
union {
struct {
void __user *_lower;
void __user *_upper;
} _addr_bnd;
__u32 _pkey;
};
} _sigfault;
*
*/
/*
 * Fallback for headers without si_lower/si_upper: compute where the
 * lower/upper bound pointers are expected to live.  The result is the
 * address one sizeof(_sigfault) past the start of the _sigfault member,
 * advanced by a further two ints.
 *
 * Returns a pointer to the lower bound; the upper bound is the next
 * void * after it (see __si_bounds_upper()).
 */
static inline void **__si_bounds_hack(siginfo_t *si)
{
void *sigfault = &si->_sifields._sigfault;
/* end of the (old, short) _sigfault as this libc declares it */
void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault);
int *trapno = (int*)end_sigfault;
/* skip _trapno and _addr_lsb */
void **__si_lower = (void**)(trapno + 2);
return __si_lower;
}
/* Lower bound: first pointer at the location found by __si_bounds_hack(). */
static inline void *__si_bounds_lower(siginfo_t *si)
{
	void **bounds = __si_bounds_hack(si);

	return bounds[0];
}
/* Upper bound: second pointer at the location found by __si_bounds_hack(). */
static inline void *__si_bounds_upper(siginfo_t *si)
{
	void **bounds = __si_bounds_hack(si);

	return bounds[1];
}
#endif
/* number of #BR traps seen by handler() */
static int br_count;
/*
 * Bounds register that the next #BR is expected to involve; used by
 * check_siginfo_vs_shadow() to pick the shadow entry.  -1 while no
 * bounds exception is expected.
 */
static int expected_bnd_index = -1;
uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */
/* [0]/[1]: bounds and [2]: pointer value recorded by stdsc_shadow() */
unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS];
/* Failed address bound checks: */
#ifndef SEGV_BNDERR
# define SEGV_BNDERR 3
#endif
/*
* The kernel is supposed to provide some information about the bounds
* exception in the siginfo. It should match what we have in the bounds
* registers that we are checking against. Just check against the shadow copy
* since it is easily available, and we also check that *it* matches the real
* registers.
*/
/*
 * Compare the bounds the kernel reported in 'si' with the shadow copy
 * of the bounds register named by expected_bnd_index.
 *
 * Exits with status 6 if expected_bnd_index is out of range and with
 * status 7 if either bound differs from the shadow copy.
 */
void check_siginfo_vs_shadow(siginfo_t* si)
{
	int siginfo_ok = 1;
	void *shadow_lower;
	void *shadow_upper;

	/*
	 * Validate the index *before* it is used to subscript shadow_plb[]:
	 * its idle value is -1, which would read outside the array.
	 */
	if ((expected_bnd_index < 0) ||
	    (expected_bnd_index >= NR_MPX_BOUNDS_REGISTERS)) {
		fprintf(stderr, "ERROR: invalid expected_bnd_index: %d\n",
			expected_bnd_index);
		exit(6);
	}

	shadow_lower = (void *)(unsigned long)shadow_plb[expected_bnd_index][0];
	shadow_upper = (void *)(unsigned long)shadow_plb[expected_bnd_index][1];

	if (__si_bounds_lower(si) != shadow_lower)
		siginfo_ok = 0;
	if (__si_bounds_upper(si) != shadow_upper)
		siginfo_ok = 0;

	if (!siginfo_ok) {
		fprintf(stderr, "ERROR: siginfo bounds do not match "
			"shadow bounds for register %d\n", expected_bnd_index);
		exit(7);
	}
}
/*
 * SA_SIGINFO handler installed for SIGSEGV by setup_handler().
 *
 * The trap number is taken from the saved register context:
 *   5       - #BR.  The low two bits of the saved bndcsr status word
 *             select the action; only reason 1 (an MPX bounds check
 *             failure) returns, after moving the saved instruction
 *             pointer past the faulting instruction.  Every other
 *             reason exits the process.
 *   14      - page fault: print details and fail the test.
 *   others  - print details and fail the test.
 */
void handler(int signum, siginfo_t *si, void *vucontext)
{
int i;
ucontext_t *uctxt = vucontext;
int trapno;
unsigned long ip;
dprintf1("entered signal handler\n");
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
if (trapno == 5) {
typeof(si->si_addr) *si_addr_ptr = &si->si_addr;
uint64_t status = read_mpx_status_sig(uctxt);
/* low two bits of the status word: why the #BR was raised */
uint64_t br_reason = status & 0x3;
br_count++;
dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count);
dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n",
status, ip, br_reason);
dprintf2("si_signo: %d\n", si->si_signo);
dprintf2(" signum: %d\n", signum);
dprintf2("info->si_code == SEGV_BNDERR: %d\n",
(si->si_code == SEGV_BNDERR));
dprintf2("info->si_code: %d\n", si->si_code);
dprintf2("info->si_lower: %p\n", __si_bounds_lower(si));
dprintf2("info->si_upper: %p\n", __si_bounds_upper(si));
/* debug dump of the eight pointer-sized words starting at si_addr */
for (i = 0; i < 8; i++)
dprintf3("[%d]: %p\n", i, si_addr_ptr[i]);
switch (br_reason) {
case 0: /* traditional BR */
fprintf(stderr,
"Undefined status with bound exception:%jx\n",
status);
exit(5);
case 1: /* #BR MPX bounds exception */
/* these are normal and we expect to see them */
check_siginfo_vs_shadow(si);
dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n",
status, (void *)ip, si->si_addr);
num_bnd_chk++;
/* resume at the instruction after the failed bounds check */
uctxt->uc_mcontext.gregs[REG_IP_IDX] =
(greg_t)get_next_inst_ip((uint8_t *)ip);
break;
case 2:
fprintf(stderr, "#BR status == 2, missing bounds table,"
"kernel should have handled!!\n");
exit(4);
break;
default:
fprintf(stderr, "bound check error: status 0x%jx at %p\n",
status, (void *)ip);
num_bnd_chk++;
uctxt->uc_mcontext.gregs[REG_IP_IDX] =
(greg_t)get_next_inst_ip((uint8_t *)ip);
fprintf(stderr, "bound check error: si_addr %p\n", si->si_addr);
exit(3);
}
} else if (trapno == 14) {
eprintf("ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n",
trapno, ip);
eprintf("si_addr %p\n", si->si_addr);
eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
test_failed();
} else {
eprintf("unexpected trap %d! at 0x%lx\n", trapno, ip);
eprintf("si_addr %p\n", si->si_addr);
eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]);
test_failed();
}
}
/*
 * Run CPUID for leaf 'op' and sub-leaf 'count'.  The two inputs are
 * stored through 'eax' and 'ecx' first because __cpuid() reads its
 * inputs from there; all four registers are returned through the
 * pointers.
 */
static inline void cpuid_count(unsigned int op, int count,
		unsigned int *eax, unsigned int *ebx,
		unsigned int *ecx, unsigned int *edx)
{
	*eax = op;	/* leaf */
	*ecx = count;	/* sub-leaf */

	__cpuid(eax, ebx, ecx, edx);
}
/* CPUID leaf queried for XSAVE state information */
#define XSTATE_CPUID 0x0000000d
/*
 * List of XSAVE features Linux knows about:
 *
 * The enumerators are bit numbers (FP is 0, ..., Hi16_ZMM is 7); the
 * XSTATE_* masks below are built from them with (1 << bit).
 */
enum xfeature_bit {
XSTATE_BIT_FP,
XSTATE_BIT_SSE,
XSTATE_BIT_YMM,
XSTATE_BIT_BNDREGS,
XSTATE_BIT_BNDCSR,
XSTATE_BIT_OPMASK,
XSTATE_BIT_ZMM_Hi256,
XSTATE_BIT_Hi16_ZMM,
XFEATURES_NR_MAX,
};
/* Single-bit masks, one per enum xfeature_bit entry */
#define XSTATE_FP (1 << XSTATE_BIT_FP)
#define XSTATE_SSE (1 << XSTATE_BIT_SSE)
#define XSTATE_YMM (1 << XSTATE_BIT_YMM)
#define XSTATE_BNDREGS (1 << XSTATE_BIT_BNDREGS)
#define XSTATE_BNDCSR (1 << XSTATE_BIT_BNDCSR)
#define XSTATE_OPMASK (1 << XSTATE_BIT_OPMASK)
#define XSTATE_ZMM_Hi256 (1 << XSTATE_BIT_ZMM_Hi256)
#define XSTATE_Hi16_ZMM (1 << XSTATE_BIT_Hi16_ZMM)
/* Both MPX state components; check_mpx_support() requires both bits */
#define MPX_XSTATES (XSTATE_BNDREGS | XSTATE_BNDCSR) /* 0x18 */
/*
 * Return true when bit number 'bit' (0-31) is set in 'x'.
 *
 * The mask is built from an unsigned 1 so that testing bit 31 does not
 * shift a signed int into its sign bit, which is undefined behaviour.
 */
bool one_bit(unsigned int x, int bit)
{
	return !!(x & (1U << bit));
}
/*
 * Query CPUID leaf XSTATE_CPUID, sub-leaf 'state_bit_nr', and print one
 * line about that state component under the label 'name': its size
 * (eax) plus the user / supervisor / aligned flags derived from ebx and
 * from bits 0 and 1 of ecx.  Asserts that the sub-leaf is not all zero.
 */
void print_state_component(int state_bit_nr, char *name)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int size;
	unsigned int is_user;
	unsigned int is_supervisor;
	unsigned int is_aligned;
	bool ecx_bit0;

	/* See SDM Section 13.2 */
	cpuid_count(XSTATE_CPUID, state_bit_nr, &eax, &ebx, &ecx, &edx);
	assert(eax || ebx || ecx);

	ecx_bit0 = one_bit(ecx, 0);

	size = eax;
	is_supervisor = (ebx == 0) && ecx_bit0;
	is_user = !ecx_bit0;
	is_aligned = one_bit(ecx, 1);

	printf("%8s: size: %d user: %d supervisor: %d aligned: %d\n",
	       name, size, is_user, is_supervisor, is_aligned);
}
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx) */
#define XSAVE_FEATURE_BIT (26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
#define OSXSAVE_FEATURE_BIT (27) /* XSAVE enabled in the OS */
/*
 * Verify that the CPU and the OS provide what the MPX tests need:
 * XSAVE, OSXSAVE, the XSTATE CPUID leaf, and the BNDREGS/BNDCSR state
 * components both in CPUID and in XCR0.
 *
 * Returns true when everything is present.  If anything is missing, a
 * message is printed to stderr and the process exits with status 0.
 */
bool check_mpx_support(void)
{
	unsigned int eax, ebx, ecx, edx;

	cpuid_count(1, 0, &eax, &ebx, &ecx, &edx);

	/* We can't do much without XSAVE, so just bail out if it is missing */
	if (!one_bit(ecx, XSAVE_FEATURE_BIT)) {
		fprintf(stderr, "processor lacks XSAVE, can not run MPX tests\n");
		exit(0);
	}

	if (!one_bit(ecx, OSXSAVE_FEATURE_BIT)) {
		fprintf(stderr, "processor lacks OSXSAVE, can not run MPX tests\n");
		exit(0);
	}

	/* CPUs not supporting the XSTATE CPUID leaf do not support MPX */
	/* Is this redundant with the feature bit checks? */
	cpuid_count(0, 0, &eax, &ebx, &ecx, &edx);
	if (eax < XSTATE_CPUID) {
		fprintf(stderr, "processor lacks XSTATE CPUID leaf,"
				" can not run MPX tests\n");
		exit(0);
	}

	printf("XSAVE is supported by HW & OS\n");

	cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx);

	printf("XSAVE processor supported state mask: 0x%x\n", eax);
	printf("XSAVE OS supported state mask: 0x%jx\n", xgetbv(0));

	/* Make sure the processor reports the MPX states as supported by XSAVE* */
	if ((eax & MPX_XSTATES) != MPX_XSTATES) {
		fprintf(stderr, "processor lacks MPX XSTATE(s), can not run MPX tests\n");
		exit(0);
	}

	/* Make sure that the MPX states are enabled in XCR0 */
	if ((xgetbv(0) & MPX_XSTATES) != MPX_XSTATES) {
		fprintf(stderr, "MPX XSTATE(s) not enabled in XCR0, "
				"can not run MPX tests\n");
		exit(0);
	}

	print_state_component(XSTATE_BIT_BNDREGS, "BNDREGS");
	print_state_component(XSTATE_BIT_BNDCSR, "BNDCSR");

	return true;
}
void enable_mpx(void *l1base)
{
/* enable point lookup */
memset(buffer, 0, sizeof(buffer));
xrstor_state(xsave_buf, 0x18);
xsave_buf->xsave_hdr.xstate_bv = 0x10;
xsave_buf->bndcsr.cfg_reg_u = (unsigned long)l1base | 1;
xsave_buf->bndcsr.status_reg = 0;
dprintf2("bf xrstor\n");
dprintf2("xsave cndcsr: status %jx, configu %jx\n",
xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
xrstor_state(xsave_buf, 0x18);
dprintf2("after xrstor\n");
xsave_state_1(xsave_buf, 0x18);
dprintf1("xsave bndcsr: status %jx, configu %jx\n",
xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u);
}
#include <sys/prctl.h>
/* The bounds directory set up by process_specific_init() */
struct mpx_bounds_dir *bounds_dir_ptr;
/*
 * Result of nr_incore() over the MPX_BOUNDS_DIR_SIZE_BYTES bytes at
 * bounds_dir_ptr.  'func' and 'line' identify the caller and are not
 * used.
 */
unsigned long __bd_incore(const char *func, int line)
{
	return nr_incore(bounds_dir_ptr, MPX_BOUNDS_DIR_SIZE_BYTES);
}
#define bd_incore() __bd_incore(__func__, __LINE__)
/*
 * Assert that the 'sz' bytes starting at 'ptr' are all zero.  The area
 * is scanned one unsigned long at a time; the first non-zero word is
 * reported and trips assert(0).
 */
void check_clear(void *ptr, unsigned long sz)
{
	void *const end = ptr + sz;
	unsigned long *word = ptr;

	while ((void *)word < end) {
		if (*word) {
			dprintf1("%p is NOT clear at %p\n", ptr, word);
			assert(0);
		}
		word++;
	}
	dprintf1("%p is clear for %lx\n", ptr, sz);
}
/*
 * Assert that the whole bounds directory is zero.
 *
 * The length follows the same rule process_specific_init() uses when
 * it allocates the directory: 4MB when unsigned long is 4 bytes wide,
 * 2GB otherwise.  Scanning a fixed 2GB would run far past the end of
 * the 4MB directory of a 32-bit build.
 */
void check_clear_bd(void)
{
	unsigned long size = 2UL << 30; /* 2GB */

	if (sizeof(unsigned long) == 4)
		size = 4UL << 20; /* 4MB */

	check_clear(bounds_dir_ptr, size);
}
#define USE_MALLOC_FOR_BOUNDS_DIR 1
/*
 * Allocate the bounds directory (2GB, or 4MB when unsigned long is
 * 4 bytes wide), publish it in bounds_dir_ptr, load it into the MPX
 * configuration with enable_mpx() and then issue prctl(43).
 *
 * Returns true on success.  Allocation failure and a failing prctl()
 * both end in assert()/abort(), so false is never actually returned.
 */
bool process_specific_init(void)
{
	/* Guarantee we have the space to align it, add padding: */
	const unsigned long pad = getpagesize();
	const unsigned long size = (sizeof(unsigned long) == 4)
					? (4UL << 20)	/* 4MB */
					: (2UL << 30);	/* 2GB */
	unsigned long *dir;

	dprintf1("trying to allocate %ld MB bounds directory\n", (size >> 20));

	if (USE_MALLOC_FOR_BOUNDS_DIR) {
		unsigned long aligned;

		dir = malloc(size + pad);
		assert(dir);
		/* round the start up to the next 4096-byte boundary */
		aligned = ((unsigned long)dir + 0xfffUL) & ~0xfffUL;
		dir = (void *)aligned;
	} else {
		/*
		 * This makes debugging easier because the address
		 * calculations are simpler:
		 */
		dir = mmap((void *)0x200000000000, size + pad,
			   PROT_READ|PROT_WRITE,
			   MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
		if (dir == (void *)-1) {
			perror("unable to allocate bounds directory");
			abort();
		}
		check_clear(dir, size);
	}

	bounds_dir_ptr = (void *)dir;
	madvise(bounds_dir_ptr, size, MADV_NOHUGEPAGE);
	bd_incore();
	dprintf1("bounds directory: 0x%p -> 0x%p\n", bounds_dir_ptr,
		 (char *)bounds_dir_ptr + size);

	/* the directory must still be all zero on both sides of enable_mpx() */
	check_clear(dir, size);
	enable_mpx(dir);
	check_clear(dir, size);

	/* NOTE(review): 43 is presumably PR_MPX_ENABLE_MANAGEMENT - confirm */
	if (prctl(43, 0, 0, 0, 0) != 0) {
		printf("no MPX support\n");
		abort();
	}
	return true;
}
/*
 * Issue prctl(44).  Returns true when the call reports success;
 * otherwise prints "no MPX support" and returns false.
 */
bool process_specific_finish(void)
{
	/* NOTE(review): 44 is presumably PR_MPX_DISABLE_MANAGEMENT - confirm */
	if (prctl(44) == 0)
		return true;

	printf("no MPX support\n");
	return false;
}
/*
 * Install handler() as the SA_SIGINFO action for SIGSEGV.  The set of
 * signals blocked while the handler runs is the process's current
 * signal mask.  Asserts if either system call fails.
 */
void setup_handler()
{
	struct sigaction newact;
	struct sigaction oldact;
	int ret;

	newact.sa_handler = 0; /* void(*)(int)*/
	newact.sa_sigaction = handler; /* void (*)(int, siginfo_t*, void *) */

	/* sa_mask: fetch the current signal mask without changing it */
	ret = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
	assert(ret == 0);

	/* call sa_sigaction, not sa_handler*/
	newact.sa_flags = SA_SIGINFO;
	newact.sa_restorer = 0; /* void(*)(), obsolete */

	/* #BR is mapped to sigsegv */
	ret = sigaction(SIGSEGV, &newact, &oldact);
	assert(ret == 0);
}
/* One-time setup: install the signal handler, then set up and enable MPX. */
void mpx_prepare(void)
{
	dprintf2("%s()\n", __func__);

	/* the handler has to be in place before MPX is switched on */
	setup_handler();
	(void)process_specific_init();
}
/* Print how many #BRs were handled, then run process_specific_finish(). */
void mpx_cleanup(void)
{
	printf("%s(): %jd BRs. bye...\n", __func__, num_bnd_chk);

	(void)process_specific_finish();
}
/*-------------- the following is test case ---------------*/
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
/* lower-bound violations predicted by check_lowerbound_shadow() */
uint64_t num_lower_brs;
/* upper-bound violations predicted by check_upperbound_shadow() */
uint64_t num_upper_brs;
/* byte offsets into an xsave area; see __print_context() and compare_context() */
#define MPX_CONFIG_OFFSET 1024
#define MPX_BOUNDS_OFFSET 960
#define MPX_HEADER_OFFSET 512
/* get_random_addr() returns addresses below this value */
#define MAX_ADDR_TESTED (1<<28)
/* number of outer rounds in check_mpx_insns_and_tables() */
#define TEST_ROUNDS 100
/*
0F 1A /r BNDLDX-Load
0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation
66 0F 1A /r BNDMOV bnd1, bnd2/m128
66 0F 1B /r BNDMOV bnd1/m128, bnd2
F2 0F 1A /r BNDCU bnd, r/m64
F2 0F 1B /r BNDCN bnd, r/m64
F3 0F 1A /r BNDCL bnd, r/m64
F3 0F 1B /r BNDMK bnd, m64
*/
/*
 * Run the hand-encoded XSAVE instruction (0f ae 27, preceded by
 * REX_PREFIX) on the area at '_fx'.  'mask' is split into eax (low half)
 * and edx (high half).  The body is the same as xsave_state_1() earlier
 * in this file.
 */
static __always_inline void xsave_state(void *_fx, uint64_t mask)
{
uint32_t lmask = mask;
uint32_t hmask = mask >> 32;
unsigned char *fx = _fx;
asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
}
/*
 * Run BNDMK on bnd0 with a NULL base in rcx and (0 - 1) in rdx, i.e.
 * the same instruction as mpx_make_bound_helper(0, 0).
 */
static __always_inline void mpx_clear_bnd0(void)
{
long size = 0;
void *ptr = NULL;
/* F3 0F 1B /r BNDMK bnd, m64 */
/* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */
asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
: : "c" (ptr), "d" (size-1)
: "memory");
}
/*
 * Run BNDMK on bnd0 with 'ptr' in rcx and 'size' - 1 in rdx.  The
 * shadow counterpart is mkbnd_shadow().
 */
static __always_inline void mpx_make_bound_helper(unsigned long ptr,
unsigned long size)
{
/* F3 0F 1B /r BNDMK bnd, m64 */
/* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */
asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t"
: : "c" (ptr), "d" (size-1)
: "memory");
}
/*
 * Run BNDCL: check 'ptr' (passed in rcx) against the lower bound held
 * in bnd0.
 */
static __always_inline void mpx_check_lowerbound_helper(unsigned long ptr)
{
/* F3 0F 1A /r BNDCL bnd, r/m64 */
/* f3 0f 1a 01 bndcl (%rcx),%bnd0 */
asm volatile(".byte 0xf3,0x0f,0x1a,0x01\n\t"
: : "c" (ptr)
: "memory");
}
/*
 * Run BNDCU: check 'ptr' (passed in rcx) against the upper bound held
 * in bnd0.
 */
static __always_inline void mpx_check_upperbound_helper(unsigned long ptr)
{
/* F2 0F 1A /r BNDCU bnd, r/m64 */
/* f2 0f 1a 01 bndcu (%rcx),%bnd0 */
asm volatile(".byte 0xf2,0x0f,0x1a,0x01\n\t"
: : "c" (ptr)
: "memory");
}
/*
 * Run BNDMOV to copy bnd0 into bnd2.  The shadow counterpart is
 * movbndreg_shadow(0, 2).
 */
static __always_inline void mpx_movbndreg_helper()
{
/* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */
/* 66 0f 1b c2 bndmov %bnd0,%bnd2 */
asm volatile(".byte 0x66,0x0f,0x1b,0xc2\n\t");
}
/* Run BNDMOV to store bnd0 to the memory at 'mem' (passed in rcx). */
static __always_inline void mpx_movbnd2mem_helper(uint8_t *mem)
{
/* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */
/* 66 0f 1b 01 bndmov %bnd0,(%rcx) */
asm volatile(".byte 0x66,0x0f,0x1b,0x01\n\t"
: : "c" (mem)
: "memory");
}
/* Run BNDMOV to load bnd0 from the memory at 'mem' (passed in rcx). */
static __always_inline void mpx_movbnd_from_mem_helper(uint8_t *mem)
{
/* 66 0F 1A /r BNDMOV bnd1, bnd2/m128 */
/* 66 0f 1a 01 bndmov (%rcx),%bnd0 */
asm volatile(".byte 0x66,0x0f,0x1a,0x01\n\t"
: : "c" (mem)
: "memory");
}
/*
 * Run BNDSTX to store bnd0 into the bounds tables, with 'ptr_addr' in
 * rcx and 'ptr_val' in rdx.  The shadow counterpart is stdsc_shadow().
 */
static __always_inline void mpx_store_dsc_helper(unsigned long ptr_addr,
unsigned long ptr_val)
{
/* 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation */
/* 0f 1b 04 11 bndstx %bnd0,(%rcx,%rdx,1) */
asm volatile(".byte 0x0f,0x1b,0x04,0x11\n\t"
: : "c" (ptr_addr), "d" (ptr_val)
: "memory");
}
/*
 * Run BNDLDX to load bnd0 from the bounds tables, with 'ptr_addr' in
 * rcx and 'ptr_val' in rdx.  The shadow counterpart is lddsc_shadow().
 */
static __always_inline void mpx_load_dsc_helper(unsigned long ptr_addr,
unsigned long ptr_val)
{
/* 0F 1A /r BNDLDX-Load */
/* 0f 1a 04 11 bndldx (%rcx,%rdx,1),%bnd0 */
asm volatile(".byte 0x0f,0x1a,0x04,0x11\n\t"
: : "c" (ptr_addr), "d" (ptr_val)
: "memory");
}
/*
 * Dump the MPX part of an xsave area to stderr: the four (lower, upper)
 * bounds pairs found at MPX_BOUNDS_OFFSET, each upper word shown both
 * complemented and raw, followed by the two config/status words at
 * MPX_CONFIG_OFFSET.  'line' is the caller's line number.
 */
void __print_context(void *__print_xsave_buffer, int line)
{
	uint64_t *pair = (uint64_t *)(__print_xsave_buffer + MPX_BOUNDS_OFFSET);
	uint64_t *cfg = (uint64_t *)(__print_xsave_buffer + MPX_CONFIG_OFFSET);
	int reg;

	eprintf("%s()::%d\n", "print_context", line);
	for (reg = 0; reg < 4; reg++, pair += 2) {
		unsigned long lower = (unsigned long)pair[0];
		unsigned long upper_raw = (unsigned long)pair[1];

		eprintf("bound[%d]: 0x%016lx 0x%016lx(0x%016lx)\n", reg,
			lower, ~upper_raw, upper_raw);
	}
	eprintf("cpcfg: %jx cpstatus: %jx\n", cfg[0], cfg[1]);
}
/* print_context(x): dump the xsave area 'x', tagged with the caller's line number */
#define print_context(x) __print_context(x, __LINE__)
/* dprint_context(x): print_context(x) when DEBUG is defined, otherwise a no-op */
#ifdef DEBUG
#define dprint_context(x) print_context(x)
#else
#define dprint_context(x) do{}while(0)
#endif
/*
 * Seed rand() from the current time and reset the first four shadow
 * bounds registers to lower = 0, upper = ~0UL.
 */
void init()
{
	int reg = 0;

	srand((unsigned int)time(NULL));

	while (reg < 4) {
		shadow_plb[reg][0] = 0;
		shadow_plb[reg][1] = ~(unsigned long)0;
		reg++;
	}
}
/*
 * Source of random numbers for the allocation test.
 *
 * Normally this is random().  When NOT_SO_RANDOM is defined it becomes
 * a fixed, repeatable sequence instead: a static counter that starts at
 * 722122311 and grows by 563792075 on every call.
 *
 * 'line' is the caller's line number (supplied by mpx_random()); it is
 * not used.
 */
long int __mpx_random(int line)
{
#ifdef NOT_SO_RANDOM
	static long fake = 722122311;

	fake += 563792075;
	return fake;
#else
	return random();
#endif
}
#define mpx_random() __mpx_random(__LINE__)
/*
 * Return a pseudo-random address below MAX_ADDR_TESTED, rounded down
 * to a multiple of the pointer size.  The address is only a number; it
 * does not have to be mapped.
 */
uint8_t *get_random_addr()
{
	uint8_t *candidate = (uint8_t *)(unsigned long)(rand() % MAX_ADDR_TESTED);
	unsigned long misalignment = (unsigned long)candidate % sizeof(uint8_t *);

	return candidate - misalignment;
}
static inline bool compare_context(void *__xsave_buffer)
{
uint64_t *bounds = (uint64_t *)(__xsave_buffer + MPX_BOUNDS_OFFSET);
int i;
for (i = 0; i < 4; i++) {
dprintf3("shadow[%d]{%016lx/%016lx}\nbounds[%d]{%016lx/%016lx}\n",
i, (unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
i, (unsigned long)bounds[i*2], ~(unsigned long)bounds[i*2+1]);
if ((shadow_plb[i][0] != bounds[i*2]) ||
(shadow_plb[i][1] != ~(unsigned long)bounds[i*2+1])) {
eprintf("ERROR comparing shadow to real bound register %d\n", i);
eprintf("shadow{0x%016lx/0x%016lx}\nbounds{0x%016lx/0x%016lx}\n",
(unsigned long)shadow_plb[i][0], (unsigned long)shadow_plb[i][1],
(unsigned long)bounds[i*2], (unsigned long)bounds[i*2+1]);
return false;
}
}
return true;
}
/*
 * Shadow version of a bounds-make: register 'index' gets lower = ptr
 * and upper = ptr + offset - 1.
 */
void mkbnd_shadow(uint8_t *ptr, int index, long offset)
{
	shadow_plb[index][0] = (unsigned long)ptr;
	shadow_plb[index][1] = (unsigned long)ptr + offset - 1;
}
/*
 * Shadow version of a lower-bound check: count a violation in
 * num_lower_brs when 'ptr' lies below the shadow lower bound of
 * register 'index'.
 */
void check_lowerbound_shadow(uint8_t *ptr, int index)
{
	uint64_t lower = shadow_plb[index][0];

	if (lower <= (uint64_t)(unsigned long)ptr) {
		dprintf1("LowerBoundChk passed:%p\n", ptr);
		return;
	}
	num_lower_brs++;
}
/*
 * Shadow version of an upper-bound check: count a violation in
 * num_upper_brs when 'ptr' lies above the shadow upper bound of
 * register 'index'.
 */
void check_upperbound_shadow(uint8_t *ptr, int index)
{
	uint64_t upper = shadow_plb[index][1];

	if (upper >= (uint64_t)(unsigned long)ptr) {
		dprintf1("UpperBoundChk passed:%p\n", ptr);
		return;
	}
	num_upper_brs++;
}
/* Shadow version of a register-to-register bounds move: copy 'src' into 'dest'. */
__always_inline void movbndreg_shadow(int src, int dest)
{
	int half;

	/* half 0 is the lower bound, half 1 the upper bound */
	for (half = 0; half < 2; half++)
		shadow_plb[dest][half] = shadow_plb[src][half];
}
/*
 * Shadow version of a bounds store: write the lower and upper bound of
 * shadow register 'src' to dest[0] and dest[1].  Each bound is read
 * through an unsigned long pointer.
 */
__always_inline void movbnd2mem_shadow(int src, unsigned long *dest)
{
	dest[0] = *(unsigned long *)&(shadow_plb[src][0]);
	dest[1] = *(unsigned long *)&(shadow_plb[src][1]);
}
/*
 * Shadow version of a bounds load: set the lower and upper bound of
 * shadow register 'dest' from src[0] and src[1].  Each bound is written
 * through an unsigned long pointer.
 */
__always_inline void movbnd_from_mem_shadow(unsigned long *src, int dest)
{
	*(unsigned long *)&(shadow_plb[dest][0]) = src[0];
	*(unsigned long *)&(shadow_plb[dest][1]) = src[1];
}
/*
 * Shadow version of a bounds-table store: remember the bounds of
 * shadow register 'index' together with 'ptr_val' in shadow_map[0..2].
 * 'ptr' is only used in the debug message.
 */
__always_inline void stdsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
{
	unsigned long *slot = shadow_map;

	slot[0] = (unsigned long)shadow_plb[index][0];
	slot[1] = (unsigned long)shadow_plb[index][1];
	slot[2] = (unsigned long)ptr_val;
	dprintf3("%s(%d, %p, %p) set shadow map[2]: %p\n", __func__,
		 index, ptr, ptr_val, ptr_val);
	/*ptr ignored */
}
/*
 * Shadow version of a bounds-table load into register 'index'.  If
 * 'ptr_val' equals the pointer value remembered by stdsc_shadow(), the
 * remembered bounds are loaded; otherwise the register is reset to
 * lower = 0, upper = ~0UL.  'ptr' is only used in the debug message.
 */
void lddsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val)
{
	uint64_t saved_lower = shadow_map[0];
	uint64_t saved_upper = shadow_map[1];
	uint8_t *saved_value = (uint8_t *)shadow_map[2];

	if (saved_value == ptr_val) {
		shadow_plb[index][0] = saved_lower;
		shadow_plb[index][1] = saved_upper;
		return;
	}

	dprintf2("%s(%d, %p, %p) init shadow bounds[%d] "
		 "because %p != %p\n", __func__, index, ptr,
		 ptr_val, index, saved_value, ptr_val);
	shadow_plb[index][0] = 0;
	shadow_plb[index][1] = ~(unsigned long)0;
	/* ptr ignored */
}
/* Test 0: make bounds in bnd0 covering 0x1800 bytes starting at 'ptr'. 'buf' is unused. */
static __always_inline void mpx_test_helper0(uint8_t *buf, uint8_t *ptr)
{
mpx_make_bound_helper((unsigned long)ptr, 0x1800);
}
/* Shadow side of test 0: shadow register 0 covers 0x1800 bytes from 'ptr'. */
static __always_inline void mpx_test_helper0_shadow(uint8_t *buf, uint8_t *ptr)
{
	const int bnd_reg = 0;
	const long region_len = 0x1800;

	mkbnd_shadow(ptr, bnd_reg, region_len);
}
/*
 * Test 1: check ptr-1 against bnd0's lower bound and ptr+0x1800 against
 * its upper bound.  expected_bnd_index is set to 0 around the checks so
 * that a resulting bounds exception is validated against shadow
 * register 0.  'buf' is unused.
 */
static __always_inline void mpx_test_helper1(uint8_t *buf, uint8_t *ptr)
{
/* these are hard-coded to check bnd0 */
expected_bnd_index = 0;
mpx_check_lowerbound_helper((unsigned long)(ptr-1));
mpx_check_upperbound_helper((unsigned long)(ptr+0x1800));
/* reset this since we do not expect any more bounds exceptions */
expected_bnd_index = -1;
}
/* Shadow side of test 1: the same two checks against shadow register 0. */
static __always_inline void mpx_test_helper1_shadow(uint8_t *buf, uint8_t *ptr)
{
	uint8_t *below = ptr - 1;
	uint8_t *above = ptr + 0x1800;

	check_lowerbound_shadow(below, 0);
	check_upperbound_shadow(above, 0);
}
/*
 * Test 2: make bounds for 'ptr' in bnd0, copy bnd0 to bnd2, store bnd0
 * to 'buf', then remake bnd0 for ptr+0x12.
 */
static __always_inline void mpx_test_helper2(uint8_t *buf, uint8_t *ptr)
{
mpx_make_bound_helper((unsigned long)ptr, 0x1800);
mpx_movbndreg_helper();
mpx_movbnd2mem_helper(buf);
mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
}
/*
 * Shadow side of test 2: make, copy register 0 to register 2, store
 * register 0 to 'buf', then remake register 0 for ptr+0x12.
 */
static __always_inline void mpx_test_helper2_shadow(uint8_t *buf, uint8_t *ptr)
{
	const long region_len = 0x1800;
	unsigned long *mem = (unsigned long *)buf;

	mkbnd_shadow(ptr, 0, region_len);
	movbndreg_shadow(0, 2);
	movbnd2mem_shadow(0, mem);
	mkbnd_shadow(ptr+0x12, 0, region_len);
}
/* Test 3: load bnd0 from the memory at 'buf'. 'ptr' is unused. */
static __always_inline void mpx_test_helper3(uint8_t *buf, uint8_t *ptr)
{
mpx_movbnd_from_mem_helper(buf);
}
/* Shadow side of test 3: load shadow register 0 from 'buf'. */
static __always_inline void mpx_test_helper3_shadow(uint8_t *buf, uint8_t *ptr)
{
	unsigned long *mem = (unsigned long *)buf;

	movbnd_from_mem_shadow(mem, 0);
}
/*
 * Test 4: store bnd0 into the bounds tables for ('buf', 'ptr'), then
 * remake bnd0 for ptr+0x12.
 */
static __always_inline void mpx_test_helper4(uint8_t *buf, uint8_t *ptr)
{
mpx_store_dsc_helper((unsigned long)buf, (unsigned long)ptr);
mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800);
}
/*
 * Shadow side of test 4: record register 0 in the shadow map, then
 * remake register 0 for ptr+0x12.
 */
static __always_inline void mpx_test_helper4_shadow(uint8_t *buf, uint8_t *ptr)
{
	uint8_t *shifted = ptr + 0x12;

	stdsc_shadow(0, buf, ptr);
	mkbnd_shadow(shifted, 0, 0x1800);
}
/* Test 5: load bnd0 from the bounds tables for ('buf', 'ptr'). */
static __always_inline void mpx_test_helper5(uint8_t *buf, uint8_t *ptr)
{
mpx_load_dsc_helper((unsigned long)buf, (unsigned long)ptr);
}
/* Shadow side of test 5: load shadow register 0 back from the shadow map. */
static __always_inline void mpx_test_helper5_shadow(uint8_t *buf, uint8_t *ptr)
{
	const int bnd_reg = 0;

	lddsc_shadow(bnd_reg, buf, ptr);
}
/* number of mpx_test_helperN() / mpx_test_helperN_shadow() pairs (N = 0..5) */
#define NR_MPX_TEST_FUNCTIONS 6
/*
 * For compatibility reasons, MPX will clear the bounds registers
 * when you make function calls (among other things). We have to
 * preserve the registers in between calls to the "helpers" since
 * they build on each other.
 *
 * Be very careful not to make any function calls inside the
 * helpers, or anywhere else between the xrstor and xsave.
 *
 * run_helper() restores the bounds state from xsave_test_buf, runs
 * hardware helper number 'helper_nr', saves the state back and only
 * then runs the matching _shadow helper.  It expects a variable named
 * 'flags' (the xsave/xrstor feature mask) in the calling scope.
 */
#define run_helper(helper_nr, buf, buf_shadow, ptr) do { \
xrstor_state(xsave_test_buf, flags); \
mpx_test_helper##helper_nr(buf, ptr); \
xsave_state(xsave_test_buf, flags); \
mpx_test_helper##helper_nr##_shadow(buf_shadow, ptr); \
} while (0)
/*
 * Run test helper number 'nr' (0..5) together with its shadow version.
 * The switch is needed because run_helper() pastes the helper number
 * into a function name, so it has to be a literal.  Any other 'nr'
 * fails the test.
 */
static void run_helpers(int nr, uint8_t *buf, uint8_t *buf_shadow, uint8_t *ptr)
{
/* xsave/xrstor feature mask; referenced by name inside run_helper() */
uint64_t flags = 0x18;
dprint_context(xsave_test_buf);
switch (nr) {
case 0:
run_helper(0, buf, buf_shadow, ptr);
break;
case 1:
run_helper(1, buf, buf_shadow, ptr);
break;
case 2:
run_helper(2, buf, buf_shadow, ptr);
break;
case 3:
run_helper(3, buf, buf_shadow, ptr);
break;
case 4:
run_helper(4, buf, buf_shadow, ptr);
break;
case 5:
run_helper(5, buf, buf_shadow, ptr);
break;
default:
test_failed();
break;
}
dprint_context(xsave_test_buf);
}
unsigned long buf_shadow[1024]; /* used to check load / store descriptors */
extern long inspect_me(struct mpx_bounds_dir *bounds_dir);
/*
 * Store bounds-table entries for randomly chosen pointer-sized slots
 * inside [buf, buf + buf_len).
 *
 * Returns the number of entries stored: one per 'ratio' (1000)
 * unsigned-long-sized slots of the buffer, which is 0 for a small buffer.
 */
long cover_buf_with_bt_entries(void *buf, long buf_len)
{
int i;
long nr_to_fill;
int ratio = 1000;
unsigned long buf_len_in_ptrs;
/* Fill about 1/1000 of the space with bt entries */
nr_to_fill = buf_len / (sizeof(unsigned long) * ratio);
if (!nr_to_fill)
dprintf3("%s() nr_to_fill: %ld\n", __func__, nr_to_fill);
/* Align the buffer to pointer size */
while (((unsigned long)buf) % sizeof(void *)) {
buf++;
buf_len--;
}
/* We are storing pointers, so work in units of pointers from here on */
buf_len_in_ptrs = buf_len / sizeof(void *);
for (i = 0; i < nr_to_fill; i++) {
long index = (mpx_random() % buf_len_in_ptrs);
void *ptr = buf + index * sizeof(unsigned long);
unsigned long ptr_addr = (unsigned long)ptr;
/* ptr and size can be anything */
mpx_make_bound_helper((unsigned long)ptr, 8);
/*
 * take bnd0 and put it in to bounds tables "buf + index" is an
 * address inside the buffer where we are pretending that we
 * are going to put a pointer We do not, though because we will
 * never load entries from the table, so it doesn't matter.
 */
mpx_store_dsc_helper(ptr_addr, (unsigned long)ptr);
dprintf4("storing bound table entry for %lx (buf start @ %p)\n",
ptr_addr, buf);
}
return nr_to_fill;
}
/*
 * Round 'alignme' down to a multiple of 'align_to', which must be a
 * power of two.
 */
unsigned long align_down(unsigned long alignme, unsigned long align_to)
{
	unsigned long remainder = alignme & (align_to - 1);

	return alignme - remainder;
}
/*
 * Round 'alignme' up to a multiple of 'align_to', which must be a
 * power of two.
 */
unsigned long align_up(unsigned long alignme, unsigned long align_to)
{
	unsigned long low_bits = align_to - 1;
	unsigned long bumped = alignme + low_bits;

	return bumped & ~low_bits;
}
/*
 * Using 1MB alignment guarantees that no allocation
 * will overlap with another's bounds tables.
 *
 * We have to cook our own allocator here. malloc() can
 * mix other allocations with ours which means that even
 * if we free all of our allocations, there might still
 * be bounds tables for the *areas* since there is other
 * valid memory there.
 *
 * We also can't use malloc() because a free() of an area
 * might not free it back to the kernel. We want it
 * completely unmapped and malloc() does not guarantee
 * that.
 */
/* 'alignment': spacing of mpx_mini_alloc() start addresses; 'sz_alignment': multiple that sizes are rounded up to */
#ifdef __i386__
long alignment = 4096;
long sz_alignment = 4096;
#else
long alignment = 1 * MB;
long sz_alignment = 1 * MB;
#endif
/*
 * Map 'sz' bytes (rounded up to sz_alignment) of anonymous memory at an
 * address the kernel accepted exactly as hinted.  Hints start one
 * 'alignment' step past the previous successful allocation and move up
 * by 'alignment' each time the kernel places the mapping elsewhere,
 * wrapping to 0 above the per-architecture limit.
 *
 * Returns the mapping, or NULL when mmap() itself fails.
 */
void *mpx_mini_alloc(unsigned long sz)
{
#ifdef __i386__
	/*
	 * This isn't quite correct for 32-bit binaries
	 * on 64-bit kernels since they can use the
	 * entire 32-bit address space, but it's close
	 * enough.
	 */
	void *const highest_hint = (void *)0xC0000000;
#else
	void *const highest_hint = (void *)0x0000800000000000;
#endif
	static void *last;
	unsigned long long tries = 0;
	void *try_at;
	void *ptr;

	sz = align_up(sz, sz_alignment);

	for (try_at = last + alignment; ; ) {
		ptr = mmap(try_at, sz, PROT_READ|PROT_WRITE,
			   MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
		if (ptr == (void *)-1)
			return NULL;
		if (ptr == try_at)
			break;

		/* not where we asked for: give it back and try the next slot */
		munmap(ptr, sz);
		try_at += alignment;
		if (try_at > highest_hint)
			try_at = (void *)0x0;

		if (!(++tries % 10000))
			dprintf1("stuck in %s(), tries: %lld\n", __func__, tries);
	}

	last = ptr;
	dprintf3("mpx_mini_alloc(0x%lx) returning: %p\n", sz, ptr);
	return ptr;
}
/*
 * Unmap an area obtained from mpx_mini_alloc().  'sz' is rounded up to
 * sz_alignment the same way the allocator does.  A pointer above
 * 0x100000000000 is treated as corruption and fails the test.
 */
void mpx_mini_free(void *ptr, long sz)
{
	unsigned long unmap_len;

	dprintf2("%s() ptr: %p\n", __func__, ptr);
	if ((unsigned long)ptr > 0x100000000000) {
		dprintf1("uh oh !!!!!!!!!!!!!!! pointer too high: %p\n", ptr);
		test_failed();
	}

	unmap_len = align_up(sz, sz_alignment);

	dprintf3("%s() ptr: %p before munmap\n", __func__, ptr);
	munmap(ptr, unmap_len);
	dprintf3("%s() ptr: %p DONE\n", __func__, ptr);
}
/* number of allocation slots tracked in mallocs[] */
#define NR_MALLOCS 100
/* One slot of the allocation test; ptr == NULL means the slot is free */
struct one_malloc {
char *ptr; /* start of the area returned by mpx_mini_alloc() */
int nr_filled_btes; /* return value of cover_buf_with_bt_entries() for the area */
unsigned long size; /* size that was requested from mpx_mini_alloc() */
};
struct one_malloc mallocs[NR_MALLOCS];
void free_one_malloc(int index)
{
unsigned long free_ptr;
unsigned long mask;
if (!mallocs[index].ptr)
return;
mpx_mini_free(mallocs[index].ptr, mallocs[index].size);
dprintf4("freed[%d]: %p\n", index, mallocs[index].ptr);
free_ptr = (unsigned long)mallocs[index].ptr;
mask = alignment-1;
dprintf4("lowerbits: %lx / %lx mask: %lx\n", free_ptr,
(free_ptr & mask), mask);
assert((free_ptr & mask) == 0);
mallocs[index].ptr = NULL;
}
/*
 * Threshold that zap_everything() compares 'alignment' and
 * 'sz_alignment' against before insisting that the directory is empty.
 */
#ifdef __i386__
#define MPX_BOUNDS_TABLE_COVERS 4096
#else
#define MPX_BOUNDS_TABLE_COVERS (1 * MB)
#endif
/*
 * Free every slot in mallocs[] and check what inspect_me() reports for
 * the bounds directory before and after.  When both alignments are at
 * least MPX_BOUNDS_TABLE_COVERS, anything other than 0 afterwards
 * fails the test.
 */
void zap_everything(void)
{
	long before_zap;
	long after_zap;
	int slot = 0;

	before_zap = inspect_me(bounds_dir_ptr);
	dprintf1("zapping everything start: %ld\n", before_zap);

	while (slot < NR_MALLOCS)
		free_one_malloc(slot++);

	after_zap = inspect_me(bounds_dir_ptr);
	dprintf1("zapping everything done: %ld\n", after_zap);

	/*
	 * We only guarantee to empty the thing out if our allocations are
	 * exactly aligned on the boundaries of a bounds table.
	 */
	if ((alignment < MPX_BOUNDS_TABLE_COVERS) ||
	    (sz_alignment < MPX_BOUNDS_TABLE_COVERS))
		return;

	if (after_zap != 0)
		test_failed();
	assert(after_zap == 0);
}
void do_one_malloc(void)
{
static int malloc_counter;
long sz;
int rand_index = (mpx_random() % NR_MALLOCS);
void *ptr = mallocs[rand_index].ptr;
dprintf3("%s() enter\n", __func__);
if (ptr) {
dprintf3("freeing one malloc at index: %d\n", rand_index);
free_one_malloc(rand_index);
if (mpx_random() % (NR_MALLOCS*3) == 3) {
int i;
dprintf3("zapping some more\n");
for (i = rand_index; i < NR_MALLOCS; i++)
free_one_malloc(i);
}
if ((mpx_random() % zap_all_every_this_many_mallocs) == 4)
zap_everything();
}
/* 1->~1M */
sz = (1 + mpx_random() % 1000) * 1000;
ptr = mpx_mini_alloc(sz);
if (!ptr) {
/*
* If we are failing allocations, just assume we
* are out of memory and zap everything.
*/
dprintf3("zapping everything because out of memory\n");
zap_everything();
goto out;
}
dprintf3("malloc: %p size: 0x%lx\n", ptr, sz);
mallocs[rand_index].nr_filled_btes = cover_buf_with_bt_entries(ptr, sz);
mallocs[rand_index].ptr = ptr;
mallocs[rand_index].size = sz;
out:
if ((++malloc_counter) % inspect_every_this_many_mallocs == 0)
inspect_me(bounds_dir_ptr);
}
/*
 * Call 'test_func' over and over until more than TEST_DURATION_SECS
 * seconds have passed since the start; the call in progress when the
 * time limit is noticed still runs.  A progress line is printed when
 * more than one second has passed since the previous one, and after the
 * final iteration.
 */
void run_timed_test(void (*test_func)(void))
{
	static time_t last_print;
	time_t start = time(NULL);
	time_t now;
	long iteration = 0;
	int done = 0;

	do {
		now = time(NULL);
		if ((now - start) > TEST_DURATION_SECS)
			done = 1;

		test_func();
		iteration++;

		if (done || (now - last_print > 1)) {
			printf("iteration %ld complete, OK so far\n", iteration);
			last_print = now;
		}
	} while (!done);
}
/*
 * Entry point for the "unmaptest": inspect the bounds directory once,
 * then drive do_one_malloc() through run_timed_test().
 */
void check_bounds_table_frees(void)
{
	printf("executing unmaptest\n");

	inspect_me(bounds_dir_ptr);
	run_timed_test(do_one_malloc);

	printf("done with malloc() fun\n");
}
/*
 * Report a failed instruction test and hand off to test_failed().
 *
 * Prints the saved context and an error line naming the test and round.
 * For test 5 only, it also dumps the bounds directory entry for 'buf'
 * and, if that entry is valid, the four words of the matching bounds
 * table entry.
 *
 * 'buf_shadow' and 'ptr' are accepted but not used here.
 */
void insn_test_failed(int test_nr, int test_round, void *buf,
		void *buf_shadow, void *ptr)
{
	print_context(xsave_test_buf);
	eprintf("ERROR: test %d round %d failed\n", test_nr, test_round);

	if (test_nr == 5) {
		struct mpx_bounds_dir *dir = (void *)bounds_dir_ptr;
		struct mpx_bd_entry *dir_entry = mpx_vaddr_to_bd_entry(buf, dir);

		printf(" bd: %p\n", dir);
		printf("&bde: %p\n", dir_entry);
		printf("*bde: %lx\n", *(unsigned long *)dir_entry);

		/* Only look up the table entry when the directory entry is valid. */
		if (bd_entry_valid(dir_entry)) {
			struct mpx_bt_entry *tbl_entry;

			tbl_entry = mpx_vaddr_to_bt_entry(buf, dir);
			printf(" te: %p\n", tbl_entry);
			printf("bte[0]: %lx\n", tbl_entry->contents[0]);
			printf("bte[1]: %lx\n", tbl_entry->contents[1]);
			printf("bte[2]: %lx\n", tbl_entry->contents[2]);
			printf("bte[3]: %lx\n", tbl_entry->contents[3]);
		}
	}

	test_failed();
}
void check_mpx_insns_and_tables(void)
{
int successes = 0;
int failures = 0;
int buf_size = (1024*1024);
unsigned long *buf = malloc(buf_size);
const int total_nr_tests = NR_MPX_TEST_FUNCTIONS * TEST_ROUNDS;
int i, j;
memset(buf, 0, buf_size);
memset(buf_shadow, 0, sizeof(buf_shadow));
for (i = 0; i < TEST_ROUNDS; i++) {
uint8_t *ptr = get_random_addr() + 8;
for (j = 0; j < NR_MPX_TEST_FUNCTIONS; j++) {
if (0 && j != 5) {
successes++;
continue;
}
dprintf2("starting test %d round %d\n", j, i);
dprint_context(xsave_test_buf);
/*
* test5 loads an address from the bounds tables.
* The load will only complete if 'ptr' matches
* the load and the store, so with random addrs,
* the odds of this are very small. Make it
* higher by only moving 'ptr' 1/10 times.
*/
if (random() % 10 <= 0)
ptr = get_random_addr() + 8;
dprintf3("random ptr{%p}\n", ptr);
dprint_context(xsave_test_buf);
run_helpers(j, (void *)buf, (void *)buf_shadow, ptr);
dprint_context(xsave_test_buf);
if (!compare_context(xsave_test_buf)) {
insn_test_failed(j, i, buf, buf_shadow, ptr);
failures++;
goto exit;
}
successes++;
dprint_context(xsave_test_buf);
dprintf2("finished test %d round %d\n", j, i);
dprintf3("\n");
dprint_context(xsave_test_buf);
}
}
exit:
dprintf2("\nabout to free:\n");
free(buf);
dprintf1("successes: %d\n", successes);
dprintf1(" failures: %d\n", failures);
dprintf1(" tests: %d\n", total_nr_tests);
dprintf1(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
dprintf1(" saw: %d #BRs\n", br_count);
if (failures) {
eprintf("ERROR: non-zero number of failures\n");
exit(20);
}
if (successes != total_nr_tests) {
eprintf("ERROR: succeeded fewer than number of tries (%d != %d)\n",
successes, total_nr_tests);
exit(21);
}
if (num_upper_brs + num_lower_brs != br_count) {
eprintf("ERROR: unexpected number of #BRs: %jd %jd %d\n",
num_upper_brs, num_lower_brs, br_count);
eprintf("successes: %d\n", successes);
eprintf(" failures: %d\n", failures);
eprintf(" tests: %d\n", total_nr_tests);
eprintf(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs);
eprintf(" saw: %d #BRs\n", br_count);
exit(22);
}
}
/*
 * Walk the address space in steps of one bounds table size minus a
 * page, mapping a single page at every step that is not already
 * mapped, then cover each of those pages with bounds table entries.
 *
 * This is supposed to SIGSEGV nicely once the kernel
 * can no longer allocate vaddr space.
 */
void exhaust_vaddr_space(void)
{
	/* Try to make sure there is no room for a bounds table anywhere */
	unsigned long stride = MPX_BOUNDS_TABLE_SIZE_BYTES - PAGE_SIZE;
#ifdef __i386__
	unsigned long vaddr_limit = 0xf7788000UL;
#else
	unsigned long vaddr_limit = 0x800000000000UL;
#endif
	unsigned long addr;

	dprintf1("%s() start\n", __func__);

	/* do not start at 0, we aren't allowed to map there */
	for (addr = PAGE_SIZE; addr < vaddr_limit; addr += stride) {
		void *mapped;
		int rc = madvise((void *)addr, PAGE_SIZE, MADV_NORMAL);

		/* madvise() succeeding is taken to mean the page is already mapped. */
		if (rc == 0) {
			dprintf1("madvise() %lx ret: %d\n", addr, rc);
			continue;
		}

		mapped = mmap((void *)addr, PAGE_SIZE, PROT_READ|PROT_WRITE,
			      MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
		/* Stop at the first address the kernel would not give us. */
		if (mapped != (void *)addr) {
			perror("mmap");
			dprintf1("mmap(%lx) ret: %p\n", addr, mapped);
			break;
		}

		/* Only log addresses whose low 24 bits are clear. */
		if ((addr & 0xffffff) == 0)
			dprintf1("mmap(%lx) ret: %p\n", addr, mapped);
	}

	for (addr = PAGE_SIZE; addr < vaddr_limit; addr += stride) {
		dprintf2("covering 0x%lx with bounds table entries\n", addr);
		cover_buf_with_bt_entries((void *)addr, PAGE_SIZE);
	}

	dprintf1("%s() end\n", __func__);
	printf("done with vaddr space fun\n");
}
/*
 * Entry point for the "tabletest": drive check_mpx_insns_and_tables()
 * through run_timed_test(), with a banner before and after.
 */
void mpx_table_test(void)
{
	printf("starting mpx bounds table test\n");

	run_timed_test(&check_mpx_insns_and_tables);

	printf("done with mpx bounds table test\n");
}
/*
 * Test driver.  Sets up MPX state, then runs the sub-tests named on the
 * command line: "unmaptest", "vaddrexhaust" and/or "tabletest".
 *
 * With no recognised test name, "unmaptest" and "tabletest" are run and
 * "vaddrexhaust" is left out.
 */
int main(int argc, char **argv)
{
	int run_unmap = 0;
	int run_exhaust = 0;
	int run_table = 0;
	int argi;

	check_mpx_support();
	mpx_prepare();
	srandom(11179);

	bd_incore();
	init();
	bd_incore();

	trace_me();

	xsave_state((void *)xsave_test_buf, 0x1f);
	if (!compare_context(xsave_test_buf))
		printf("Init failed\n");

	/* An argument can match at most one name, so an else-if chain suffices. */
	for (argi = 1; argi < argc; argi++) {
		const char *arg = argv[argi];

		if (strcmp(arg, "unmaptest") == 0)
			run_unmap = 1;
		else if (strcmp(arg, "vaddrexhaust") == 0)
			run_exhaust = 1;
		else if (strcmp(arg, "tabletest") == 0)
			run_table = 1;
	}

	/* Nothing selected: fall back to the default set. */
	if (!run_unmap && !run_exhaust && !run_table) {
		run_unmap = 1;
		/* run_exhaust stays off by default */
		run_table = 1;
	}

	if (run_unmap)
		check_bounds_table_frees();
	if (run_table)
		mpx_table_test();
	if (run_exhaust)
		exhaust_vaddr_space();

	printf("%s completed successfully\n", argv[0]);
	exit(0);
}
#include "mpx-dig.c"
/* SPDX-License-Identifier: GPL-2.0 */
/* Shared memory-related constants and helpers for the MPX selftests. */
#ifndef _MPX_MM_H
#define _MPX_MM_H
/* Page size in bytes; hard-coded rather than queried at run time. */
#define PAGE_SIZE 4096
/* 2^20 bytes, as an unsigned long. */
#define MB (1UL<<20)
/*
 * Defined in another translation unit.
 * NOTE(review): presumably reports how much of the 'size_bytes' range
 * starting at 'ptr' is resident in memory -- confirm the unit and the
 * error convention against the definition.
 */
extern long nr_incore(void *ptr, unsigned long size_bytes);
#endif /* _MPX_MM_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment