Commit 98a05fe8 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "x86:

   - Do not register IRQ bypass consumer if posted interrupts not
     supported

   - Fix missed device interrupt due to non-atomic update of IRR

   - Use GFP_KERNEL_ACCOUNT for pid_table in ipiv

   - Make VMREAD error path play nice with noinstr

   - x86: Acquire SRCU read lock when handling fastpath MSR writes

   - Support linking rseq tests statically against glibc 2.35+

   - Fix reference count for stats file descriptors

   - Detect userspace setting invalid CR0

  Non-KVM:

   - Remove coccinelle script that has repeatedly caused confusion
     ("debugfs, coccinelle: check for obsolete DEFINE_SIMPLE_ATTRIBUTE()
     usage", acked by Greg)"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (21 commits)
  KVM: selftests: Expand x86's sregs test to cover illegal CR0 values
  KVM: VMX: Don't fudge CR0 and CR4 for restricted L2 guest
  KVM: x86: Disallow KVM_SET_SREGS{2} if incoming CR0 is invalid
  Revert "debugfs, coccinelle: check for obsolete DEFINE_SIMPLE_ATTRIBUTE() usage"
  KVM: selftests: Verify stats fd is usable after VM fd has been closed
  KVM: selftests: Verify stats fd can be dup()'d and read
  KVM: selftests: Verify userspace can create "redundant" binary stats files
  KVM: selftests: Explicitly free vcpus array in binary stats test
  KVM: selftests: Clean up stats fd in common stats_test() helper
  KVM: selftests: Use pread() to read binary stats header
  KVM: Grab a reference to KVM for VM and vCPU stats file descriptors
  selftests/rseq: Play nice with binaries statically linked against glibc 2.35+
  Revert "KVM: SVM: Skip WRMSR fastpath on VM-Exit if next RIP isn't valid"
  KVM: x86: Acquire SRCU read lock when handling fastpath MSR writes
  KVM: VMX: Use vmread_error() to report VM-Fail in "goto" path
  KVM: VMX: Make VMREAD error path play nice with noinstr
  KVM: x86/irq: Conditionally register IRQ bypass consumer again
  KVM: X86: Use GFP_KERNEL_ACCOUNT for pid_table in ipiv
  KVM: x86: check the kvm_cpu_get_interrupt result before using it
  KVM: x86: VMX: set irr_pending in kvm_apic_update_irr
  ...
parents c959e900 5a759117
......@@ -37,6 +37,7 @@ KVM_X86_OP(get_segment)
KVM_X86_OP(get_cpl)
KVM_X86_OP(set_segment)
KVM_X86_OP(get_cs_db_l_bits)
KVM_X86_OP(is_valid_cr0)
KVM_X86_OP(set_cr0)
KVM_X86_OP_OPTIONAL(post_set_cr3)
KVM_X86_OP(is_valid_cr4)
......
......@@ -1566,9 +1566,10 @@ struct kvm_x86_ops {
void (*set_segment)(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l);
bool (*is_valid_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0);
void (*post_set_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3);
bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr0);
bool (*is_valid_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
void (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4);
int (*set_efer)(struct kvm_vcpu *vcpu, u64 efer);
void (*get_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
......
......@@ -637,16 +637,22 @@ bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
*max_irr = -1;
for (i = vec = 0; i <= 7; i++, vec += 32) {
u32 *p_irr = (u32 *)(regs + APIC_IRR + i * 0x10);
irr_val = *p_irr;
pir_val = READ_ONCE(pir[i]);
irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
if (pir_val) {
pir_val = xchg(&pir[i], 0);
prev_irr_val = irr_val;
irr_val |= xchg(&pir[i], 0);
*((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
if (prev_irr_val != irr_val) {
max_updated_irr =
__fls(irr_val ^ prev_irr_val) + vec;
}
do {
irr_val = prev_irr_val | pir_val;
} while (prev_irr_val != irr_val &&
!try_cmpxchg(p_irr, &prev_irr_val, irr_val));
if (prev_irr_val != irr_val)
max_updated_irr = __fls(irr_val ^ prev_irr_val) + vec;
}
if (irr_val)
*max_irr = __fls(irr_val) + vec;
......@@ -660,8 +666,11 @@ EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
{
struct kvm_lapic *apic = vcpu->arch.apic;
bool irr_updated = __kvm_apic_update_irr(pir, apic->regs, max_irr);
return __kvm_apic_update_irr(pir, apic->regs, max_irr);
if (unlikely(!apic->apicv_active && irr_updated))
apic->irr_pending = true;
return irr_updated;
}
EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
......
......@@ -1786,6 +1786,11 @@ static void sev_post_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
}
}
/*
 * SVM implementation of the is_valid_cr0 hook (installed in svm_x86_ops).
 * It performs no vendor-specific checks: every CR0 value passed in is
 * reported as valid, and both parameters are unused.
 */
static bool svm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
return true;
}
void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_svm *svm = to_svm(vcpu);
......@@ -3986,14 +3991,8 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
/*
* Note, the next RIP must be provided as SRCU isn't held, i.e. KVM
* can't read guest memory (dereference memslots) to decode the WRMSR.
*/
if (control->exit_code == SVM_EXIT_MSR && control->exit_info_1 &&
nrips && control->next_rip)
if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
to_svm(vcpu)->vmcb->control.exit_info_1)
return handle_fastpath_set_msr_irqoff(vcpu);
return EXIT_FASTPATH_NONE;
......@@ -4815,6 +4814,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.set_segment = svm_set_segment,
.get_cpl = svm_get_cpl,
.get_cs_db_l_bits = svm_get_cs_db_l_bits,
.is_valid_cr0 = svm_is_valid_cr0,
.set_cr0 = svm_set_cr0,
.post_set_cr3 = sev_post_set_cr3,
.is_valid_cr4 = svm_is_valid_cr4,
......
......@@ -303,10 +303,8 @@ SYM_FUNC_START(vmx_do_nmi_irqoff)
VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
SYM_FUNC_END(vmx_do_nmi_irqoff)
.section .text, "ax"
#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
/**
* vmread_error_trampoline - Trampoline from inline asm to vmread_error()
* @field: VMCS field encoding that failed
......@@ -335,7 +333,7 @@ SYM_FUNC_START(vmread_error_trampoline)
mov 3*WORD_SIZE(%_ASM_BP), %_ASM_ARG2
mov 2*WORD_SIZE(%_ASM_BP), %_ASM_ARG1
call vmread_error
call vmread_error_trampoline2
/* Zero out @fault, which will be popped into the result register. */
_ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)
......@@ -357,6 +355,8 @@ SYM_FUNC_START(vmread_error_trampoline)
SYM_FUNC_END(vmread_error_trampoline)
#endif
.section .text, "ax"
SYM_FUNC_START(vmx_do_interrupt_irqoff)
VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
SYM_FUNC_END(vmx_do_interrupt_irqoff)
......@@ -441,14 +441,24 @@ do { \
pr_warn_ratelimited(fmt); \
} while (0)
void vmread_error(unsigned long field, bool fault)
noinline void vmread_error(unsigned long field)
{
if (fault)
kvm_spurious_fault();
else
vmx_insn_failed("vmread failed: field=%lx\n", field);
}
#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
/*
 * C-side continuation of the VMREAD error trampoline.
 *
 * @field: VMCS field encoding whose VMREAD went wrong
 * @fault: true if the VMREAD faulted, false if it failed
 *
 * A fault is passed straight to kvm_spurious_fault().  A failure is reported
 * through vmread_error(), which is only invoked between
 * instrumentation_begin() and instrumentation_end() because this function
 * itself is noinstr.
 */
noinstr void vmread_error_trampoline2(unsigned long field, bool fault)
{
	if (fault) {
		kvm_spurious_fault();
		return;
	}

	instrumentation_begin();
	vmread_error(field);
	instrumentation_end();
}
#endif
noinline void vmwrite_error(unsigned long field, unsigned long value)
{
vmx_insn_failed("vmwrite failed: field=%lx val=%lx err=%u\n",
......@@ -1503,6 +1513,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long old_rflags;
/*
* Unlike CR0 and CR4, RFLAGS handling requires checking if the vCPU
* is an unrestricted guest in order to mark L2 as needing emulation
* if L1 runs L2 as a restricted guest.
*/
if (is_unrestricted_guest(vcpu)) {
kvm_register_mark_available(vcpu, VCPU_EXREG_RFLAGS);
vmx->rflags = rflags;
......@@ -3037,6 +3052,15 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_vmx *kvm_vmx = to_kvm_vmx(vcpu->kvm);
/*
* KVM should never use VM86 to virtualize Real Mode when L2 is active,
* as using VM86 is unnecessary if unrestricted guest is enabled, and
* if unrestricted guest is disabled, VM-Enter (from L1) with CR0.PG=0
* should VM-Fail and KVM should reject userspace attempts to stuff
* CR0.PG=0 when L2 is active.
*/
WARN_ON_ONCE(is_guest_mode(vcpu));
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_TR], VCPU_SREG_TR);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_ES], VCPU_SREG_ES);
vmx_get_segment(vcpu, &vmx->rmode.segs[VCPU_SREG_DS], VCPU_SREG_DS);
......@@ -3226,6 +3250,17 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
#define CR3_EXITING_BITS (CPU_BASED_CR3_LOAD_EXITING | \
CPU_BASED_CR3_STORE_EXITING)
/*
 * VMX implementation of the is_valid_cr0 hook.
 *
 * While the vCPU is in guest mode the value is checked with
 * nested_guest_cr0_valid(); otherwise, if nested.vmxon is set, it is checked
 * with nested_host_cr0_valid().  When neither condition holds, no
 * VMX-specific restriction is applied and the value is accepted.
 */
static bool vmx_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	bool valid = true;

	if (is_guest_mode(vcpu))
		valid = nested_guest_cr0_valid(vcpu, cr0);
	else if (to_vmx(vcpu)->nested.vmxon)
		valid = nested_host_cr0_valid(vcpu, cr0);

	return valid;
}
void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
......@@ -3235,7 +3270,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG);
hw_cr0 = (cr0 & ~KVM_VM_CR0_ALWAYS_OFF);
if (is_unrestricted_guest(vcpu))
if (enable_unrestricted_guest)
hw_cr0 |= KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST;
else {
hw_cr0 |= KVM_VM_CR0_ALWAYS_ON;
......@@ -3263,7 +3298,7 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
}
#endif
if (enable_ept && !is_unrestricted_guest(vcpu)) {
if (enable_ept && !enable_unrestricted_guest) {
/*
* Ensure KVM has an up-to-date snapshot of the guest's CR3. If
* the below code _enables_ CR3 exiting, vmx_cache_reg() will
......@@ -3394,7 +3429,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
* this bit, even if host CR4.MCE == 0.
*/
hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
if (is_unrestricted_guest(vcpu))
if (enable_unrestricted_guest)
hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
else if (vmx->rmode.vm86_active)
hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
......@@ -3414,7 +3449,7 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
vcpu->arch.cr4 = cr4;
kvm_register_mark_available(vcpu, VCPU_EXREG_CR4);
if (!is_unrestricted_guest(vcpu)) {
if (!enable_unrestricted_guest) {
if (enable_ept) {
if (!is_paging(vcpu)) {
hw_cr4 &= ~X86_CR4_PAE;
......@@ -4651,7 +4686,8 @@ static int vmx_alloc_ipiv_pid_table(struct kvm *kvm)
if (kvm_vmx->pid_table)
return 0;
pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, vmx_get_pid_table_order(kvm));
pages = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO,
vmx_get_pid_table_order(kvm));
if (!pages)
return -ENOMEM;
......@@ -5364,18 +5400,11 @@ static int handle_set_cr0(struct kvm_vcpu *vcpu, unsigned long val)
val = (val & ~vmcs12->cr0_guest_host_mask) |
(vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask);
if (!nested_guest_cr0_valid(vcpu, val))
return 1;
if (kvm_set_cr0(vcpu, val))
return 1;
vmcs_writel(CR0_READ_SHADOW, orig_val);
return 0;
} else {
if (to_vmx(vcpu)->nested.vmxon &&
!nested_host_cr0_valid(vcpu, val))
return 1;
return kvm_set_cr0(vcpu, val);
}
}
......@@ -8203,6 +8232,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.set_segment = vmx_set_segment,
.get_cpl = vmx_get_cpl,
.get_cs_db_l_bits = vmx_get_cs_db_l_bits,
.is_valid_cr0 = vmx_is_valid_cr0,
.set_cr0 = vmx_set_cr0,
.is_valid_cr4 = vmx_is_valid_cr4,
.set_cr4 = vmx_set_cr4,
......
......@@ -10,7 +10,7 @@
#include "vmcs.h"
#include "../x86.h"
void vmread_error(unsigned long field, bool fault);
void vmread_error(unsigned long field);
void vmwrite_error(unsigned long field, unsigned long value);
void vmclear_error(struct vmcs *vmcs, u64 phys_addr);
void vmptrld_error(struct vmcs *vmcs, u64 phys_addr);
......@@ -31,6 +31,13 @@ void invept_error(unsigned long ext, u64 eptp, gpa_t gpa);
* void vmread_error_trampoline(unsigned long field, bool fault);
*/
extern unsigned long vmread_error_trampoline;
/*
* The second VMREAD error trampoline, called from the assembly trampoline,
* exists primarily to enable instrumentation for the VM-Fail path.
*/
void vmread_error_trampoline2(unsigned long field, bool fault);
#endif
static __always_inline void vmcs_check16(unsigned long field)
......@@ -101,8 +108,7 @@ static __always_inline unsigned long __vmcs_readl(unsigned long field)
do_fail:
instrumentation_begin();
WARN_ONCE(1, KBUILD_MODNAME ": vmread failed: field=%lx\n", field);
pr_warn_ratelimited(KBUILD_MODNAME ": vmread failed: field=%lx\n", field);
vmread_error(field);
instrumentation_end();
return 0;
......
......@@ -906,6 +906,22 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
}
EXPORT_SYMBOL_GPL(load_pdptrs);
/*
 * Common CR0 sanity checks, followed by the vendor-specific check.
 *
 * Returns false when:
 *  - (64-bit builds only) any of bits 63:32 is set,
 *  - NW is set while CD is clear, or
 *  - PG is set while PE is clear.
 * Otherwise the verdict is whatever the vendor's is_valid_cr0 hook returns.
 */
static bool kvm_is_valid_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
	const bool nw_set = cr0 & X86_CR0_NW;
	const bool cd_set = cr0 & X86_CR0_CD;
	const bool pg_set = cr0 & X86_CR0_PG;
	const bool pe_set = cr0 & X86_CR0_PE;

#ifdef CONFIG_X86_64
	if (cr0 & 0xffffffff00000000UL)
		return false;
#endif

	if ((nw_set && !cd_set) || (pg_set && !pe_set))
		return false;

	return static_call(kvm_x86_is_valid_cr0)(vcpu, cr0);
}
void kvm_post_set_cr0(struct kvm_vcpu *vcpu, unsigned long old_cr0, unsigned long cr0)
{
/*
......@@ -952,20 +968,13 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
{
unsigned long old_cr0 = kvm_read_cr0(vcpu);
cr0 |= X86_CR0_ET;
#ifdef CONFIG_X86_64
if (cr0 & 0xffffffff00000000UL)
if (!kvm_is_valid_cr0(vcpu, cr0))
return 1;
#endif
cr0 &= ~CR0_RESERVED_BITS;
if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD))
return 1;
cr0 |= X86_CR0_ET;
if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE))
return 1;
/* Write to CR0 reserved bits are ignored, even on Intel. */
cr0 &= ~CR0_RESERVED_BITS;
#ifdef CONFIG_X86_64
if ((vcpu->arch.efer & EFER_LME) && !is_paging(vcpu) &&
......@@ -2172,6 +2181,8 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
u64 data;
fastpath_t ret = EXIT_FASTPATH_NONE;
kvm_vcpu_srcu_read_lock(vcpu);
switch (msr) {
case APIC_BASE_MSR + (APIC_ICR >> 4):
data = kvm_read_edx_eax(vcpu);
......@@ -2194,6 +2205,8 @@ fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu)
if (ret != EXIT_FASTPATH_NONE)
trace_kvm_msr_write(msr, data);
kvm_vcpu_srcu_read_unlock(vcpu);
return ret;
}
EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff);
......@@ -10203,10 +10216,14 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
if (r < 0)
goto out;
if (r) {
kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);
int irq = kvm_cpu_get_interrupt(vcpu);
if (!WARN_ON_ONCE(irq == -1)) {
kvm_queue_interrupt(vcpu, irq, false);
static_call(kvm_x86_inject_irq)(vcpu, false);
WARN_ON(static_call(kvm_x86_interrupt_allowed)(vcpu, true) < 0);
}
}
if (kvm_cpu_has_injectable_intr(vcpu))
static_call(kvm_x86_enable_irq_window)(vcpu);
}
......@@ -11460,7 +11477,8 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
return false;
}
return kvm_is_valid_cr4(vcpu, sregs->cr4);
return kvm_is_valid_cr4(vcpu, sregs->cr4) &&
kvm_is_valid_cr0(vcpu, sregs->cr0);
}
static int __set_sregs_common(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs,
......@@ -13185,7 +13203,7 @@ EXPORT_SYMBOL_GPL(kvm_arch_has_noncoherent_dma);
bool kvm_arch_has_irq_bypass(void)
{
return true;
return enable_apicv && irq_remapping_cap(IRQ_POSTING_CAP);
}
int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
......
// SPDX-License-Identifier: GPL-2.0
/// Use DEFINE_DEBUGFS_ATTRIBUTE rather than DEFINE_SIMPLE_ATTRIBUTE
/// for debugfs files.
///
//# Rationale: DEFINE_SIMPLE_ATTRIBUTE + debugfs_create_file()
//# imposes some significant overhead as compared to
//# DEFINE_DEBUGFS_ATTRIBUTE + debugfs_create_file_unsafe().
//
// Copyright (C): 2016 Nicolai Stange
// Options: --no-includes
//
virtual context
virtual patch
virtual org
virtual report
@dsa@
declarer name DEFINE_SIMPLE_ATTRIBUTE;
identifier dsa_fops;
expression dsa_get, dsa_set, dsa_fmt;
position p;
@@
DEFINE_SIMPLE_ATTRIBUTE@p(dsa_fops, dsa_get, dsa_set, dsa_fmt);
@dcf@
expression name, mode, parent, data;
identifier dsa.dsa_fops;
@@
debugfs_create_file(name, mode, parent, data, &dsa_fops)
@context_dsa depends on context && dcf@
declarer name DEFINE_DEBUGFS_ATTRIBUTE;
identifier dsa.dsa_fops;
expression dsa.dsa_get, dsa.dsa_set, dsa.dsa_fmt;
@@
* DEFINE_SIMPLE_ATTRIBUTE(dsa_fops, dsa_get, dsa_set, dsa_fmt);
@patch_dcf depends on patch expression@
expression name, mode, parent, data;
identifier dsa.dsa_fops;
@@
- debugfs_create_file(name, mode, parent, data, &dsa_fops)
+ debugfs_create_file_unsafe(name, mode, parent, data, &dsa_fops)
@patch_dsa depends on patch_dcf && patch@
identifier dsa.dsa_fops;
expression dsa.dsa_get, dsa.dsa_set, dsa.dsa_fmt;
@@
- DEFINE_SIMPLE_ATTRIBUTE(dsa_fops, dsa_get, dsa_set, dsa_fmt);
+ DEFINE_DEBUGFS_ATTRIBUTE(dsa_fops, dsa_get, dsa_set, dsa_fmt);
@script:python depends on org && dcf@
fops << dsa.dsa_fops;
p << dsa.p;
@@
msg="%s should be defined with DEFINE_DEBUGFS_ATTRIBUTE" % (fops)
coccilib.org.print_todo(p[0], msg)
@script:python depends on report && dcf@
fops << dsa.dsa_fops;
p << dsa.p;
@@
msg="WARNING: %s should be defined with DEFINE_DEBUGFS_ATTRIBUTE" % (fops)
coccilib.report.print_report(p[0], msg)
......@@ -362,8 +362,10 @@ static inline void read_stats_header(int stats_fd, struct kvm_stats_header *head
{
ssize_t ret;
ret = read(stats_fd, header, sizeof(*header));
TEST_ASSERT(ret == sizeof(*header), "Read stats header");
ret = pread(stats_fd, header, sizeof(*header), 0);
TEST_ASSERT(ret == sizeof(*header),
"Failed to read '%lu' header bytes, ret = '%ld'",
sizeof(*header), ret);
}
struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
......
......@@ -43,8 +43,10 @@ static void stats_test(int stats_fd)
id = malloc(header.name_size);
TEST_ASSERT(id, "Allocate memory for id string");
ret = read(stats_fd, id, header.name_size);
TEST_ASSERT(ret == header.name_size, "Read id string");
ret = pread(stats_fd, id, header.name_size, sizeof(header));
TEST_ASSERT(ret == header.name_size,
"Expected header size '%u', read '%lu' bytes",
header.name_size, ret);
/* Check id string, that should start with "kvm" */
TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header.name_size,
......@@ -165,23 +167,7 @@ static void stats_test(int stats_fd)
free(stats_data);
free(stats_desc);
free(id);
}
static void vm_stats_test(struct kvm_vm *vm)
{
int stats_fd = vm_get_stats_fd(vm);
stats_test(stats_fd);
close(stats_fd);
TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
}
static void vcpu_stats_test(struct kvm_vcpu *vcpu)
{
int stats_fd = vcpu_get_stats_fd(vcpu);
stats_test(stats_fd);
close(stats_fd);
TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
}
......@@ -199,6 +185,7 @@ static void vcpu_stats_test(struct kvm_vcpu *vcpu)
int main(int argc, char *argv[])
{
int vm_stats_fds, *vcpu_stats_fds;
int i, j;
struct kvm_vcpu **vcpus;
struct kvm_vm **vms;
......@@ -231,23 +218,58 @@ int main(int argc, char *argv[])
vcpus = malloc(sizeof(struct kvm_vcpu *) * max_vm * max_vcpu);
TEST_ASSERT(vcpus, "Allocate memory for storing vCPU pointers");
/*
* Not per-VM as the array is populated, used, and invalidated within a
* single for-loop iteration.
*/
vcpu_stats_fds = calloc(max_vm, sizeof(*vcpu_stats_fds));
TEST_ASSERT(vcpu_stats_fds, "Allocate memory for VM stats fds");
for (i = 0; i < max_vm; ++i) {
vms[i] = vm_create_barebones();
for (j = 0; j < max_vcpu; ++j)
vcpus[i * max_vcpu + j] = __vm_vcpu_add(vms[i], j);
}
/* Check stats read for every VM and VCPU */
/*
* Check stats read for every VM and vCPU, with a variety of flavors.
* Note, stats_test() closes the passed in stats fd.
*/
for (i = 0; i < max_vm; ++i) {
vm_stats_test(vms[i]);
/*
* Verify that creating multiple userspace references to a
* single stats file works and doesn't cause explosions.
*/
vm_stats_fds = vm_get_stats_fd(vms[i]);
stats_test(dup(vm_stats_fds));
/* Verify userspace can instantiate multiple stats files. */
stats_test(vm_get_stats_fd(vms[i]));
for (j = 0; j < max_vcpu; ++j) {
vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]);
stats_test(dup(vcpu_stats_fds[j]));
stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j]));
}
/*
* Close the VM fd and redo the stats tests. KVM should gift a
* reference (to the VM) to each stats fd, i.e. stats should
* still be accessible even after userspace has put its last
* _direct_ reference to the VM.
*/
kvm_vm_free(vms[i]);
stats_test(vm_stats_fds);
for (j = 0; j < max_vcpu; ++j)
vcpu_stats_test(vcpus[i * max_vcpu + j]);
stats_test(vcpu_stats_fds[j]);
ksft_test_result_pass("vm%i\n", i);
}
for (i = 0; i < max_vm; ++i)
kvm_vm_free(vms[i]);
free(vms);
free(vcpus);
free(vcpu_stats_fds);
ksft_finished(); /* Print results and exit() accordingly */
}
......@@ -22,26 +22,25 @@
#include "kvm_util.h"
#include "processor.h"
/*
 * Try to set a single CR4 bit that is clear in @orig and check that the
 * attempt fails and leaves the vCPU's sregs identical to @orig.
 *
 * @vcpu:        vCPU under test
 * @orig:        baseline sregs the vCPU currently holds
 * @feature_bit: CR4 bit to attempt to set
 *
 * Nothing is done when @feature_bit is already set in @orig->cr4.
 */
static void test_cr4_feature_bit(struct kvm_vcpu *vcpu, struct kvm_sregs *orig,
				 uint64_t feature_bit)
{
	struct kvm_sregs sregs;
	int rc;

	/* Only bits that are clear in the baseline are exercised. */
	if (!(orig->cr4 & feature_bit)) {
		/* Start from the baseline and turn on just the bit under test. */
		memcpy(&sregs, orig, sizeof(sregs));
		sregs.cr4 |= feature_bit;

		rc = _vcpu_sregs_set(vcpu, &sregs);
		TEST_ASSERT(rc, "KVM allowed unsupported CR4 bit (0x%lx)", feature_bit);

		/* Read the state back; it must still match the baseline. */
		vcpu_sregs_get(vcpu, &sregs);
		TEST_ASSERT(!memcmp(&sregs, orig, sizeof(sregs)), "KVM modified sregs");
	}
}
/*
 * TEST_INVALID_CR_BIT - check that setting one control-register bit is refused.
 *
 * @vcpu: vCPU handle passed to _vcpu_sregs_set() / vcpu_sregs_get()
 * @cr:   name of the struct kvm_sregs member to modify (e.g. cr0, cr4)
 * @orig: struct kvm_sregs lvalue holding the baseline state
 * @bit:  bit (or mask expression) to OR into @cr
 *
 * If @bit is already set in @orig.@cr the sub-test is skipped.  Otherwise a
 * copy of @orig with @bit set is passed to _vcpu_sregs_set(), which must
 * return non-zero, and the state read back must be byte-identical to @orig.
 *
 * The copy is sized from the macro's own local rather than from a caller
 * variable, and @bit is parenthesized so that compound expressions such as
 * (A | B) are evaluated as a unit.
 */
#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit) \
do { \
	struct kvm_sregs new; \
	int rc; \
	\
	/* Skip the sub-test, the feature/bit is supported. */ \
	if (orig.cr & (bit)) \
		break; \
	\
	memcpy(&new, &orig, sizeof(new)); \
	new.cr |= (bit); \
	\
	rc = _vcpu_sregs_set(vcpu, &new); \
	TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", (bit)); \
	\
	/* Sanity check that KVM didn't change anything. */ \
	vcpu_sregs_get(vcpu, &new); \
	TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \
} while (0)
static uint64_t calc_supported_cr4_feature_bits(void)
{
......@@ -80,7 +79,7 @@ int main(int argc, char *argv[])
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
uint64_t cr4;
int rc;
int rc, i;
/*
* Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
......@@ -92,6 +91,7 @@ int main(int argc, char *argv[])
vcpu_sregs_get(vcpu, &sregs);
sregs.cr0 = 0;
sregs.cr4 |= calc_supported_cr4_feature_bits();
cr4 = sregs.cr4;
......@@ -103,16 +103,24 @@ int main(int argc, char *argv[])
sregs.cr4, cr4);
/* Verify all unsupported features are rejected by KVM. */
test_cr4_feature_bit(vcpu, &sregs, X86_CR4_UMIP);
test_cr4_feature_bit(vcpu, &sregs, X86_CR4_LA57);
test_cr4_feature_bit(vcpu, &sregs, X86_CR4_VMXE);
test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMXE);
test_cr4_feature_bit(vcpu, &sregs, X86_CR4_FSGSBASE);
test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PCIDE);
test_cr4_feature_bit(vcpu, &sregs, X86_CR4_OSXSAVE);
test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMEP);
test_cr4_feature_bit(vcpu, &sregs, X86_CR4_SMAP);
test_cr4_feature_bit(vcpu, &sregs, X86_CR4_PKE);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP);
TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE);
for (i = 32; i < 64; i++)
TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i));
/* NW without CD is illegal, as is PG without PE. */
TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
kvm_vm_free(vm);
/* Create a "real" VM and verify APIC_BASE can be set. */
......
......@@ -34,9 +34,17 @@
#include "../kselftest.h"
#include "rseq.h"
static const ptrdiff_t *libc_rseq_offset_p;
static const unsigned int *libc_rseq_size_p;
static const unsigned int *libc_rseq_flags_p;
/*
* Define weak versions to play nice with binaries that are statically linked
* against a libc that doesn't support registering its own rseq.
*/
__weak ptrdiff_t __rseq_offset;
__weak unsigned int __rseq_size;
__weak unsigned int __rseq_flags;
static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
static const unsigned int *libc_rseq_size_p = &__rseq_size;
static const unsigned int *libc_rseq_flags_p = &__rseq_flags;
/* Offset from the thread pointer to the rseq area. */
ptrdiff_t rseq_offset;
......@@ -155,9 +163,17 @@ unsigned int get_rseq_feature_size(void)
static __attribute__((constructor))
void rseq_init(void)
{
/*
* If the libc's registered rseq size isn't already valid, it may be
* because the binary is dynamically linked and not necessarily due to
* libc not having registered a restartable sequence. Try to find the
* symbols if that's the case.
*/
if (!*libc_rseq_size_p) {
libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
}
if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
*libc_rseq_size_p != 0) {
/* rseq registration owned by glibc */
......
......@@ -4035,8 +4035,17 @@ static ssize_t kvm_vcpu_stats_read(struct file *file, char __user *user_buffer,
sizeof(vcpu->stat), user_buffer, size, offset);
}
/*
 * ->release() handler for a vCPU stats file.
 *
 * The file's private_data is the vCPU; the reference on its owning VM is put
 * here, balancing the kvm_get_kvm() done in kvm_vcpu_ioctl_get_stats_fd().
 * Always returns 0.
 */
static int kvm_vcpu_stats_release(struct inode *inode, struct file *file)
{
	struct kvm_vcpu *stats_vcpu = file->private_data;
	struct kvm *owner = stats_vcpu->kvm;

	kvm_put_kvm(owner);

	return 0;
}
/* File operations backing the fd handed out for a vCPU's stats. */
static const struct file_operations kvm_vcpu_stats_fops = {
.read = kvm_vcpu_stats_read,
/* Puts the VM reference associated with this file when it is released. */
.release = kvm_vcpu_stats_release,
.llseek = noop_llseek,
};
......@@ -4057,6 +4066,9 @@ static int kvm_vcpu_ioctl_get_stats_fd(struct kvm_vcpu *vcpu)
put_unused_fd(fd);
return PTR_ERR(file);
}
kvm_get_kvm(vcpu->kvm);
file->f_mode |= FMODE_PREAD;
fd_install(fd, file);
......@@ -4701,8 +4713,17 @@ static ssize_t kvm_vm_stats_read(struct file *file, char __user *user_buffer,
sizeof(kvm->stat), user_buffer, size, offset);
}
/*
 * ->release() handler for a VM stats file.
 *
 * The file's private_data is the VM itself; its reference is put here,
 * balancing the kvm_get_kvm() done in kvm_vm_ioctl_get_stats_fd().
 * Always returns 0.
 */
static int kvm_vm_stats_release(struct inode *inode, struct file *file)
{
	struct kvm *vm = file->private_data;

	kvm_put_kvm(vm);

	return 0;
}
/* File operations backing the fd handed out for a VM's stats. */
static const struct file_operations kvm_vm_stats_fops = {
.read = kvm_vm_stats_read,
/* Puts the VM reference associated with this file when it is released. */
.release = kvm_vm_stats_release,
.llseek = noop_llseek,
};
......@@ -4721,6 +4742,9 @@ static int kvm_vm_ioctl_get_stats_fd(struct kvm *kvm)
put_unused_fd(fd);
return PTR_ERR(file);
}
kvm_get_kvm(kvm);
file->f_mode |= FMODE_PREAD;
fd_install(fd, file);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment