Commit 9d0c8e79 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "More fixes for ARM and x86"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: LAPIC: Advancing the timer expiration on guest initiated write
  KVM: x86/mmu: Skip !MMU-present SPTEs when removing SP in exclusive mode
  KVM: kvmclock: Fix vCPUs > 64 can't be online/hotpluged
  kvm: x86: annotate RCU pointers
  KVM: arm64: Fix exclusive limit for IPA size
  KVM: arm64: Reject VM creation when the default IPA size is unsupported
  KVM: arm64: Ensure I-cache isolation between vcpus of a same VM
  KVM: arm64: Don't use cbz/adr with external symbols
  KVM: arm64: Fix range alignment when walking page tables
  KVM: arm64: Workaround firmware wrongly advertising GICv2-on-v3 compatibility
  KVM: arm64: Rename __vgic_v3_get_ich_vtr_el2() to __vgic_v3_get_gic_config()
  KVM: arm64: Don't access PMSELR_EL0/PMUSERENR_EL0 when no PMU is available
  KVM: arm64: Turn kvm_arm_support_pmu_v3() into a static key
  KVM: arm64: Fix nVHE hyp panic host context restore
  KVM: arm64: Avoid corrupting vCPU context register in guest exit
  KVM: arm64: nvhe: Save the SPE context early
  kvm: x86: use NULL instead of using plain integer as pointer
  KVM: SVM: Connect 'npt' module param to KVM's internal 'npt_enabled'
  KVM: x86: Ensure deadline timer has truly expired before posting its IRQ
parents 50eb842f 35737d2d
...@@ -182,6 +182,9 @@ is dependent on the CPU capability and the kernel configuration. The limit can ...@@ -182,6 +182,9 @@ is dependent on the CPU capability and the kernel configuration. The limit can
be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
ioctl() at run-time. ioctl() at run-time.
Creation of the VM will fail if the requested IPA size (whether it is
implicit or explicit) is unsupported on the host.
Please note that configuring the IPA size does not affect the capability Please note that configuring the IPA size does not affect the capability
exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
size of the address translated by the stage2 level (guest physical to size of the address translated by the stage2 level (guest physical to
......
...@@ -47,10 +47,10 @@ ...@@ -47,10 +47,10 @@
#define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context 2 #define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context 2
#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3
#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4
#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_vmid 5 #define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context 5
#define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6 #define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6
#define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7 #define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_ich_vtr_el2 8 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config 8
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr 9 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr 9
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr 10 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr 10
#define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs 11 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs 11
...@@ -183,16 +183,16 @@ DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs); ...@@ -183,16 +183,16 @@ DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs);
#define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs) #define __bp_harden_hyp_vecs CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
extern void __kvm_flush_vm_context(void); extern void __kvm_flush_vm_context(void);
extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa, extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
int level); int level);
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu); extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu);
extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern void __kvm_timer_set_cntvoff(u64 cntvoff);
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
extern u64 __vgic_v3_get_ich_vtr_el2(void); extern u64 __vgic_v3_get_gic_config(void);
extern u64 __vgic_v3_read_vmcr(void); extern u64 __vgic_v3_read_vmcr(void);
extern void __vgic_v3_write_vmcr(u32 vmcr); extern void __vgic_v3_write_vmcr(u32 vmcr);
extern void __vgic_v3_init_lrs(void); extern void __vgic_v3_init_lrs(void);
......
...@@ -83,6 +83,11 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt); ...@@ -83,6 +83,11 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt);
void __debug_switch_to_guest(struct kvm_vcpu *vcpu); void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
void __debug_switch_to_host(struct kvm_vcpu *vcpu); void __debug_switch_to_host(struct kvm_vcpu *vcpu);
#ifdef __KVM_NVHE_HYPERVISOR__
void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
#endif
void __fpsimd_save_state(struct user_fpsimd_state *fp_regs); void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs); void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
...@@ -97,7 +102,8 @@ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt); ...@@ -97,7 +102,8 @@ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt);
void __noreturn hyp_panic(void); void __noreturn hyp_panic(void);
#ifdef __KVM_NVHE_HYPERVISOR__ #ifdef __KVM_NVHE_HYPERVISOR__
void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
u64 elr, u64 par);
#endif #endif
#endif /* __ARM64_KVM_HYP_H__ */ #endif /* __ARM64_KVM_HYP_H__ */
...@@ -101,6 +101,9 @@ KVM_NVHE_ALIAS(__stop___kvm_ex_table); ...@@ -101,6 +101,9 @@ KVM_NVHE_ALIAS(__stop___kvm_ex_table);
/* Array containing bases of nVHE per-CPU memory regions. */ /* Array containing bases of nVHE per-CPU memory regions. */
KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base); KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
/* PMU available static key */
KVM_NVHE_ALIAS(kvm_arm_pmu_available);
#endif /* CONFIG_KVM */ #endif /* CONFIG_KVM */
#endif /* __ARM64_KERNEL_IMAGE_VARS_H */ #endif /* __ARM64_KERNEL_IMAGE_VARS_H */
...@@ -385,11 +385,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) ...@@ -385,11 +385,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
last_ran = this_cpu_ptr(mmu->last_vcpu_ran); last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
/* /*
* We guarantee that both TLBs and I-cache are private to each
* vcpu. If detecting that a vcpu from the same VM has
* previously run on the same physical CPU, call into the
* hypervisor code to nuke the relevant contexts.
*
* We might get preempted before the vCPU actually runs, but * We might get preempted before the vCPU actually runs, but
* over-invalidation doesn't affect correctness. * over-invalidation doesn't affect correctness.
*/ */
if (*last_ran != vcpu->vcpu_id) { if (*last_ran != vcpu->vcpu_id) {
kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu); kvm_call_hyp(__kvm_flush_cpu_context, mmu);
*last_ran = vcpu->vcpu_id; *last_ran = vcpu->vcpu_id;
} }
......
...@@ -85,8 +85,10 @@ SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL) ...@@ -85,8 +85,10 @@ SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL)
// If the hyp context is loaded, go straight to hyp_panic // If the hyp context is loaded, go straight to hyp_panic
get_loaded_vcpu x0, x1 get_loaded_vcpu x0, x1
cbz x0, hyp_panic cbnz x0, 1f
b hyp_panic
1:
// The hyp context is saved so make sure it is restored to allow // The hyp context is saved so make sure it is restored to allow
// hyp_panic to run at hyp and, subsequently, panic to run in the host. // hyp_panic to run at hyp and, subsequently, panic to run in the host.
// This makes use of __guest_exit to avoid duplication but sets the // This makes use of __guest_exit to avoid duplication but sets the
...@@ -94,7 +96,7 @@ SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL) ...@@ -94,7 +96,7 @@ SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL)
// current state is saved to the guest context but it will only be // current state is saved to the guest context but it will only be
// accurate if the guest had been completely restored. // accurate if the guest had been completely restored.
adr_this_cpu x0, kvm_hyp_ctxt, x1 adr_this_cpu x0, kvm_hyp_ctxt, x1
adr x1, hyp_panic adr_l x1, hyp_panic
str x1, [x0, #CPU_XREG_OFFSET(30)] str x1, [x0, #CPU_XREG_OFFSET(30)]
get_vcpu_ptr x1, x0 get_vcpu_ptr x1, x0
...@@ -146,7 +148,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL) ...@@ -146,7 +148,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
// Now restore the hyp regs // Now restore the hyp regs
restore_callee_saved_regs x2 restore_callee_saved_regs x2
set_loaded_vcpu xzr, x1, x2 set_loaded_vcpu xzr, x2, x3
alternative_if ARM64_HAS_RAS_EXTN alternative_if ARM64_HAS_RAS_EXTN
// If we have the RAS extensions we can consume a pending error // If we have the RAS extensions we can consume a pending error
......
...@@ -90,15 +90,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu) ...@@ -90,15 +90,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
* counter, which could make a PMXEVCNTR_EL0 access UNDEF at * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
* EL1 instead of being trapped to EL2. * EL1 instead of being trapped to EL2.
*/ */
write_sysreg(0, pmselr_el0); if (kvm_arm_support_pmu_v3()) {
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0); write_sysreg(0, pmselr_el0);
write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
}
write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2); write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
} }
static inline void __deactivate_traps_common(void) static inline void __deactivate_traps_common(void)
{ {
write_sysreg(0, hstr_el2); write_sysreg(0, hstr_el2);
write_sysreg(0, pmuserenr_el0); if (kvm_arm_support_pmu_v3())
write_sysreg(0, pmuserenr_el0);
} }
static inline void ___activate_traps(struct kvm_vcpu *vcpu) static inline void ___activate_traps(struct kvm_vcpu *vcpu)
......
...@@ -58,16 +58,24 @@ static void __debug_restore_spe(u64 pmscr_el1) ...@@ -58,16 +58,24 @@ static void __debug_restore_spe(u64 pmscr_el1)
write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1); write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
} }
void __debug_switch_to_guest(struct kvm_vcpu *vcpu) void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{ {
/* Disable and flush SPE data generation */ /* Disable and flush SPE data generation */
__debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1); __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
}
void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
{
__debug_switch_to_guest_common(vcpu); __debug_switch_to_guest_common(vcpu);
} }
void __debug_switch_to_host(struct kvm_vcpu *vcpu) void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
{ {
__debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1); __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
}
void __debug_switch_to_host(struct kvm_vcpu *vcpu)
{
__debug_switch_to_host_common(vcpu); __debug_switch_to_host_common(vcpu);
} }
......
...@@ -71,7 +71,8 @@ SYM_FUNC_START(__host_enter) ...@@ -71,7 +71,8 @@ SYM_FUNC_START(__host_enter)
SYM_FUNC_END(__host_enter) SYM_FUNC_END(__host_enter)
/* /*
* void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par); * void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
* u64 elr, u64 par);
*/ */
SYM_FUNC_START(__hyp_do_panic) SYM_FUNC_START(__hyp_do_panic)
/* Prepare and exit to the host's panic funciton. */ /* Prepare and exit to the host's panic funciton. */
...@@ -82,9 +83,11 @@ SYM_FUNC_START(__hyp_do_panic) ...@@ -82,9 +83,11 @@ SYM_FUNC_START(__hyp_do_panic)
hyp_kimg_va lr, x6 hyp_kimg_va lr, x6
msr elr_el2, lr msr elr_el2, lr
/* Set the panic format string. Use the, now free, LR as scratch. */ mov x29, x0
ldr lr, =__hyp_panic_string
hyp_kimg_va lr, x6 /* Load the format string into x0 and arguments into x1-7 */
ldr x0, =__hyp_panic_string
hyp_kimg_va x0, x6
/* Load the format arguments into x1-7. */ /* Load the format arguments into x1-7. */
mov x6, x3 mov x6, x3
...@@ -94,9 +97,7 @@ SYM_FUNC_START(__hyp_do_panic) ...@@ -94,9 +97,7 @@ SYM_FUNC_START(__hyp_do_panic)
mrs x5, hpfar_el2 mrs x5, hpfar_el2
/* Enter the host, conditionally restoring the host context. */ /* Enter the host, conditionally restoring the host context. */
cmp x0, xzr cbz x29, __host_enter_without_restoring
mov x0, lr
b.eq __host_enter_without_restoring
b __host_enter_for_panic b __host_enter_for_panic
SYM_FUNC_END(__hyp_do_panic) SYM_FUNC_END(__hyp_do_panic)
......
...@@ -46,11 +46,11 @@ static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt) ...@@ -46,11 +46,11 @@ static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
__kvm_tlb_flush_vmid(kern_hyp_va(mmu)); __kvm_tlb_flush_vmid(kern_hyp_va(mmu));
} }
static void handle___kvm_tlb_flush_local_vmid(struct kvm_cpu_context *host_ctxt) static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt)
{ {
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1); DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
__kvm_tlb_flush_local_vmid(kern_hyp_va(mmu)); __kvm_flush_cpu_context(kern_hyp_va(mmu));
} }
static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt) static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt)
...@@ -67,9 +67,9 @@ static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt) ...@@ -67,9 +67,9 @@ static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt)
write_sysreg_el2(tmp, SYS_SCTLR); write_sysreg_el2(tmp, SYS_SCTLR);
} }
static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt) static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt)
{ {
cpu_reg(host_ctxt, 1) = __vgic_v3_get_ich_vtr_el2(); cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config();
} }
static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt) static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt)
...@@ -115,10 +115,10 @@ static const hcall_t host_hcall[] = { ...@@ -115,10 +115,10 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_flush_vm_context), HANDLE_FUNC(__kvm_flush_vm_context),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
HANDLE_FUNC(__kvm_tlb_flush_vmid), HANDLE_FUNC(__kvm_tlb_flush_vmid),
HANDLE_FUNC(__kvm_tlb_flush_local_vmid), HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff), HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__kvm_enable_ssbs), HANDLE_FUNC(__kvm_enable_ssbs),
HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2), HANDLE_FUNC(__vgic_v3_get_gic_config),
HANDLE_FUNC(__vgic_v3_read_vmcr), HANDLE_FUNC(__vgic_v3_read_vmcr),
HANDLE_FUNC(__vgic_v3_write_vmcr), HANDLE_FUNC(__vgic_v3_write_vmcr),
HANDLE_FUNC(__vgic_v3_init_lrs), HANDLE_FUNC(__vgic_v3_init_lrs),
......
...@@ -192,6 +192,14 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) ...@@ -192,6 +192,14 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
pmu_switch_needed = __pmu_switch_to_guest(host_ctxt); pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
__sysreg_save_state_nvhe(host_ctxt); __sysreg_save_state_nvhe(host_ctxt);
/*
* We must flush and disable the SPE buffer for nVHE, as
* the translation regime(EL1&0) is going to be loaded with
* that of the guest. And we must do this before we change the
* translation regime to EL2 (via MDCR_EL2_E2PB == 0) and
* before we load guest Stage1.
*/
__debug_save_host_buffers_nvhe(vcpu);
__adjust_pc(vcpu); __adjust_pc(vcpu);
...@@ -234,11 +242,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) ...@@ -234,11 +242,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
__fpsimd_save_fpexc32(vcpu); __fpsimd_save_fpexc32(vcpu);
__debug_switch_to_host(vcpu);
/* /*
* This must come after restoring the host sysregs, since a non-VHE * This must come after restoring the host sysregs, since a non-VHE
* system may enable SPE here and make use of the TTBRs. * system may enable SPE here and make use of the TTBRs.
*/ */
__debug_switch_to_host(vcpu); __debug_restore_host_buffers_nvhe(vcpu);
if (pmu_switch_needed) if (pmu_switch_needed)
__pmu_switch_to_host(host_ctxt); __pmu_switch_to_host(host_ctxt);
...@@ -257,7 +266,6 @@ void __noreturn hyp_panic(void) ...@@ -257,7 +266,6 @@ void __noreturn hyp_panic(void)
u64 spsr = read_sysreg_el2(SYS_SPSR); u64 spsr = read_sysreg_el2(SYS_SPSR);
u64 elr = read_sysreg_el2(SYS_ELR); u64 elr = read_sysreg_el2(SYS_ELR);
u64 par = read_sysreg_par(); u64 par = read_sysreg_par();
bool restore_host = true;
struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *host_ctxt;
struct kvm_vcpu *vcpu; struct kvm_vcpu *vcpu;
...@@ -271,7 +279,7 @@ void __noreturn hyp_panic(void) ...@@ -271,7 +279,7 @@ void __noreturn hyp_panic(void)
__sysreg_restore_state_nvhe(host_ctxt); __sysreg_restore_state_nvhe(host_ctxt);
} }
__hyp_do_panic(restore_host, spsr, elr, par); __hyp_do_panic(host_ctxt, spsr, elr, par);
unreachable(); unreachable();
} }
......
...@@ -123,7 +123,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) ...@@ -123,7 +123,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
__tlb_switch_to_host(&cxt); __tlb_switch_to_host(&cxt);
} }
void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
{ {
struct tlb_inv_context cxt; struct tlb_inv_context cxt;
...@@ -131,6 +131,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) ...@@ -131,6 +131,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
__tlb_switch_to_guest(mmu, &cxt); __tlb_switch_to_guest(mmu, &cxt);
__tlbi(vmalle1); __tlbi(vmalle1);
asm volatile("ic iallu");
dsb(nsh); dsb(nsh);
isb(); isb();
......
...@@ -223,6 +223,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data, ...@@ -223,6 +223,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
goto out; goto out;
if (!table) { if (!table) {
data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
data->addr += kvm_granule_size(level); data->addr += kvm_granule_size(level);
goto out; goto out;
} }
......
...@@ -405,9 +405,45 @@ void __vgic_v3_init_lrs(void) ...@@ -405,9 +405,45 @@ void __vgic_v3_init_lrs(void)
__gic_v3_set_lr(0, i); __gic_v3_set_lr(0, i);
} }
u64 __vgic_v3_get_ich_vtr_el2(void) /*
* Return the GIC CPU configuration:
* - [31:0] ICH_VTR_EL2
* - [62:32] RES0
* - [63] MMIO (GICv2) capable
*/
u64 __vgic_v3_get_gic_config(void)
{ {
return read_gicreg(ICH_VTR_EL2); u64 val, sre = read_gicreg(ICC_SRE_EL1);
unsigned long flags = 0;
/*
* To check whether we have a MMIO-based (GICv2 compatible)
* CPU interface, we need to disable the system register
* view. To do that safely, we have to prevent any interrupt
* from firing (which would be deadly).
*
* Note that this only makes sense on VHE, as interrupts are
* already masked for nVHE as part of the exception entry to
* EL2.
*/
if (has_vhe())
flags = local_daif_save();
write_gicreg(0, ICC_SRE_EL1);
isb();
val = read_gicreg(ICC_SRE_EL1);
write_gicreg(sre, ICC_SRE_EL1);
isb();
if (has_vhe())
local_daif_restore(flags);
val = (val & ICC_SRE_EL1_SRE) ? 0 : (1ULL << 63);
val |= read_gicreg(ICH_VTR_EL2);
return val;
} }
u64 __vgic_v3_read_vmcr(void) u64 __vgic_v3_read_vmcr(void)
......
...@@ -127,7 +127,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu) ...@@ -127,7 +127,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
__tlb_switch_to_host(&cxt); __tlb_switch_to_host(&cxt);
} }
void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
{ {
struct tlb_inv_context cxt; struct tlb_inv_context cxt;
...@@ -135,6 +135,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu) ...@@ -135,6 +135,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
__tlb_switch_to_guest(mmu, &cxt); __tlb_switch_to_guest(mmu, &cxt);
__tlbi(vmalle1); __tlbi(vmalle1);
asm volatile("ic iallu");
dsb(nsh); dsb(nsh);
isb(); isb();
......
...@@ -1312,8 +1312,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, ...@@ -1312,8 +1312,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
* Prevent userspace from creating a memory region outside of the IPA * Prevent userspace from creating a memory region outside of the IPA
* space addressable by the KVM guest IPA space. * space addressable by the KVM guest IPA space.
*/ */
if (memslot->base_gfn + memslot->npages >= if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
(kvm_phys_size(kvm) >> PAGE_SHIFT))
return -EFAULT; return -EFAULT;
mmap_read_lock(current->mm); mmap_read_lock(current->mm);
......
...@@ -11,6 +11,8 @@ ...@@ -11,6 +11,8 @@
#include <asm/kvm_emulate.h> #include <asm/kvm_emulate.h>
DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
static int kvm_is_in_guest(void) static int kvm_is_in_guest(void)
{ {
return kvm_get_running_vcpu() != NULL; return kvm_get_running_vcpu() != NULL;
...@@ -48,6 +50,14 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = { ...@@ -48,6 +50,14 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
int kvm_perf_init(void) int kvm_perf_init(void)
{ {
/*
* Check if HW_PERF_EVENTS are supported by checking the number of
* hardware performance counters. This could ensure the presence of
* a physical PMU and CONFIG_PERF_EVENT is selected.
*/
if (IS_ENABLED(CONFIG_ARM_PMU) && perf_num_counters() > 0)
static_branch_enable(&kvm_arm_pmu_available);
return perf_register_guest_info_callbacks(&kvm_guest_cbs); return perf_register_guest_info_callbacks(&kvm_guest_cbs);
} }
......
...@@ -823,16 +823,6 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1) ...@@ -823,16 +823,6 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
return val & mask; return val & mask;
} }
bool kvm_arm_support_pmu_v3(void)
{
/*
* Check if HW_PERF_EVENTS are supported by checking the number of
* hardware performance counters. This could ensure the presence of
* a physical PMU and CONFIG_PERF_EVENT is selected.
*/
return (perf_num_counters() > 0);
}
int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
{ {
if (!kvm_vcpu_has_pmu(vcpu)) if (!kvm_vcpu_has_pmu(vcpu))
......
...@@ -326,10 +326,9 @@ int kvm_set_ipa_limit(void) ...@@ -326,10 +326,9 @@ int kvm_set_ipa_limit(void)
} }
kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange); kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange);
WARN(kvm_ipa_limit < KVM_PHYS_SHIFT, kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit,
"KVM IPA Size Limit (%d bits) is smaller than default size\n", ((kvm_ipa_limit < KVM_PHYS_SHIFT) ?
kvm_ipa_limit); " (Reduced IPA size, limited VM/VMM compatibility)" : ""));
kvm_info("IPA Size Limit: %d bits\n", kvm_ipa_limit);
return 0; return 0;
} }
...@@ -358,6 +357,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) ...@@ -358,6 +357,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
return -EINVAL; return -EINVAL;
} else { } else {
phys_shift = KVM_PHYS_SHIFT; phys_shift = KVM_PHYS_SHIFT;
if (phys_shift > kvm_ipa_limit) {
pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
current->comm);
return -EINVAL;
}
} }
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
......
...@@ -574,9 +574,13 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); ...@@ -574,9 +574,13 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
*/ */
int vgic_v3_probe(const struct gic_kvm_info *info) int vgic_v3_probe(const struct gic_kvm_info *info)
{ {
u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2); u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
bool has_v2;
int ret; int ret;
has_v2 = ich_vtr_el2 >> 63;
ich_vtr_el2 = (u32)ich_vtr_el2;
/* /*
* The ListRegs field is 5 bits, but there is an architectural * The ListRegs field is 5 bits, but there is an architectural
* maximum of 16 list registers. Just ignore bit 4... * maximum of 16 list registers. Just ignore bit 4...
...@@ -594,13 +598,15 @@ int vgic_v3_probe(const struct gic_kvm_info *info) ...@@ -594,13 +598,15 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
gicv4_enable ? "en" : "dis"); gicv4_enable ? "en" : "dis");
} }
kvm_vgic_global_state.vcpu_base = 0;
if (!info->vcpu.start) { if (!info->vcpu.start) {
kvm_info("GICv3: no GICV resource entry\n"); kvm_info("GICv3: no GICV resource entry\n");
kvm_vgic_global_state.vcpu_base = 0; } else if (!has_v2) {
pr_warn(FW_BUG "CPU interface incapable of MMIO access\n");
} else if (!PAGE_ALIGNED(info->vcpu.start)) { } else if (!PAGE_ALIGNED(info->vcpu.start)) {
pr_warn("GICV physical address 0x%llx not page aligned\n", pr_warn("GICV physical address 0x%llx not page aligned\n",
(unsigned long long)info->vcpu.start); (unsigned long long)info->vcpu.start);
kvm_vgic_global_state.vcpu_base = 0;
} else { } else {
kvm_vgic_global_state.vcpu_base = info->vcpu.start; kvm_vgic_global_state.vcpu_base = info->vcpu.start;
kvm_vgic_global_state.can_emulate_gicv2 = true; kvm_vgic_global_state.can_emulate_gicv2 = true;
......
...@@ -963,7 +963,7 @@ struct kvm_arch { ...@@ -963,7 +963,7 @@ struct kvm_arch {
struct kvm_pit *vpit; struct kvm_pit *vpit;
atomic_t vapics_in_nmi_mode; atomic_t vapics_in_nmi_mode;
struct mutex apic_map_lock; struct mutex apic_map_lock;
struct kvm_apic_map *apic_map; struct kvm_apic_map __rcu *apic_map;
atomic_t apic_map_dirty; atomic_t apic_map_dirty;
bool apic_access_page_done; bool apic_access_page_done;
...@@ -1036,7 +1036,7 @@ struct kvm_arch { ...@@ -1036,7 +1036,7 @@ struct kvm_arch {
bool bus_lock_detection_enabled; bool bus_lock_detection_enabled;
struct kvm_pmu_event_filter *pmu_event_filter; struct kvm_pmu_event_filter __rcu *pmu_event_filter;
struct task_struct *nx_lpage_recovery_thread; struct task_struct *nx_lpage_recovery_thread;
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
......
...@@ -268,21 +268,20 @@ static void __init kvmclock_init_mem(void) ...@@ -268,21 +268,20 @@ static void __init kvmclock_init_mem(void)
static int __init kvm_setup_vsyscall_timeinfo(void) static int __init kvm_setup_vsyscall_timeinfo(void)
{ {
#ifdef CONFIG_X86_64 kvmclock_init_mem();
u8 flags;
if (!per_cpu(hv_clock_per_cpu, 0) || !kvmclock_vsyscall) #ifdef CONFIG_X86_64
return 0; if (per_cpu(hv_clock_per_cpu, 0) && kvmclock_vsyscall) {
u8 flags;
flags = pvclock_read_flags(&hv_clock_boot[0].pvti); flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
if (!(flags & PVCLOCK_TSC_STABLE_BIT)) if (!(flags & PVCLOCK_TSC_STABLE_BIT))
return 0; return 0;
kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK; kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
}
#endif #endif
kvmclock_init_mem();
return 0; return 0;
} }
early_initcall(kvm_setup_vsyscall_timeinfo); early_initcall(kvm_setup_vsyscall_timeinfo);
......
...@@ -1642,7 +1642,16 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn) ...@@ -1642,7 +1642,16 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
} }
if (kvm_use_posted_timer_interrupt(apic->vcpu)) { if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
kvm_wait_lapic_expire(vcpu); /*
* Ensure the guest's timer has truly expired before posting an
* interrupt. Open code the relevant checks to avoid querying
* lapic_timer_int_injected(), which will be false since the
* interrupt isn't yet injected. Waiting until after injecting
* is not an option since that won't help a posted interrupt.
*/
if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
vcpu->arch.apic->lapic_timer.timer_advance_ns)
__kvm_wait_lapic_expire(vcpu);
kvm_apic_inject_pending_timer_irqs(apic); kvm_apic_inject_pending_timer_irqs(apic);
return; return;
} }
...@@ -2595,6 +2604,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) ...@@ -2595,6 +2604,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
apic_update_ppr(apic); apic_update_ppr(apic);
hrtimer_cancel(&apic->lapic_timer.timer); hrtimer_cancel(&apic->lapic_timer.timer);
apic->lapic_timer.expired_tscdeadline = 0;
apic_update_lvtt(apic); apic_update_lvtt(apic);
apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
update_divide_count(apic); update_divide_count(apic);
......
...@@ -337,7 +337,18 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt, ...@@ -337,7 +337,18 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
cpu_relax(); cpu_relax();
} }
} else { } else {
/*
* If the SPTE is not MMU-present, there is no backing
* page associated with the SPTE and so no side effects
* that need to be recorded, and exclusive ownership of
* mmu_lock ensures the SPTE can't be made present.
* Note, zapping MMIO SPTEs is also unnecessary as they
* are guarded by the memslots generation, not by being
* unreachable.
*/
old_child_spte = READ_ONCE(*sptep); old_child_spte = READ_ONCE(*sptep);
if (!is_shadow_present_pte(old_child_spte))
continue;
/* /*
* Marking the SPTE as a removed SPTE is not * Marking the SPTE as a removed SPTE is not
......
...@@ -115,13 +115,6 @@ static const struct svm_direct_access_msrs { ...@@ -115,13 +115,6 @@ static const struct svm_direct_access_msrs {
{ .index = MSR_INVALID, .always = false }, { .index = MSR_INVALID, .always = false },
}; };
/* enable NPT for AMD64 and X86 with PAE */
#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
bool npt_enabled = true;
#else
bool npt_enabled;
#endif
/* /*
* These 2 parameters are used to config the controls for Pause-Loop Exiting: * These 2 parameters are used to config the controls for Pause-Loop Exiting:
* pause_filter_count: On processors that support Pause filtering(indicated * pause_filter_count: On processors that support Pause filtering(indicated
...@@ -170,9 +163,12 @@ module_param(pause_filter_count_shrink, ushort, 0444); ...@@ -170,9 +163,12 @@ module_param(pause_filter_count_shrink, ushort, 0444);
static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX; static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
module_param(pause_filter_count_max, ushort, 0444); module_param(pause_filter_count_max, ushort, 0444);
/* allow nested paging (virtualized MMU) for all guests */ /*
static int npt = true; * Use nested page tables by default. Note, NPT may get forced off by
module_param(npt, int, S_IRUGO); * svm_hardware_setup() if it's unsupported by hardware or the host kernel.
*/
bool npt_enabled = true;
module_param_named(npt, npt_enabled, bool, 0444);
/* allow nested virtualization in KVM/SVM */ /* allow nested virtualization in KVM/SVM */
static int nested = true; static int nested = true;
...@@ -988,10 +984,15 @@ static __init int svm_hardware_setup(void) ...@@ -988,10 +984,15 @@ static __init int svm_hardware_setup(void)
goto err; goto err;
} }
if (!boot_cpu_has(X86_FEATURE_NPT)) /*
* KVM's MMU doesn't support using 2-level paging for itself, and thus
* NPT isn't supported if the host is using 2-level paging since host
* CR4 is unchanged on VMRUN.
*/
if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
npt_enabled = false; npt_enabled = false;
if (npt_enabled && !npt) if (!boot_cpu_has(X86_FEATURE_NPT))
npt_enabled = false; npt_enabled = false;
kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G); kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
......
...@@ -10601,7 +10601,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, ...@@ -10601,7 +10601,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
return (void __user *)hva; return (void __user *)hva;
} else { } else {
if (!slot || !slot->npages) if (!slot || !slot->npages)
return 0; return NULL;
old_npages = slot->npages; old_npages = slot->npages;
hva = slot->userspace_addr; hva = slot->userspace_addr;
......
...@@ -13,6 +13,13 @@ ...@@ -13,6 +13,13 @@
#define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1) #define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1)
#define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1) #define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1)
DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
static __always_inline bool kvm_arm_support_pmu_v3(void)
{
return static_branch_likely(&kvm_arm_pmu_available);
}
#ifdef CONFIG_HW_PERF_EVENTS #ifdef CONFIG_HW_PERF_EVENTS
struct kvm_pmc { struct kvm_pmc {
...@@ -47,7 +54,6 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val); ...@@ -47,7 +54,6 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val); void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
u64 select_idx); u64 select_idx);
bool kvm_arm_support_pmu_v3(void);
int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr); struct kvm_device_attr *attr);
int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu,
...@@ -87,7 +93,6 @@ static inline void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) {} ...@@ -87,7 +93,6 @@ static inline void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) {} static inline void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu,
u64 data, u64 select_idx) {} u64 data, u64 select_idx) {}
static inline bool kvm_arm_support_pmu_v3(void) { return false; }
static inline int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, static inline int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu,
struct kvm_device_attr *attr) struct kvm_device_attr *attr)
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment