Commit 69995074 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Radim Krčmář:
 "ARM:

   - Yet another race with VM destruction plugged

   - A set of small vgic fixes

  x86:

   - Preserve pending INIT

   - RCU fixes in paravirtual async pf, VM teardown, and VMXOFF
     emulation

   - nVMX interrupt injection and dirty tracking fixes

   - initialize to make UBSAN happy"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: arm/arm64: vgic: Use READ_ONCE fo cmpxchg
  KVM: nVMX: Fix interrupt window request with "Acknowledge interrupt on exit"
  KVM: nVMX: mark vmcs12 pages dirty on L2 exit
  kvm: nVMX: don't flush VMCS12 during VMXOFF or VCPU teardown
  KVM: nVMX: do not pin the VMCS12
  KVM: avoid using rcu_dereference_protected
  KVM: X86: init irq->level in kvm_pv_kick_cpu_op
  KVM: X86: Fix loss of pending INIT due to race
  KVM: async_pf: make rcu irq exit if not triggered from idle task
  KVM: nVMX: fixes to nested virt interrupt injection
  KVM: nVMX: do not fill vm_exit_intr_error_code in prepare_vmcs12
  KVM: arm/arm64: Handle hva aging while destroying the vm
  KVM: arm/arm64: PMU: Fix overflow interrupt injection
  KVM: arm/arm64: Fix bug in advertising KVM_CAP_MSI_DEVID capability
parents 0d5b9944 53a5abd8
......@@ -764,7 +764,7 @@ static bool access_pmovs(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (p->is_write) {
if (r->CRm & 0x2)
/* accessing PMOVSSET_EL0 */
kvm_pmu_overflow_set(vcpu, p->regval & mask);
vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= (p->regval & mask);
else
/* accessing PMOVSCLR_EL0 */
vcpu_sys_reg(vcpu, PMOVSSET_EL0) &= ~(p->regval & mask);
......
......@@ -151,6 +151,8 @@ void kvm_async_pf_task_wait(u32 token)
if (hlist_unhashed(&n.link))
break;
rcu_irq_exit();
if (!n.halted) {
local_irq_enable();
schedule();
......@@ -159,11 +161,11 @@ void kvm_async_pf_task_wait(u32 token)
/*
* We cannot reschedule. So halt.
*/
rcu_irq_exit();
native_safe_halt();
local_irq_disable();
rcu_irq_enter();
}
rcu_irq_enter();
}
if (!n.halted)
finish_swait(&n.wq, &wait);
......
......@@ -2430,6 +2430,16 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
svm->vmcb->control.exit_code_hi = 0;
svm->vmcb->control.exit_info_1 = error_code;
/*
* FIXME: we should not write CR2 when L1 intercepts an L2 #PF exception.
* The fix is to add the ancillary datum (CR2 or DR6) to structs
* kvm_queued_exception and kvm_vcpu_events, so that CR2 and DR6 can be
* written only when inject_pending_event runs (DR6 would written here
* too). This should be conditional on a new capability---if the
* capability is disabled, kvm_multiple_exception would write the
* ancillary information to CR2 or DR6, for backwards ABI-compatibility.
*/
if (svm->vcpu.arch.exception.nested_apf)
svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
else
......
This diff is collapsed.
......@@ -3159,15 +3159,18 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
kvm_set_hflags(vcpu, hflags);
vcpu->arch.smi_pending = events->smi.pending;
if (events->smi.smm_inside_nmi)
vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
else
vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
if (lapic_in_kernel(vcpu)) {
if (events->smi.latched_init)
set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
if (events->smi.smm) {
if (events->smi.smm_inside_nmi)
vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
else
clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
if (lapic_in_kernel(vcpu)) {
if (events->smi.latched_init)
set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
else
clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
}
}
}
......@@ -6215,6 +6218,7 @@ static void kvm_pv_kick_cpu_op(struct kvm *kvm, unsigned long flags, int apicid)
lapic_irq.shorthand = 0;
lapic_irq.dest_mode = 0;
lapic_irq.level = 0;
lapic_irq.dest_id = apicid;
lapic_irq.msi_redir_hint = false;
......
......@@ -48,7 +48,6 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu);
void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu);
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu);
......@@ -86,7 +85,6 @@ static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {}
static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {}
static inline void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) {}
static inline void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {}
static inline bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
......
......@@ -477,7 +477,8 @@ struct kvm {
static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
{
return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
lockdep_is_held(&kvm->slots_lock));
lockdep_is_held(&kvm->slots_lock) ||
!refcount_read(&kvm->users_count));
}
static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
......@@ -570,7 +571,8 @@ void kvm_put_kvm(struct kvm *kvm);
static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
{
return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
lockdep_is_held(&kvm->slots_lock));
lockdep_is_held(&kvm->slots_lock) ||
!refcount_read(&kvm->users_count));
}
static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
......
......@@ -1718,12 +1718,16 @@ static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *
int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
{
if (!kvm->arch.pgd)
return 0;
trace_kvm_age_hva(start, end);
return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL);
}
int kvm_test_age_hva(struct kvm *kvm, unsigned long hva)
{
if (!kvm->arch.pgd)
return 0;
trace_kvm_test_age_hva(hva);
return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL);
}
......
......@@ -203,11 +203,15 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
return reg;
}
static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu)
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
bool overflow = !!kvm_pmu_overflow_status(vcpu);
bool overflow;
if (!kvm_arm_pmu_v3_ready(vcpu))
return;
overflow = !!kvm_pmu_overflow_status(vcpu);
if (pmu->irq_level == overflow)
return;
......@@ -215,33 +219,11 @@ static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu)
if (likely(irqchip_in_kernel(vcpu->kvm))) {
int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
pmu->irq_num, overflow,
&vcpu->arch.pmu);
pmu->irq_num, overflow, pmu);
WARN_ON(ret);
}
}
/**
* kvm_pmu_overflow_set - set PMU overflow interrupt
* @vcpu: The vcpu pointer
* @val: the value guest writes to PMOVSSET register
*/
void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val)
{
if (val == 0)
return;
vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val;
kvm_pmu_check_overflow(vcpu);
}
static void kvm_pmu_update_state(struct kvm_vcpu *vcpu)
{
if (!kvm_arm_pmu_v3_ready(vcpu))
return;
kvm_pmu_check_overflow(vcpu);
}
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
......@@ -303,7 +285,7 @@ static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
}
/**
* When perf event overflows, call kvm_pmu_overflow_set to set overflow status.
* When the perf event overflows, set the overflow status and inform the vcpu.
*/
static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
struct perf_sample_data *data,
......@@ -313,7 +295,12 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
int idx = pmc->idx;
kvm_pmu_overflow_set(vcpu, BIT(idx));
vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
if (kvm_pmu_overflow_status(vcpu)) {
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
}
}
/**
......@@ -341,7 +328,7 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
reg = lower_32_bits(reg);
vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
if (!reg)
kvm_pmu_overflow_set(vcpu, BIT(i));
vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
}
}
}
......
......@@ -285,9 +285,6 @@ int vgic_init(struct kvm *kvm)
if (ret)
goto out;
if (vgic_has_its(kvm))
dist->msis_require_devid = true;
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_vgic_vcpu_enable(vcpu);
......
......@@ -1598,6 +1598,7 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
INIT_LIST_HEAD(&its->device_list);
INIT_LIST_HEAD(&its->collection_list);
dev->kvm->arch.vgic.msis_require_devid = true;
dev->kvm->arch.vgic.has_its = true;
its->enabled = false;
its->dev = dev;
......
......@@ -369,7 +369,7 @@ static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu,
return;
do {
old_propbaser = dist->propbaser;
old_propbaser = READ_ONCE(dist->propbaser);
propbaser = old_propbaser;
propbaser = update_64bit_reg(propbaser, addr & 4, len, val);
propbaser = vgic_sanitise_propbaser(propbaser);
......@@ -397,7 +397,7 @@ static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu,
return;
do {
old_pendbaser = vgic_cpu->pendbaser;
old_pendbaser = READ_ONCE(vgic_cpu->pendbaser);
pendbaser = old_pendbaser;
pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val);
pendbaser = vgic_sanitise_pendbaser(pendbaser);
......
......@@ -717,10 +717,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
hardware_disable_all();
out_err_no_disable:
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(rcu_access_pointer(kvm->buses[i]));
kfree(kvm_get_bus(kvm, i));
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
kvm_free_memslots(kvm,
rcu_dereference_protected(kvm->memslots[i], 1));
kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
kvm_arch_free_vm(kvm);
mmdrop(current->mm);
return ERR_PTR(r);
......@@ -754,9 +753,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
spin_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
struct kvm_io_bus *bus;
struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
bus = rcu_dereference_protected(kvm->buses[i], 1);
if (bus)
kvm_io_bus_destroy(bus);
kvm->buses[i] = NULL;
......@@ -770,8 +768,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
kvm_free_memslots(kvm,
rcu_dereference_protected(kvm->memslots[i], 1));
kvm_free_memslots(kvm, __kvm_memslots(kvm, i));
cleanup_srcu_struct(&kvm->irq_srcu);
cleanup_srcu_struct(&kvm->srcu);
kvm_arch_free_vm(kvm);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment