Commit ca052cfd authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "ARM:

   - Fix unexpected sign extension of KVM_ARM_DEVICE_ID_MASK

   - Tidy-up handling of AArch32 on asymmetric systems

  x86:

   - Fix 'missing ENDBR' BUG for fastop functions

  Generic:

   - Some cleanup and static analyzer patches

   - More fixes to KVM_CREATE_VM unwind paths"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: Drop unnecessary initialization of "ops" in kvm_ioctl_create_device()
  KVM: Drop unnecessary initialization of "npages" in hva_to_pfn_slow()
  x86/kvm: Fix "missing ENDBR" BUG for fastop functions
  x86/kvm: Simplify FOP_SETCC()
  x86/ibt, objtool: Add IBT_NOSEAL()
  KVM: Rename mmu_notifier_* to mmu_invalidate_*
  KVM: Rename KVM_PRIVATE_MEM_SLOTS to KVM_INTERNAL_MEM_SLOTS
  KVM: MIPS: remove unnecessary definition of KVM_PRIVATE_MEM_SLOTS
  KVM: Move coalesced MMIO initialization (back) into kvm_create_vm()
  KVM: Unconditionally get a ref to /dev/kvm module when creating a VM
  KVM: Properly unwind VM creation if creating debugfs fails
  KVM: arm64: Reject 32bit user PSTATE on asymmetric systems
  KVM: arm64: Treat PMCR_EL1.LC as RES1 on asymmetric systems
  KVM: arm64: Fix compile error due to sign extension
parents 42c54d54 959d6c4a
......@@ -929,6 +929,10 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu);
(system_supports_mte() && \
test_bit(KVM_ARCH_FLAG_MTE_ENABLED, &(kvm)->arch.flags))
#define kvm_supports_32bit_el0() \
(system_supports_32bit_el0() && \
!static_branch_unlikely(&arm64_mismatched_32bit_el0))
int kvm_trng_call(struct kvm_vcpu *vcpu);
#ifdef CONFIG_KVM
extern phys_addr_t hyp_mem_base;
......
......@@ -75,9 +75,11 @@ struct kvm_regs {
/* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
#define KVM_ARM_DEVICE_TYPE_SHIFT 0
#define KVM_ARM_DEVICE_TYPE_MASK (0xffff << KVM_ARM_DEVICE_TYPE_SHIFT)
#define KVM_ARM_DEVICE_TYPE_MASK GENMASK(KVM_ARM_DEVICE_TYPE_SHIFT + 15, \
KVM_ARM_DEVICE_TYPE_SHIFT)
#define KVM_ARM_DEVICE_ID_SHIFT 16
#define KVM_ARM_DEVICE_ID_MASK (0xffff << KVM_ARM_DEVICE_ID_SHIFT)
#define KVM_ARM_DEVICE_ID_MASK GENMASK(KVM_ARM_DEVICE_ID_SHIFT + 15, \
KVM_ARM_DEVICE_ID_SHIFT)
/* Supported device IDs */
#define KVM_ARM_DEVICE_VGIC_V2 0
......
......@@ -757,8 +757,7 @@ static bool vcpu_mode_is_bad_32bit(struct kvm_vcpu *vcpu)
if (likely(!vcpu_mode_is_32bit(vcpu)))
return false;
return !system_supports_32bit_el0() ||
static_branch_unlikely(&arm64_mismatched_32bit_el0);
return !kvm_supports_32bit_el0();
}
/**
......
......@@ -242,7 +242,7 @@ static int set_core_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
u64 mode = (*(u64 *)valp) & PSR_AA32_MODE_MASK;
switch (mode) {
case PSR_AA32_MODE_USR:
if (!system_supports_32bit_el0())
if (!kvm_supports_32bit_el0())
return -EINVAL;
break;
case PSR_AA32_MODE_FIQ:
......
......@@ -993,7 +993,7 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
* THP doesn't start to split while we are adjusting the
* refcounts.
*
* We are sure this doesn't happen, because mmu_notifier_retry
* We are sure this doesn't happen, because mmu_invalidate_retry
* was successful and we are holding the mmu_lock, so if this
* THP is trying to split, it will be blocked in the mmu
* notifier before touching any of the pages, specifically
......@@ -1188,9 +1188,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
return ret;
}
mmu_seq = vcpu->kvm->mmu_notifier_seq;
mmu_seq = vcpu->kvm->mmu_invalidate_seq;
/*
* Ensure the read of mmu_notifier_seq happens before we call
* Ensure the read of mmu_invalidate_seq happens before we call
* gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
* the page we just got a reference to gets unmapped before we have a
* chance to grab the mmu_lock, which ensure that if the page gets
......@@ -1246,7 +1246,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
else
write_lock(&kvm->mmu_lock);
pgt = vcpu->arch.hw_mmu->pgt;
if (mmu_notifier_retry(kvm, mmu_seq))
if (mmu_invalidate_retry(kvm, mmu_seq))
goto out_unlock;
/*
......
......@@ -652,7 +652,7 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
*/
val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
| (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
if (!system_supports_32bit_el0())
if (!kvm_supports_32bit_el0())
val |= ARMV8_PMU_PMCR_LC;
__vcpu_sys_reg(vcpu, r->reg) = val;
}
......@@ -701,7 +701,7 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
val = __vcpu_sys_reg(vcpu, PMCR_EL0);
val &= ~ARMV8_PMU_PMCR_MASK;
val |= p->regval & ARMV8_PMU_PMCR_MASK;
if (!system_supports_32bit_el0())
if (!kvm_supports_32bit_el0())
val |= ARMV8_PMU_PMCR_LC;
__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
kvm_pmu_handle_pmcr(vcpu, val);
......
......@@ -84,8 +84,6 @@
#define KVM_MAX_VCPUS 16
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 0
#define KVM_HALT_POLL_NS_DEFAULT 500000
......
......@@ -615,17 +615,17 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
* Used to check for invalidations in progress, of the pfn that is
* returned by pfn_to_pfn_prot below.
*/
mmu_seq = kvm->mmu_notifier_seq;
mmu_seq = kvm->mmu_invalidate_seq;
/*
* Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in
* gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
* Ensure the read of mmu_invalidate_seq isn't reordered with PTE reads
* in gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't
* risk the page we get a reference to getting unmapped before we have a
* chance to grab the mmu_lock without mmu_notifier_retry() noticing.
* chance to grab the mmu_lock without mmu_invalidate_retry() noticing.
*
* This smp_rmb() pairs with the effective smp_wmb() of the combination
* of the pte_unmap_unlock() after the PTE is zapped, and the
* spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before
* mmu_notifier_seq is incremented.
* mmu_invalidate_seq is incremented.
*/
smp_rmb();
......@@ -638,7 +638,7 @@ static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa,
spin_lock(&kvm->mmu_lock);
/* Check if an invalidation has taken place since we got pfn */
if (mmu_notifier_retry(kvm, mmu_seq)) {
if (mmu_invalidate_retry(kvm, mmu_seq)) {
/*
* This can happen when mappings are changed asynchronously, but
* also synchronously if a COW is triggered by
......
......@@ -666,7 +666,7 @@ static inline pte_t *find_kvm_host_pte(struct kvm *kvm, unsigned long mmu_seq,
VM_WARN(!spin_is_locked(&kvm->mmu_lock),
"%s called with kvm mmu_lock not held \n", __func__);
if (mmu_notifier_retry(kvm, mmu_seq))
if (mmu_invalidate_retry(kvm, mmu_seq))
return NULL;
pte = __find_linux_pte(kvm->mm->pgd, ea, NULL, hshift);
......
......@@ -90,7 +90,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
unsigned long pfn;
/* used to check for invalidations in progress */
mmu_seq = kvm->mmu_notifier_seq;
mmu_seq = kvm->mmu_invalidate_seq;
smp_rmb();
/* Get host physical address for gpa */
......@@ -151,7 +151,7 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
cpte = kvmppc_mmu_hpte_cache_next(vcpu);
spin_lock(&kvm->mmu_lock);
if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) {
if (!cpte || mmu_invalidate_retry(kvm, mmu_seq)) {
r = -EAGAIN;
goto out_unlock;
}
......
......@@ -578,7 +578,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
return -EFAULT;
/* used to check for invalidations in progress */
mmu_seq = kvm->mmu_notifier_seq;
mmu_seq = kvm->mmu_invalidate_seq;
smp_rmb();
ret = -EFAULT;
......@@ -693,7 +693,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
/* Check if we might have been invalidated; let the guest retry if so */
ret = RESUME_GUEST;
if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) {
if (mmu_invalidate_retry(vcpu->kvm, mmu_seq)) {
unlock_rmap(rmap);
goto out_unlock;
}
......
......@@ -640,7 +640,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
/* Check if we might have been invalidated; let the guest retry if so */
spin_lock(&kvm->mmu_lock);
ret = -EAGAIN;
if (mmu_notifier_retry(kvm, mmu_seq))
if (mmu_invalidate_retry(kvm, mmu_seq))
goto out_unlock;
/* Now traverse again under the lock and change the tree */
......@@ -830,7 +830,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
bool large_enable;
/* used to check for invalidations in progress */
mmu_seq = kvm->mmu_notifier_seq;
mmu_seq = kvm->mmu_invalidate_seq;
smp_rmb();
/*
......@@ -1191,7 +1191,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm,
* Increase the mmu notifier sequence number to prevent any page
* fault that read the memslot earlier from writing a PTE.
*/
kvm->mmu_notifier_seq++;
kvm->mmu_invalidate_seq++;
spin_unlock(&kvm->mmu_lock);
}
......
......@@ -1580,7 +1580,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu,
/* 2. Find the host pte for this L1 guest real address */
/* Used to check for invalidations in progress */
mmu_seq = kvm->mmu_notifier_seq;
mmu_seq = kvm->mmu_invalidate_seq;
smp_rmb();
/* See if can find translation in our partition scoped tables for L1 */
......
......@@ -219,7 +219,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
g_ptel = ptel;
/* used later to detect if we might have been invalidated */
mmu_seq = kvm->mmu_notifier_seq;
mmu_seq = kvm->mmu_invalidate_seq;
smp_rmb();
/* Find the memslot (if any) for this address */
......@@ -366,7 +366,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
rmap = real_vmalloc_addr(rmap);
lock_rmap(rmap);
/* Check for pending invalidations under the rmap chain lock */
if (mmu_notifier_retry(kvm, mmu_seq)) {
if (mmu_invalidate_retry(kvm, mmu_seq)) {
/* inval in progress, write a non-present HPTE */
pteh |= HPTE_V_ABSENT;
pteh &= ~HPTE_V_VALID;
......@@ -932,7 +932,7 @@ static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu,
int i;
/* Used later to detect if we might have been invalidated */
mmu_seq = kvm->mmu_notifier_seq;
mmu_seq = kvm->mmu_invalidate_seq;
smp_rmb();
arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
......@@ -960,7 +960,7 @@ static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu,
long ret = H_SUCCESS;
/* Used later to detect if we might have been invalidated */
mmu_seq = kvm->mmu_notifier_seq;
mmu_seq = kvm->mmu_invalidate_seq;
smp_rmb();
arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock);
......
......@@ -339,7 +339,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
unsigned long flags;
/* used to check for invalidations in progress */
mmu_seq = kvm->mmu_notifier_seq;
mmu_seq = kvm->mmu_invalidate_seq;
smp_rmb();
/*
......@@ -460,7 +460,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500,
}
spin_lock(&kvm->mmu_lock);
if (mmu_notifier_retry(kvm, mmu_seq)) {
if (mmu_invalidate_retry(kvm, mmu_seq)) {
ret = -EAGAIN;
goto out;
}
......
......@@ -666,7 +666,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
return ret;
}
mmu_seq = kvm->mmu_notifier_seq;
mmu_seq = kvm->mmu_invalidate_seq;
hfn = gfn_to_pfn_prot(kvm, gfn, is_write, &writable);
if (hfn == KVM_PFN_ERR_HWPOISON) {
......@@ -686,7 +686,7 @@ int kvm_riscv_gstage_map(struct kvm_vcpu *vcpu,
spin_lock(&kvm->mmu_lock);
if (mmu_notifier_retry(kvm, mmu_seq))
if (mmu_invalidate_retry(kvm, mmu_seq))
goto out_unlock;
if (writable) {
......
......@@ -31,6 +31,16 @@
#define __noendbr __attribute__((nocf_check))
/*
* Create a dummy function pointer reference to prevent objtool from marking
* the function as needing to be "sealed" (i.e. ENDBR converted to NOP by
* apply_ibt_endbr()).
*/
#define IBT_NOSEAL(fname) \
".pushsection .discard.ibt_endbr_noseal\n\t" \
_ASM_PTR fname "\n\t" \
".popsection\n\t"
static inline __attribute_const__ u32 gen_endbr(void)
{
u32 endbr;
......@@ -84,6 +94,7 @@ extern __noendbr void ibt_restore(u64 save);
#ifndef __ASSEMBLY__
#define ASM_ENDBR
#define IBT_NOSEAL(name)
#define __noendbr
......
......@@ -53,7 +53,7 @@
#define KVM_MAX_VCPU_IDS (KVM_MAX_VCPUS * KVM_VCPU_ID_RATIO)
/* memory slots that are not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 3
#define KVM_INTERNAL_MEM_SLOTS 3
#define KVM_HALT_POLL_NS_DEFAULT 200000
......
......@@ -326,7 +326,8 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
".align " __stringify(FASTOP_SIZE) " \n\t" \
".type " name ", @function \n\t" \
name ":\n\t" \
ASM_ENDBR
ASM_ENDBR \
IBT_NOSEAL(name)
#define FOP_FUNC(name) \
__FOP_FUNC(#name)
......@@ -446,27 +447,12 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
FOP_END
/* Special case for SETcc - 1 instruction per cc */
/*
* Depending on .config the SETcc functions look like:
*
* ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT]
* SETcc %al [3 bytes]
* RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK]
* INT3 [1 byte; CONFIG_SLS]
*/
#define SETCC_ALIGN 16
#define FOP_SETCC(op) \
".align " __stringify(SETCC_ALIGN) " \n\t" \
".type " #op ", @function \n\t" \
#op ": \n\t" \
ASM_ENDBR \
FOP_FUNC(op) \
#op " %al \n\t" \
__FOP_RET(#op) \
".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t"
FOP_RET(op)
__FOP_START(setcc, SETCC_ALIGN)
FOP_START(setcc)
FOP_SETCC(seto)
FOP_SETCC(setno)
FOP_SETCC(setc)
......@@ -1079,7 +1065,7 @@ static int em_bsr_c(struct x86_emulate_ctxt *ctxt)
static __always_inline u8 test_cc(unsigned int condition, unsigned long flags)
{
u8 rc;
void (*fop)(void) = (void *)em_setcc + SETCC_ALIGN * (condition & 0xf);
void (*fop)(void) = (void *)em_setcc + FASTOP_SIZE * (condition & 0xf);
flags = (flags & EFLAGS_MASK) | X86_EFLAGS_IF;
asm("push %[flags]; popf; " CALL_NOSPEC
......
......@@ -2914,7 +2914,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
* If addresses are being invalidated, skip prefetching to avoid
* accidentally prefetching those addresses.
*/
if (unlikely(vcpu->kvm->mmu_notifier_count))
if (unlikely(vcpu->kvm->mmu_invalidate_in_progress))
return;
__direct_pte_prefetch(vcpu, sp, sptep);
......@@ -2928,7 +2928,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
*
* There are several ways to safely use this helper:
*
* - Check mmu_notifier_retry_hva() after grabbing the mapping level, before
* - Check mmu_invalidate_retry_hva() after grabbing the mapping level, before
* consuming it. In this case, mmu_lock doesn't need to be held during the
* lookup, but it does need to be held while checking the MMU notifier.
*
......@@ -3056,7 +3056,7 @@ void kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
return;
/*
* mmu_notifier_retry() was successful and mmu_lock is held, so
* mmu_invalidate_retry() was successful and mmu_lock is held, so
* the pmd can't be split from under us.
*/
fault->goal_level = fault->req_level;
......@@ -4203,7 +4203,7 @@ static bool is_page_fault_stale(struct kvm_vcpu *vcpu,
return true;
return fault->slot &&
mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
mmu_invalidate_retry_hva(vcpu->kvm, mmu_seq, fault->hva);
}
static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
......@@ -4227,7 +4227,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
if (r)
return r;
mmu_seq = vcpu->kvm->mmu_notifier_seq;
mmu_seq = vcpu->kvm->mmu_invalidate_seq;
smp_rmb();
r = kvm_faultin_pfn(vcpu, fault);
......@@ -6055,7 +6055,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
write_lock(&kvm->mmu_lock);
kvm_inc_notifier_count(kvm, gfn_start, gfn_end);
kvm_mmu_invalidate_begin(kvm, gfn_start, gfn_end);
flush = kvm_rmap_zap_gfn_range(kvm, gfn_start, gfn_end);
......@@ -6069,7 +6069,7 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
kvm_flush_remote_tlbs_with_address(kvm, gfn_start,
gfn_end - gfn_start);
kvm_dec_notifier_count(kvm, gfn_start, gfn_end);
kvm_mmu_invalidate_end(kvm, gfn_start, gfn_end);
write_unlock(&kvm->mmu_lock);
}
......
......@@ -589,7 +589,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
* If addresses are being invalidated, skip prefetching to avoid
* accidentally prefetching those addresses.
*/
if (unlikely(vcpu->kvm->mmu_notifier_count))
if (unlikely(vcpu->kvm->mmu_invalidate_in_progress))
return;
if (sp->role.direct)
......@@ -838,7 +838,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault
else
fault->max_level = walker.level;
mmu_seq = vcpu->kvm->mmu_notifier_seq;
mmu_seq = vcpu->kvm->mmu_invalidate_seq;
smp_rmb();
r = kvm_faultin_pfn(vcpu, fault);
......
......@@ -656,12 +656,12 @@ struct kvm_irq_routing_table {
};
#endif
#ifndef KVM_PRIVATE_MEM_SLOTS
#define KVM_PRIVATE_MEM_SLOTS 0
#ifndef KVM_INTERNAL_MEM_SLOTS
#define KVM_INTERNAL_MEM_SLOTS 0
#endif
#define KVM_MEM_SLOTS_NUM SHRT_MAX
#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_PRIVATE_MEM_SLOTS)
#define KVM_USER_MEM_SLOTS (KVM_MEM_SLOTS_NUM - KVM_INTERNAL_MEM_SLOTS)
#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
......@@ -765,10 +765,10 @@ struct kvm {
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
struct mmu_notifier mmu_notifier;
unsigned long mmu_notifier_seq;
long mmu_notifier_count;
unsigned long mmu_notifier_range_start;
unsigned long mmu_notifier_range_end;
unsigned long mmu_invalidate_seq;
long mmu_invalidate_in_progress;
unsigned long mmu_invalidate_range_start;
unsigned long mmu_invalidate_range_end;
#endif
struct list_head devices;
u64 manual_dirty_log_protect;
......@@ -1357,10 +1357,10 @@ void kvm_mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc);
void *kvm_mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
#endif
void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
unsigned long end);
void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start,
unsigned long end);
void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
unsigned long end);
void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
unsigned long end);
long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
......@@ -1907,42 +1907,44 @@ extern const struct kvm_stats_header kvm_vcpu_stats_header;
extern const struct _kvm_stats_desc kvm_vcpu_stats_desc[];
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
static inline int mmu_invalidate_retry(struct kvm *kvm, unsigned long mmu_seq)
{
if (unlikely(kvm->mmu_notifier_count))
if (unlikely(kvm->mmu_invalidate_in_progress))
return 1;
/*
* Ensure the read of mmu_notifier_count happens before the read
* of mmu_notifier_seq. This interacts with the smp_wmb() in
* mmu_notifier_invalidate_range_end to make sure that the caller
* either sees the old (non-zero) value of mmu_notifier_count or
* the new (incremented) value of mmu_notifier_seq.
* PowerPC Book3s HV KVM calls this under a per-page lock
* rather than under kvm->mmu_lock, for scalability, so
* can't rely on kvm->mmu_lock to keep things ordered.
* Ensure the read of mmu_invalidate_in_progress happens before
* the read of mmu_invalidate_seq. This interacts with the
* smp_wmb() in mmu_notifier_invalidate_range_end to make sure
* that the caller either sees the old (non-zero) value of
* mmu_invalidate_in_progress or the new (incremented) value of
* mmu_invalidate_seq.
*
* PowerPC Book3s HV KVM calls this under a per-page lock rather
* than under kvm->mmu_lock, for scalability, so can't rely on
* kvm->mmu_lock to keep things ordered.
*/
smp_rmb();
if (kvm->mmu_notifier_seq != mmu_seq)
if (kvm->mmu_invalidate_seq != mmu_seq)
return 1;
return 0;
}
static inline int mmu_notifier_retry_hva(struct kvm *kvm,
unsigned long mmu_seq,
unsigned long hva)
static inline int mmu_invalidate_retry_hva(struct kvm *kvm,
unsigned long mmu_seq,
unsigned long hva)
{
lockdep_assert_held(&kvm->mmu_lock);
/*
* If mmu_notifier_count is non-zero, then the range maintained by
* kvm_mmu_notifier_invalidate_range_start contains all addresses that
* might be being invalidated. Note that it may include some false
* If mmu_invalidate_in_progress is non-zero, then the range maintained
* by kvm_mmu_notifier_invalidate_range_start contains all addresses
* that might be being invalidated. Note that it may include some false
* positives, due to shortcuts when handing concurrent invalidations.
*/
if (unlikely(kvm->mmu_notifier_count) &&
hva >= kvm->mmu_notifier_range_start &&
hva < kvm->mmu_notifier_range_end)
if (unlikely(kvm->mmu_invalidate_in_progress) &&
hva >= kvm->mmu_invalidate_range_start &&
hva < kvm->mmu_invalidate_range_end)
return 1;
if (kvm->mmu_notifier_seq != mmu_seq)
if (kvm->mmu_invalidate_seq != mmu_seq)
return 1;
return 0;
}
......
......@@ -4096,7 +4096,8 @@ static int validate_ibt(struct objtool_file *file)
* These sections can reference text addresses, but not with
* the intent to indirect branch to them.
*/
if (!strncmp(sec->name, ".discard", 8) ||
if ((!strncmp(sec->name, ".discard", 8) &&
strcmp(sec->name, ".discard.ibt_endbr_noseal")) ||
!strncmp(sec->name, ".debug", 6) ||
!strcmp(sec->name, ".altinstructions") ||
!strcmp(sec->name, ".ibt_endbr_seal") ||
......
......@@ -702,30 +702,31 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
/*
* .change_pte() must be surrounded by .invalidate_range_{start,end}().
* If mmu_notifier_count is zero, then no in-progress invalidations,
* including this one, found a relevant memslot at start(); rechecking
* memslots here is unnecessary. Note, a false positive (count elevated
* by a different invalidation) is sub-optimal but functionally ok.
* If mmu_invalidate_in_progress is zero, then no in-progress
* invalidations, including this one, found a relevant memslot at
* start(); rechecking memslots here is unnecessary. Note, a false
* positive (count elevated by a different invalidation) is sub-optimal
* but functionally ok.
*/
WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count));
if (!READ_ONCE(kvm->mmu_notifier_count))
if (!READ_ONCE(kvm->mmu_invalidate_in_progress))
return;
kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn);
}
void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
unsigned long end)
void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start,
unsigned long end)
{
/*
* The count increase must become visible at unlock time as no
* spte can be established without taking the mmu_lock and
* count is also read inside the mmu_lock critical section.
*/
kvm->mmu_notifier_count++;
if (likely(kvm->mmu_notifier_count == 1)) {
kvm->mmu_notifier_range_start = start;
kvm->mmu_notifier_range_end = end;
kvm->mmu_invalidate_in_progress++;
if (likely(kvm->mmu_invalidate_in_progress == 1)) {
kvm->mmu_invalidate_range_start = start;
kvm->mmu_invalidate_range_end = end;
} else {
/*
* Fully tracking multiple concurrent ranges has diminishing
......@@ -736,10 +737,10 @@ void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,
* accumulate and persist until all outstanding invalidates
* complete.
*/
kvm->mmu_notifier_range_start =
min(kvm->mmu_notifier_range_start, start);
kvm->mmu_notifier_range_end =
max(kvm->mmu_notifier_range_end, end);
kvm->mmu_invalidate_range_start =
min(kvm->mmu_invalidate_range_start, start);
kvm->mmu_invalidate_range_end =
max(kvm->mmu_invalidate_range_end, end);
}
}
......@@ -752,7 +753,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
.end = range->end,
.pte = __pte(0),
.handler = kvm_unmap_gfn_range,
.on_lock = kvm_inc_notifier_count,
.on_lock = kvm_mmu_invalidate_begin,
.on_unlock = kvm_arch_guest_memory_reclaimed,
.flush_on_ret = true,
.may_block = mmu_notifier_range_blockable(range),
......@@ -763,7 +764,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
/*
* Prevent memslot modification between range_start() and range_end()
* so that conditionally locking provides the same result in both
* functions. Without that guarantee, the mmu_notifier_count
* functions. Without that guarantee, the mmu_invalidate_in_progress
* adjustments will be imbalanced.
*
* Pairs with the decrement in range_end().
......@@ -779,7 +780,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
* any given time, and the caches themselves can check for hva overlap,
* i.e. don't need to rely on memslot overlap checks for performance.
* Because this runs without holding mmu_lock, the pfn caches must use
* mn_active_invalidate_count (see above) instead of mmu_notifier_count.
* mn_active_invalidate_count (see above) instead of
* mmu_invalidate_in_progress.
*/
gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
hva_range.may_block);
......@@ -789,22 +791,22 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
return 0;
}
void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start,
unsigned long end)
void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start,
unsigned long end)
{
/*
* This sequence increase will notify the kvm page fault that
* the page that is going to be mapped in the spte could have
* been freed.
*/
kvm->mmu_notifier_seq++;
kvm->mmu_invalidate_seq++;
smp_wmb();
/*
* The above sequence increase must be visible before the
* below count decrease, which is ensured by the smp_wmb above
* in conjunction with the smp_rmb in mmu_notifier_retry().
* in conjunction with the smp_rmb in mmu_invalidate_retry().
*/
kvm->mmu_notifier_count--;
kvm->mmu_invalidate_in_progress--;
}
static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
......@@ -816,7 +818,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
.end = range->end,
.pte = __pte(0),
.handler = (void *)kvm_null_fn,
.on_lock = kvm_dec_notifier_count,
.on_lock = kvm_mmu_invalidate_end,
.on_unlock = (void *)kvm_null_fn,
.flush_on_ret = false,
.may_block = mmu_notifier_range_blockable(range),
......@@ -837,7 +839,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
if (wake)
rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait);
BUG_ON(kvm->mmu_notifier_count < 0);
BUG_ON(kvm->mmu_invalidate_in_progress < 0);
}
static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
......@@ -1134,6 +1136,9 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
if (!kvm)
return ERR_PTR(-ENOMEM);
/* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */
__module_get(kvm_chardev_ops.owner);
KVM_MMU_LOCK_INIT(kvm);
mmgrab(current->mm);
kvm->mm = current->mm;
......@@ -1211,9 +1216,17 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
if (r)
goto out_err_no_mmu_notifier;
r = kvm_coalesced_mmio_init(kvm);
if (r < 0)
goto out_no_coalesced_mmio;
r = kvm_create_vm_debugfs(kvm, fdname);
if (r)
goto out_err_no_debugfs;
r = kvm_arch_post_init_vm(kvm);
if (r)
goto out_err_mmu_notifier;
goto out_err;
mutex_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
......@@ -1222,25 +1235,13 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
preempt_notifier_inc();
kvm_init_pm_notifier(kvm);
/*
* When the fd passed to this ioctl() is opened it pins the module,
* but try_module_get() also prevents getting a reference if the module
* is in MODULE_STATE_GOING (e.g. if someone ran "rmmod --wait").
*/
if (!try_module_get(kvm_chardev_ops.owner)) {
r = -ENODEV;
goto out_err_mmu_notifier;
}
r = kvm_create_vm_debugfs(kvm, fdname);
if (r)
goto out_err;
return kvm;
out_err:
module_put(kvm_chardev_ops.owner);
out_err_mmu_notifier:
kvm_destroy_vm_debugfs(kvm);
out_err_no_debugfs:
kvm_coalesced_mmio_free(kvm);
out_no_coalesced_mmio:
#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)
if (kvm->mmu_notifier.ops)
mmu_notifier_unregister(&kvm->mmu_notifier, current->mm);
......@@ -1259,6 +1260,7 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)
out_err_no_srcu:
kvm_arch_free_vm(kvm);
mmdrop(current->mm);
module_put(kvm_chardev_ops.owner);
return ERR_PTR(r);
}
......@@ -2516,7 +2518,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
{
unsigned int flags = FOLL_HWPOISON;
struct page *page;
int npages = 0;
int npages;
might_sleep();
......@@ -4378,7 +4380,7 @@ void kvm_unregister_device_ops(u32 type)
static int kvm_ioctl_create_device(struct kvm *kvm,
struct kvm_create_device *cd)
{
const struct kvm_device_ops *ops = NULL;
const struct kvm_device_ops *ops;
struct kvm_device *dev;
bool test = cd->flags & KVM_CREATE_DEVICE_TEST;
int type;
......@@ -4913,11 +4915,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
goto put_fd;
}
#ifdef CONFIG_KVM_MMIO
r = kvm_coalesced_mmio_init(kvm);
if (r < 0)
goto put_kvm;
#endif
file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);
if (IS_ERR(file)) {
r = PTR_ERR(file);
......
......@@ -112,27 +112,28 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s
{
/*
* mn_active_invalidate_count acts for all intents and purposes
* like mmu_notifier_count here; but the latter cannot be used
* here because the invalidation of caches in the mmu_notifier
* event occurs _before_ mmu_notifier_count is elevated.
* like mmu_invalidate_in_progress here; but the latter cannot
* be used here because the invalidation of caches in the
* mmu_notifier event occurs _before_ mmu_invalidate_in_progress
* is elevated.
*
* Note, it does not matter that mn_active_invalidate_count
* is not protected by gpc->lock. It is guaranteed to
* be elevated before the mmu_notifier acquires gpc->lock, and
* isn't dropped until after mmu_notifier_seq is updated.
* isn't dropped until after mmu_invalidate_seq is updated.
*/
if (kvm->mn_active_invalidate_count)
return true;
/*
* Ensure mn_active_invalidate_count is read before
* mmu_notifier_seq. This pairs with the smp_wmb() in
* mmu_invalidate_seq. This pairs with the smp_wmb() in
* mmu_notifier_invalidate_range_end() to guarantee either the
* old (non-zero) value of mn_active_invalidate_count or the
* new (incremented) value of mmu_notifier_seq is observed.
* new (incremented) value of mmu_invalidate_seq is observed.
*/
smp_rmb();
return kvm->mmu_notifier_seq != mmu_seq;
return kvm->mmu_invalidate_seq != mmu_seq;
}
static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
......@@ -155,7 +156,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
gpc->valid = false;
do {
mmu_seq = kvm->mmu_notifier_seq;
mmu_seq = kvm->mmu_invalidate_seq;
smp_rmb();
write_unlock_irq(&gpc->lock);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment