Commit 6157ce59 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "x86 has lots of small bugfixes, mostly one liners. It's quite late in
  5.11-rc but none of them are related to this merge window; it's just
  bugs coming in at the wrong time.

  Of note among the others is "KVM: x86: Allow guests to see
  MSR_IA32_TSX_CTRL even if tsx=off" that fixes a live migration failure
  seen on distros that hadn't switched to tsx=off right away.

  ARM:
  - Avoid clobbering extra registers on initialisation"

[ Sean Christopherson notes that commit 943dea8a ("KVM: x86: Update
  emulator context mode if SYSENTER xfers to 64-bit mode") should have
  had authorship credited to Jonny Barker, not to him.  - Linus ]

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: Set so called 'reserved CR3 bits in LM mask' at vCPU reset
  KVM: x86/mmu: Fix TDP MMU zap collapsible SPTEs
  KVM: x86: cleanup CR3 reserved bits checks
  KVM: SVM: Treat SVM as unsupported when running as an SEV guest
  KVM: x86: Update emulator context mode if SYSENTER xfers to 64-bit mode
  KVM: x86: Supplement __cr4_reserved_bits() with X86_FEATURE_PCID check
  KVM/x86: assign hva with the right value to vm_munmap the pages
  KVM: x86: Allow guests to see MSR_IA32_TSX_CTRL even if tsx=off
  Fix unsynchronized access to sev members through svm_register_enc_region
  KVM: Documentation: Fix documentation for nested.
  KVM: x86: fix CPUID entries returned by KVM_GET_CPUID2 ioctl
  KVM: arm64: Don't clobber x4 in __do_hyp_init
parents 97ba0c74 031b91a5
@@ -37,8 +37,10 @@ call L2.
 Running nested VMX
 ------------------
-The nested VMX feature is disabled by default. It can be enabled by giving
-the "nested=1" option to the kvm-intel module.
+The nested VMX feature is enabled by default since Linux kernel v4.20. For
+older Linux kernel, it can be enabled by giving the "nested=1" option to the
+kvm-intel module.
 No modifications are required to user space (qemu). However, qemu's default
 emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
......
@@ -74,7 +74,7 @@ few:
 Enabling "nested" (x86)
 -----------------------
-From Linux kernel v4.19 onwards, the ``nested`` KVM parameter is enabled
+From Linux kernel v4.20 onwards, the ``nested`` KVM parameter is enabled
 by default for Intel and AMD. (Though your Linux distribution might
 override this default.)
......
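Both documentation hunks describe the same knob: the "nested" module parameter. As a quick way to check what a running host actually has, here is a minimal userspace sketch (not part of this series; it only reads the standard module-parameter sysfs files, and the helper name is made up):

    /*
     * Hypothetical check, not part of this series: read the "nested"
     * module parameter for kvm_intel and kvm_amd from sysfs.
     */
    #include <stdio.h>
    #include <string.h>

    static void report_nested(const char *module)
    {
            char path[128], val[8] = "";
            FILE *f;

            snprintf(path, sizeof(path), "/sys/module/%s/parameters/nested", module);
            f = fopen(path, "r");
            if (!f) {
                    printf("%s: parameter not readable (module not loaded?)\n", module);
                    return;
            }
            if (fgets(val, sizeof(val), f))
                    val[strcspn(val, "\n")] = '\0';
            fclose(f);

            /* Recent kernels report "Y"/"N"; older ones report "1"/"0". */
            printf("%s: nested=%s\n", module, val);
    }

    int main(void)
    {
            report_nested("kvm_intel");
            report_nested("kvm_amd");
            return 0;
    }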
@@ -47,6 +47,8 @@ __invalid:
         b       .
 
         /*
+         * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers.
+         *
          * x0: SMCCC function ID
          * x1: struct kvm_nvhe_init_params PA
          */
@@ -70,9 +72,9 @@ __do_hyp_init:
         eret
 
 1:      mov     x0, x1
-        mov     x4, lr
-        bl      ___kvm_hyp_init
-        mov     lr, x4
+        mov     x3, lr
+        bl      ___kvm_hyp_init                 // Clobbers x0..x2
+        mov     lr, x3
 
         /* Hello, World! */
         mov     x0, #SMCCC_RET_SUCCESS
@@ -82,8 +84,8 @@ SYM_CODE_END(__kvm_hyp_init)
 /*
  * Initialize the hypervisor in EL2.
  *
- * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers
- * and leave x4 for the caller.
+ * Only uses x0..x2 so as to not clobber callee-saved SMCCC registers
+ * and leave x3 for the caller.
  *
  * x0: struct kvm_nvhe_init_params PA
  */
@@ -112,9 +114,9 @@ alternative_else_nop_endif
         /*
          * Set the PS bits in TCR_EL2.
          */
-        ldr     x1, [x0, #NVHE_INIT_TCR_EL2]
-        tcr_compute_pa_size x1, #TCR_EL2_PS_SHIFT, x2, x3
-        msr     tcr_el2, x1
+        ldr     x0, [x0, #NVHE_INIT_TCR_EL2]
+        tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2
+        msr     tcr_el2, x0
 
         isb
@@ -193,7 +195,7 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
         /* Enable MMU, set vectors and stack. */
         mov     x0, x28
-        bl      ___kvm_hyp_init                 // Clobbers x0..x3
+        bl      ___kvm_hyp_init                 // Clobbers x0..x2
 
         /* Leave idmap. */
         mov     x0, x29
......
@@ -321,7 +321,7 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
         if (cpuid->nent < vcpu->arch.cpuid_nent)
                 goto out;
         r = -EFAULT;
-        if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
+        if (copy_to_user(entries, vcpu->arch.cpuid_entries,
                          vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
                 goto out;
         return 0;
......
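The one-character fix above matters because vcpu->arch.cpuid_entries is nowadays a pointer to a dynamically allocated array, so "&vcpu->arch.cpuid_entries" is the address of the pointer itself rather than of the entries. A stand-alone C sketch of the difference (illustrative names only, not KVM code):

    /*
     * Illustrative only: with a pointer, "&entries" copies the pointer's
     * own bytes (and whatever follows it), not the array it points to.
     */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    struct entry { unsigned int function, index; };

    int main(void)
    {
            struct entry *entries = calloc(4, sizeof(*entries));
            struct entry out[4] = { 0 };

            entries[0].function = 0x8000001f;

            /* Correct: copy the array the pointer refers to. */
            memcpy(out, entries, 4 * sizeof(*entries));
            printf("function[0] = %#x\n", out[0].function);

            /*
             * Buggy variant, analogous to the old "&...cpuid_entries":
             * this copies the bytes of the pointer variable itself, so
             * the caller never sees the real entries.
             */
            memcpy(out, &entries, sizeof(entries));
            printf("after buggy copy: function[0] = %#x\n", out[0].function);

            free(entries);
            return 0;
    }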
@@ -2879,6 +2879,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
         ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
         *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
                                                               (u32)msr_data;
+        if (efer & EFER_LMA)
+                ctxt->mode = X86EMUL_MODE_PROT64;
 
         return X86EMUL_CONTINUE;
 }
......
@@ -1049,8 +1049,8 @@ bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
 }
 
 /*
- * Clear non-leaf entries (and free associated page tables) which could
- * be replaced by large mappings, for GFNs within the slot.
+ * Clear leaf entries which could be replaced by large mappings, for
+ * GFNs within the slot.
  */
 static void zap_collapsible_spte_range(struct kvm *kvm,
                                        struct kvm_mmu_page *root,
@@ -1062,7 +1062,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
         tdp_root_for_each_pte(iter, root, start, end) {
                 if (!is_shadow_present_pte(iter.old_spte) ||
-                    is_last_spte(iter.old_spte, iter.level))
+                    !is_last_spte(iter.old_spte, iter.level))
                         continue;
 
                 pfn = spte_to_pfn(iter.old_spte);
......
@@ -231,6 +231,7 @@ static bool nested_vmcb_check_controls(struct vmcb_control_area *control)
 static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
 {
+        struct kvm_vcpu *vcpu = &svm->vcpu;
         bool vmcb12_lma;
 
         if ((vmcb12->save.efer & EFER_SVME) == 0)
@@ -244,18 +245,10 @@ static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
         vmcb12_lma = (vmcb12->save.efer & EFER_LME) && (vmcb12->save.cr0 & X86_CR0_PG);
 
-        if (!vmcb12_lma) {
-                if (vmcb12->save.cr4 & X86_CR4_PAE) {
-                        if (vmcb12->save.cr3 & MSR_CR3_LEGACY_PAE_RESERVED_MASK)
-                                return false;
-                } else {
-                        if (vmcb12->save.cr3 & MSR_CR3_LEGACY_RESERVED_MASK)
-                                return false;
-                }
-        } else {
+        if (vmcb12_lma) {
                 if (!(vmcb12->save.cr4 & X86_CR4_PAE) ||
                     !(vmcb12->save.cr0 & X86_CR0_PE) ||
-                    (vmcb12->save.cr3 & MSR_CR3_LONG_MBZ_MASK))
+                    (vmcb12->save.cr3 & vcpu->arch.cr3_lm_rsvd_bits))
                         return false;
         }
         if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
......
@@ -342,6 +342,8 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
         unsigned long first, last;
         int ret;
 
+        lockdep_assert_held(&kvm->lock);
+
         if (ulen == 0 || uaddr + ulen < uaddr)
                 return ERR_PTR(-EINVAL);
@@ -1119,12 +1121,20 @@ int svm_register_enc_region(struct kvm *kvm,
         if (!region)
                 return -ENOMEM;
 
+        mutex_lock(&kvm->lock);
         region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
         if (IS_ERR(region->pages)) {
                 ret = PTR_ERR(region->pages);
+                mutex_unlock(&kvm->lock);
                 goto e_free;
         }
 
+        region->uaddr = range->addr;
+        region->size = range->size;
+        list_add_tail(&region->list, &sev->regions_list);
+        mutex_unlock(&kvm->lock);
+
         /*
          * The guest may change the memory encryption attribute from C=0 -> C=1
          * or vice versa for this memory range. Lets make sure caches are
@@ -1133,13 +1143,6 @@ int svm_register_enc_region(struct kvm *kvm,
          */
         sev_clflush_pages(region->pages, region->npages);
 
-        region->uaddr = range->addr;
-        region->size = range->size;
-
-        mutex_lock(&kvm->lock);
-        list_add_tail(&region->list, &sev->regions_list);
-        mutex_unlock(&kvm->lock);
-
         return ret;
 
 e_free:
......
@@ -454,6 +454,11 @@ static int has_svm(void)
                 return 0;
         }
 
+        if (sev_active()) {
+                pr_info("KVM is unsupported when running as an SEV guest\n");
+                return 0;
+        }
+
         return 1;
 }
......
@@ -403,9 +403,6 @@ static inline bool gif_set(struct vcpu_svm *svm)
 }
 
 /* svm.c */
-#define MSR_CR3_LEGACY_RESERVED_MASK            0xfe7U
-#define MSR_CR3_LEGACY_PAE_RESERVED_MASK        0x7U
-#define MSR_CR3_LONG_MBZ_MASK                   0xfff0000000000000U
 #define MSR_INVALID                             0xffffffffU
 
 extern int sev;
......
@@ -6860,11 +6860,20 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
                 switch (index) {
                 case MSR_IA32_TSX_CTRL:
                         /*
-                         * No need to pass TSX_CTRL_CPUID_CLEAR through, so
-                         * let's avoid changing CPUID bits under the host
-                         * kernel's feet.
+                         * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
+                         * interception. Keep the host value unchanged to avoid
+                         * changing CPUID bits under the host kernel's feet.
+                         *
+                         * hle=0, rtm=0, tsx_ctrl=1 can be found with some
+                         * combinations of new kernel and old userspace. If
+                         * those guests run on a tsx=off host, do allow guests
+                         * to use TSX_CTRL, but do not change the value on the
+                         * host so that TSX remains always disabled.
                          */
-                        vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+                        if (boot_cpu_has(X86_FEATURE_RTM))
+                                vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+                        else
+                                vmx->guest_uret_msrs[j].mask = 0;
                         break;
                 default:
                         vmx->guest_uret_msrs[j].mask = -1ull;
......
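The guest_uret_msrs mask decides which bits of a guest MSR write can actually reach the value loaded on the host; with a mask of 0, a guest on a tsx=off host can read and "write" TSX_CTRL without ever changing the host setting. A small stand-alone sketch of that masking idea (plain C, not the kernel's user-return-MSR code; TSX_CTRL bit 0 is RTM_DISABLE and bit 1 is CPUID_CLEAR):

    /* Illustrative only: merge a guest MSR write under a per-MSR write mask. */
    #include <stdint.h>
    #include <stdio.h>

    #define TSX_CTRL_RTM_DISABLE    (1ULL << 0)
    #define TSX_CTRL_CPUID_CLEAR    (1ULL << 1)

    static uint64_t apply_guest_write(uint64_t host_val, uint64_t guest_val,
                                      uint64_t mask)
    {
            /* Bits outside the mask keep the host value. */
            return (host_val & ~mask) | (guest_val & mask);
    }

    int main(void)
    {
            /* Host with RTM: guest may flip RTM_DISABLE but not CPUID_CLEAR. */
            uint64_t rtm_mask = ~(uint64_t)TSX_CTRL_CPUID_CLEAR;
            /* tsx=off host: mask 0 keeps TSX disabled whatever the guest writes. */
            uint64_t off_mask = 0;
            uint64_t tsx_off_host = TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR;

            printf("rtm host : %#llx\n", (unsigned long long)
                   apply_guest_write(0, TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR, rtm_mask));
            printf("tsx=off  : %#llx\n", (unsigned long long)
                   apply_guest_write(tsx_off_host, 0, off_mask));
            return 0;
    }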
@@ -1394,16 +1394,24 @@ static u64 kvm_get_arch_capabilities(void)
         if (!boot_cpu_has_bug(X86_BUG_MDS))
                 data |= ARCH_CAP_MDS_NO;
 
-        /*
-         * On TAA affected systems:
-         *      - nothing to do if TSX is disabled on the host.
-         *      - we emulate TSX_CTRL if present on the host.
-         *        This lets the guest use VERW to clear CPU buffers.
-         */
-        if (!boot_cpu_has(X86_FEATURE_RTM))
-                data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
-        else if (!boot_cpu_has_bug(X86_BUG_TAA))
+        if (!boot_cpu_has(X86_FEATURE_RTM)) {
+                /*
+                 * If RTM=0 because the kernel has disabled TSX, the host might
+                 * have TAA_NO or TSX_CTRL. Clear TAA_NO (the guest sees RTM=0
+                 * and therefore knows that there cannot be TAA) but keep
+                 * TSX_CTRL: some buggy userspaces leave it set on tsx=on hosts,
+                 * and we want to allow migrating those guests to tsx=off hosts.
+                 */
+                data &= ~ARCH_CAP_TAA_NO;
+        } else if (!boot_cpu_has_bug(X86_BUG_TAA)) {
                 data |= ARCH_CAP_TAA_NO;
+        } else {
+                /*
+                 * Nothing to do here; we emulate TSX_CTRL if present on the
+                 * host so the guest can choose between disabling TSX or
+                 * using VERW to clear CPU buffers.
+                 */
+        }
 
         return data;
 }
@@ -9616,6 +9624,8 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
                  */
                 if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA))
                         return false;
+                if (sregs->cr3 & vcpu->arch.cr3_lm_rsvd_bits)
+                        return false;
         } else {
                 /*
                  * Not in 64-bit mode: EFER.LMA is clear and the code
@@ -9993,6 +10003,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
         fx_init(vcpu);
 
         vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+        vcpu->arch.cr3_lm_rsvd_bits = rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
 
         vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
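cr3_lm_rsvd_bits marks every CR3 bit at or above the vCPU's MAXPHYADDR as reserved, and the SREGS and SVM nested checks above reject any CR3 that sets one of them. A quick worked example (stand-alone C; the mask helper is illustrative, not KVM's rsvd_bits()):

    /* Illustrative only: the reserved CR3 mask for a given guest MAXPHYADDR. */
    #include <stdint.h>
    #include <stdio.h>

    /* Mask with bits s..e (inclusive) set, assuming 0 <= s <= e <= 63. */
    static uint64_t bit_range_mask(unsigned int s, unsigned int e)
    {
            return ((e - s) == 63) ? ~0ULL : (((1ULL << (e - s + 1)) - 1) << s);
    }

    int main(void)
    {
            unsigned int maxphyaddr = 48;   /* a typical value from CPUID */
            uint64_t cr3_rsvd = bit_range_mask(maxphyaddr, 63);

            /* For MAXPHYADDR=48 this prints 0xffff000000000000. */
            printf("CR3 long-mode reserved bits: %#llx\n",
                   (unsigned long long)cr3_rsvd);

            /* A CR3 with any of those bits set would fail the new checks. */
            uint64_t bogus_cr3 = 1ULL << 52;
            printf("bogus CR3 rejected: %s\n",
                   (bogus_cr3 & cr3_rsvd) ? "yes" : "no");
            return 0;
    }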
@@ -10494,7 +10505,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
                         return 0;
 
                 old_npages = slot->npages;
-                hva = 0;
+                hva = slot->userspace_addr;
         }
 
         for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
......
@@ -425,6 +425,8 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
                 __reserved_bits |= X86_CR4_UMIP;        \
         if (!__cpu_has(__c, X86_FEATURE_VMX))           \
                 __reserved_bits |= X86_CR4_VMXE;        \
+        if (!__cpu_has(__c, X86_FEATURE_PCID))          \
+                __reserved_bits |= X86_CR4_PCIDE;       \
         __reserved_bits;                                \
 })
......
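The __cr4_reserved_bits pattern is the same idea applied to CR4: each control bit whose backing CPUID feature the guest lacks becomes reserved, and the new lines extend that to CR4.PCIDE when the guest has no PCID. A hedged stand-alone sketch of the pattern (the feature struct is made up; the CR4 bit positions are the architectural ones):

    /*
     * Illustrative only: accumulate CR4 bits that must be treated as
     * reserved for a guest missing the corresponding CPUID feature.
     */
    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define X86_CR4_VMXE    (1ULL << 13)
    #define X86_CR4_PCIDE   (1ULL << 17)

    struct guest_features {
            bool vmx;
            bool pcid;
    };

    static uint64_t cr4_reserved_bits(const struct guest_features *f)
    {
            uint64_t reserved = 0;

            if (!f->vmx)
                    reserved |= X86_CR4_VMXE;
            if (!f->pcid)           /* the check this patch adds */
                    reserved |= X86_CR4_PCIDE;
            return reserved;
    }

    int main(void)
    {
            struct guest_features f = { .vmx = true, .pcid = false };

            /* A guest without PCID must not be allowed to set CR4.PCIDE. */
            printf("reserved CR4 bits: %#llx\n",
                   (unsigned long long)cr4_reserved_bits(&f));
            return 0;
    }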
@@ -382,6 +382,7 @@ bool sev_active(void)
 {
         return sev_status & MSR_AMD64_SEV_ENABLED;
 }
+EXPORT_SYMBOL_GPL(sev_active);
 
 /* Needs to be called from non-instrumentable code */
 bool noinstr sev_es_active(void)
......