Commit 6157ce59 authored by Linus Torvalds

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM fixes from Paolo Bonzini:
 "x86 has lots of small bugfixes, mostly one liners. It's quite late in
  5.11-rc but none of them are related to this merge window; it's just
  bugs coming in at the wrong time.

  Of note among the others is "KVM: x86: Allow guests to see
  MSR_IA32_TSX_CTRL even if tsx=off" that fixes a live migration failure
  seen on distros that hadn't switched to tsx=off right away.

  ARM:
  - Avoid clobbering extra registers on initialisation"

[ Sean Christopherson notes that commit 943dea8a ("KVM: x86: Update
  emulator context mode if SYSENTER xfers to 64-bit mode") should have
  had authorship credited to Jonny Barker, not to him.  - Linus ]

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: x86: Set so called 'reserved CR3 bits in LM mask' at vCPU reset
  KVM: x86/mmu: Fix TDP MMU zap collapsible SPTEs
  KVM: x86: cleanup CR3 reserved bits checks
  KVM: SVM: Treat SVM as unsupported when running as an SEV guest
  KVM: x86: Update emulator context mode if SYSENTER xfers to 64-bit mode
  KVM: x86: Supplement __cr4_reserved_bits() with X86_FEATURE_PCID check
  KVM/x86: assign hva with the right value to vm_munmap the pages
  KVM: x86: Allow guests to see MSR_IA32_TSX_CTRL even if tsx=off
  Fix unsynchronized access to sev members through svm_register_enc_region
  KVM: Documentation: Fix documentation for nested.
  KVM: x86: fix CPUID entries returned by KVM_GET_CPUID2 ioctl
  KVM: arm64: Don't clobber x4 in __do_hyp_init
parents 97ba0c74 031b91a5
@@ -37,8 +37,10 @@ call L2.
 Running nested VMX
 ------------------
-The nested VMX feature is disabled by default. It can be enabled by giving
-the "nested=1" option to the kvm-intel module.
+The nested VMX feature is enabled by default since Linux kernel v4.20. For
+older Linux kernel, it can be enabled by giving the "nested=1" option to the
+kvm-intel module.
 No modifications are required to user space (qemu). However, qemu's default
 emulated CPU type (qemu64) does not list the "VMX" CPU feature, so it must be
...
...@@ -74,7 +74,7 @@ few: ...@@ -74,7 +74,7 @@ few:
Enabling "nested" (x86) Enabling "nested" (x86)
----------------------- -----------------------
From Linux kernel v4.19 onwards, the ``nested`` KVM parameter is enabled From Linux kernel v4.20 onwards, the ``nested`` KVM parameter is enabled
by default for Intel and AMD. (Though your Linux distribution might by default for Intel and AMD. (Though your Linux distribution might
override this default.) override this default.)
......
@@ -47,6 +47,8 @@ __invalid:
        b       .
 /*
+ * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers.
+ *
  * x0: SMCCC function ID
  * x1: struct kvm_nvhe_init_params PA
  */
@@ -70,9 +72,9 @@ __do_hyp_init:
        eret
 1:     mov     x0, x1
-       mov     x4, lr
-       bl      ___kvm_hyp_init
-       mov     lr, x4
+       mov     x3, lr
+       bl      ___kvm_hyp_init                 // Clobbers x0..x2
+       mov     lr, x3
        /* Hello, World! */
        mov     x0, #SMCCC_RET_SUCCESS
@@ -82,8 +84,8 @@ SYM_CODE_END(__kvm_hyp_init)
 /*
  * Initialize the hypervisor in EL2.
  *
- * Only uses x0..x3 so as to not clobber callee-saved SMCCC registers
- * and leave x4 for the caller.
+ * Only uses x0..x2 so as to not clobber callee-saved SMCCC registers
+ * and leave x3 for the caller.
  *
  * x0: struct kvm_nvhe_init_params PA
  */
@@ -112,9 +114,9 @@ alternative_else_nop_endif
        /*
         * Set the PS bits in TCR_EL2.
         */
-       ldr     x1, [x0, #NVHE_INIT_TCR_EL2]
-       tcr_compute_pa_size x1, #TCR_EL2_PS_SHIFT, x2, x3
-       msr     tcr_el2, x1
+       ldr     x0, [x0, #NVHE_INIT_TCR_EL2]
+       tcr_compute_pa_size x0, #TCR_EL2_PS_SHIFT, x1, x2
+       msr     tcr_el2, x0
        isb
@@ -193,7 +195,7 @@ SYM_CODE_START_LOCAL(__kvm_hyp_init_cpu)
        /* Enable MMU, set vectors and stack. */
        mov     x0, x28
-       bl      ___kvm_hyp_init                 // Clobbers x0..x3
+       bl      ___kvm_hyp_init                 // Clobbers x0..x2
        /* Leave idmap. */
        mov     x0, x29
...
@@ -321,7 +321,7 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
        if (cpuid->nent < vcpu->arch.cpuid_nent)
                goto out;
        r = -EFAULT;
-       if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
+       if (copy_to_user(entries, vcpu->arch.cpuid_entries,
                         vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
                goto out;
        return 0;
...
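
The one-character change above (dropping the '&') matters because vcpu->arch.cpuid_entries had been turned into a pointer to a dynamically allocated array; taking the address of that pointer member makes copy_to_user() copy the pointer itself (and whatever follows it) instead of the CPUID entries. A minimal userspace C sketch, with made-up struct names and not KVM code, illustrates the pointer-member pitfall:

#include <stdio.h>

struct entry { int data[4]; };

/* Hypothetical structs: an embedded array member vs. a pointer member. */
struct ctx_array { struct entry entries[2]; };  /* old layout */
struct ctx_ptr   { struct entry *entries; };    /* new layout */

int main(void)
{
        struct ctx_array a = { 0 };
        struct ctx_ptr   p = { .entries = a.entries };

        /* Array member: &a.entries and a.entries are the same address, so
         * copying from &...entries happened to work with the old layout. */
        printf("array  : %p vs %p\n", (void *)&a.entries, (void *)a.entries);

        /* Pointer member: &p.entries is the address of the pointer itself,
         * not of the entries it points to, so the copy grabs the wrong data. */
        printf("pointer: %p vs %p\n", (void *)&p.entries, (void *)p.entries);
        return 0;
}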
@@ -2879,6 +2879,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
        ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
        *reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
                                                              (u32)msr_data;
+       if (efer & EFER_LMA)
+               ctxt->mode = X86EMUL_MODE_PROT64;
        return X86EMUL_CONTINUE;
 }
...
@@ -1049,8 +1049,8 @@ bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
 }
 /*
- * Clear non-leaf entries (and free associated page tables) which could
- * be replaced by large mappings, for GFNs within the slot.
+ * Clear leaf entries which could be replaced by large mappings, for
+ * GFNs within the slot.
  */
 static void zap_collapsible_spte_range(struct kvm *kvm,
                                        struct kvm_mmu_page *root,
@@ -1062,7 +1062,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
        tdp_root_for_each_pte(iter, root, start, end) {
                if (!is_shadow_present_pte(iter.old_spte) ||
-                   is_last_spte(iter.old_spte, iter.level))
+                   !is_last_spte(iter.old_spte, iter.level))
                        continue;
                pfn = spte_to_pfn(iter.old_spte);
...
@@ -231,6 +231,7 @@ static bool nested_vmcb_check_controls(struct vmcb_control_area *control)
 static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
 {
+       struct kvm_vcpu *vcpu = &svm->vcpu;
        bool vmcb12_lma;
        if ((vmcb12->save.efer & EFER_SVME) == 0)
@@ -244,18 +245,10 @@ static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
        vmcb12_lma = (vmcb12->save.efer & EFER_LME) && (vmcb12->save.cr0 & X86_CR0_PG);
-       if (!vmcb12_lma) {
-               if (vmcb12->save.cr4 & X86_CR4_PAE) {
-                       if (vmcb12->save.cr3 & MSR_CR3_LEGACY_PAE_RESERVED_MASK)
-                               return false;
-               } else {
-                       if (vmcb12->save.cr3 & MSR_CR3_LEGACY_RESERVED_MASK)
-                               return false;
-               }
-       } else {
+       if (vmcb12_lma) {
                if (!(vmcb12->save.cr4 & X86_CR4_PAE) ||
                    !(vmcb12->save.cr0 & X86_CR0_PE) ||
-                   (vmcb12->save.cr3 & MSR_CR3_LONG_MBZ_MASK))
+                   (vmcb12->save.cr3 & vcpu->arch.cr3_lm_rsvd_bits))
                        return false;
        }
        if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
...
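
The vcpu->arch.cr3_lm_rsvd_bits mask used above replaces the hard-coded MSR_CR3_LONG_MBZ_MASK and is initialised from the guest's MAXPHYADDR at vCPU creation (see the x86.c hunk further down). A standalone sketch, mirroring the bit-range formula behind KVM's rsvd_bits() helper and compiled here as plain userspace C with an assumed MAXPHYADDR of 48, shows the mask covering CR3 bits MAXPHYADDR..63:

#include <stdint.h>
#include <stdio.h>

/* Mask of bits s..e inclusive; the formula KVM's rsvd_bits() is built on. */
static uint64_t rsvd_bits(int s, int e)
{
        return ((2ULL << (e - s)) - 1) << s;
}

int main(void)
{
        int maxphyaddr = 48;    /* assumed guest MAXPHYADDR, for illustration */

        /* Bits 48..63 of CR3 must be zero for this guest in long mode. */
        printf("cr3_lm_rsvd_bits = 0x%016llx\n",
               (unsigned long long)rsvd_bits(maxphyaddr, 63));
        return 0;
}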
@@ -342,6 +342,8 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
        unsigned long first, last;
        int ret;
+       lockdep_assert_held(&kvm->lock);
        if (ulen == 0 || uaddr + ulen < uaddr)
                return ERR_PTR(-EINVAL);
@@ -1119,12 +1121,20 @@ int svm_register_enc_region(struct kvm *kvm,
        if (!region)
                return -ENOMEM;
+       mutex_lock(&kvm->lock);
        region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
        if (IS_ERR(region->pages)) {
                ret = PTR_ERR(region->pages);
+               mutex_unlock(&kvm->lock);
                goto e_free;
        }
+       region->uaddr = range->addr;
+       region->size = range->size;
+       list_add_tail(&region->list, &sev->regions_list);
+       mutex_unlock(&kvm->lock);
        /*
         * The guest may change the memory encryption attribute from C=0 -> C=1
         * or vice versa for this memory range. Lets make sure caches are
@@ -1133,13 +1143,6 @@ int svm_register_enc_region(struct kvm *kvm,
         */
        sev_clflush_pages(region->pages, region->npages);
-       region->uaddr = range->addr;
-       region->size = range->size;
-       mutex_lock(&kvm->lock);
-       list_add_tail(&region->list, &sev->regions_list);
-       mutex_unlock(&kvm->lock);
        return ret;
 e_free:
...
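
The reshuffling above pins the memory, fully initialises the region and links it into sev->regions_list inside one kvm->lock critical section, and the new lockdep_assert_held() records that sev_pin_memory() now expects that lock to be held. A small userspace analogue of the "initialise, then publish under the same lock" pattern, using pthreads and hypothetical names rather than the kernel APIs:

#include <assert.h>
#include <pthread.h>
#include <stdlib.h>

struct region {
        unsigned long uaddr, size;
        struct region *next;
};

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static struct region *regions_list;

/* Crude stand-in for lockdep_assert_held(): trylock on an already-locked
 * (default, non-recursive) mutex fails, so the assert only passes when the
 * caller holds the lock. */
static void assert_lock_held(void)
{
        assert(pthread_mutex_trylock(&lock) != 0);
}

static void publish_region(struct region *r)
{
        assert_lock_held();
        r->next = regions_list;
        regions_list = r;       /* becomes visible to other threads here */
}

static int register_region(unsigned long uaddr, unsigned long size)
{
        struct region *r = calloc(1, sizeof(*r));

        if (!r)
                return -1;

        pthread_mutex_lock(&lock);
        r->uaddr = uaddr;       /* fully initialise the object ... */
        r->size = size;
        publish_region(r);      /* ... and publish it in the same critical
                                 * section, so a concurrent unregister can
                                 * never see a half-built entry. */
        pthread_mutex_unlock(&lock);
        return 0;
}

int main(void)
{
        return register_region(0x1000, 0x2000);
}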
@@ -454,6 +454,11 @@ static int has_svm(void)
                return 0;
        }
+       if (sev_active()) {
+               pr_info("KVM is unsupported when running as an SEV guest\n");
+               return 0;
+       }
        return 1;
 }
...
...@@ -403,9 +403,6 @@ static inline bool gif_set(struct vcpu_svm *svm) ...@@ -403,9 +403,6 @@ static inline bool gif_set(struct vcpu_svm *svm)
} }
/* svm.c */ /* svm.c */
#define MSR_CR3_LEGACY_RESERVED_MASK 0xfe7U
#define MSR_CR3_LEGACY_PAE_RESERVED_MASK 0x7U
#define MSR_CR3_LONG_MBZ_MASK 0xfff0000000000000U
#define MSR_INVALID 0xffffffffU #define MSR_INVALID 0xffffffffU
extern int sev; extern int sev;
......
@@ -6860,11 +6860,20 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
                switch (index) {
                case MSR_IA32_TSX_CTRL:
                        /*
-                        * No need to pass TSX_CTRL_CPUID_CLEAR through, so
-                        * let's avoid changing CPUID bits under the host
-                        * kernel's feet.
+                        * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
+                        * interception. Keep the host value unchanged to avoid
+                        * changing CPUID bits under the host kernel's feet.
+                        *
+                        * hle=0, rtm=0, tsx_ctrl=1 can be found with some
+                        * combinations of new kernel and old userspace. If
+                        * those guests run on a tsx=off host, do allow guests
+                        * to use TSX_CTRL, but do not change the value on the
+                        * host so that TSX remains always disabled.
                         */
-                       vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+                       if (boot_cpu_has(X86_FEATURE_RTM))
+                               vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+                       else
+                               vmx->guest_uret_msrs[j].mask = 0;
                        break;
                default:
                        vmx->guest_uret_msrs[j].mask = -1ull;
...
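
The mask stored in guest_uret_msrs controls how much of the guest's MSR value reaches the physical register when the user-return MSR is loaded; roughly, the value written is (guest & mask) | (host & ~mask), so mask = 0 lets the guest read and write TSX_CTRL while the hardware keeps the host's tsx=off setting. A tiny sketch of that bit blend, using assumed example values rather than the kernel's helpers:

#include <stdint.h>
#include <stdio.h>

/* Illustration only: merge a guest-written MSR value into the host value
 * according to a per-MSR mask, as the user-return MSR code does before
 * touching the physical register. */
static uint64_t blend(uint64_t guest, uint64_t host, uint64_t mask)
{
        return (guest & mask) | (host & ~mask);
}

int main(void)
{
        uint64_t host  = 0x3;   /* e.g. RTM_DISABLE (bit 0) | CPUID_CLEAR (bit 1) */
        uint64_t guest = 0x0;   /* guest tries to re-enable TSX */

        /* mask = 0 (tsx=off host): guest writes never change the hardware. */
        printf("tsx=off host: %#llx\n", (unsigned long long)blend(guest, host, 0));

        /* mask = ~CPUID_CLEAR (tsx=on host): guest controls all but bit 1. */
        printf("tsx=on host : %#llx\n",
               (unsigned long long)blend(guest, host, ~(uint64_t)0x2));
        return 0;
}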
@@ -1394,16 +1394,24 @@ static u64 kvm_get_arch_capabilities(void)
        if (!boot_cpu_has_bug(X86_BUG_MDS))
                data |= ARCH_CAP_MDS_NO;
-       /*
-        * On TAA affected systems:
-        *      - nothing to do if TSX is disabled on the host.
-        *      - we emulate TSX_CTRL if present on the host.
-        *        This lets the guest use VERW to clear CPU buffers.
-        */
-       if (!boot_cpu_has(X86_FEATURE_RTM))
-               data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
-       else if (!boot_cpu_has_bug(X86_BUG_TAA))
+       if (!boot_cpu_has(X86_FEATURE_RTM)) {
+               /*
+                * If RTM=0 because the kernel has disabled TSX, the host might
+                * have TAA_NO or TSX_CTRL. Clear TAA_NO (the guest sees RTM=0
+                * and therefore knows that there cannot be TAA) but keep
+                * TSX_CTRL: some buggy userspaces leave it set on tsx=on hosts,
+                * and we want to allow migrating those guests to tsx=off hosts.
+                */
+               data &= ~ARCH_CAP_TAA_NO;
+       } else if (!boot_cpu_has_bug(X86_BUG_TAA)) {
                data |= ARCH_CAP_TAA_NO;
+       } else {
+               /*
+                * Nothing to do here; we emulate TSX_CTRL if present on the
+                * host so the guest can choose between disabling TSX or
+                * using VERW to clear CPU buffers.
+                */
+       }
        return data;
 }
@@ -9616,6 +9624,8 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
                 */
                if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA))
                        return false;
+               if (sregs->cr3 & vcpu->arch.cr3_lm_rsvd_bits)
+                       return false;
        } else {
                /*
                 * Not in 64-bit mode: EFER.LMA is clear and the code
@@ -9993,6 +10003,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
        fx_init(vcpu);
        vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+       vcpu->arch.cr3_lm_rsvd_bits = rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
        vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
@@ -10494,7 +10505,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
                        return 0;
                old_npages = slot->npages;
-               hva = 0;
+               hva = slot->userspace_addr;
        }
        for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
...
...@@ -425,6 +425,8 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type); ...@@ -425,6 +425,8 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
__reserved_bits |= X86_CR4_UMIP; \ __reserved_bits |= X86_CR4_UMIP; \
if (!__cpu_has(__c, X86_FEATURE_VMX)) \ if (!__cpu_has(__c, X86_FEATURE_VMX)) \
__reserved_bits |= X86_CR4_VMXE; \ __reserved_bits |= X86_CR4_VMXE; \
if (!__cpu_has(__c, X86_FEATURE_PCID)) \
__reserved_bits |= X86_CR4_PCIDE; \
__reserved_bits; \ __reserved_bits; \
}) })
......
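
The extra clause makes CR4.PCIDE a reserved bit for guests whose CPUID does not advertise PCID, so setting it is rejected like any other reserved CR4 bit. A freestanding sketch of the idea, with hypothetical helper names instead of the kernel's macro:

#include <stdint.h>
#include <stdio.h>

#define CR4_VMXE        (1ULL << 13)
#define CR4_PCIDE       (1ULL << 17)

/* Any CR4 bit whose feature the guest's CPUID does not advertise is treated
 * as reserved and must be zero. */
static int cr4_is_valid(uint64_t cr4, int has_vmx, int has_pcid)
{
        uint64_t reserved = 0;

        if (!has_vmx)
                reserved |= CR4_VMXE;
        if (!has_pcid)
                reserved |= CR4_PCIDE;  /* the check this hunk adds */

        return (cr4 & reserved) == 0;
}

int main(void)
{
        /* A guest without PCID in CPUID must not be allowed to set CR4.PCIDE. */
        printf("%d\n", cr4_is_valid(CR4_PCIDE, 1, 0));  /* 0: rejected */
        printf("%d\n", cr4_is_valid(CR4_PCIDE, 1, 1));  /* 1: accepted */
        return 0;
}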
...@@ -382,6 +382,7 @@ bool sev_active(void) ...@@ -382,6 +382,7 @@ bool sev_active(void)
{ {
return sev_status & MSR_AMD64_SEV_ENABLED; return sev_status & MSR_AMD64_SEV_ENABLED;
} }
EXPORT_SYMBOL_GPL(sev_active);
/* Needs to be called from non-instrumentable code */ /* Needs to be called from non-instrumentable code */
bool noinstr sev_es_active(void) bool noinstr sev_es_active(void)
......