Commit 75253db4 authored by Brijesh Singh's avatar Brijesh Singh Committed by Borislav Petkov (AMD)

KVM: SEV: Make AVIC backing, VMSA and VMCB memory allocation SNP safe

Implement a workaround for an SNP erratum where the CPU will incorrectly
signal an RMP violation #PF if a hugepage (2MB or 1GB) collides with the
RMP entry of a VMCB, VMSA or AVIC backing page.

When SEV-SNP is globally enabled, the CPU marks the VMCB, VMSA, and AVIC
backing pages as "in-use" via a reserved bit in the corresponding RMP
entry after a successful VMRUN. This is done for _all_ VMs, not just
SNP-Active VMs.

If the hypervisor accesses an in-use page through a writable
translation, the CPU will throw an RMP violation #PF. On early SNP
hardware, if an in-use page is 2MB-aligned and software accesses any
part of the associated 2MB region with a hugepage, the CPU will
incorrectly treat the entire 2MB region as in-use and signal a an RMP
violation #PF.

To avoid this, the recommendation is to not use a 2MB-aligned page for
the VMCB, VMSA or AVIC pages. Add a generic allocator that will ensure
that the page returned is not 2MB-aligned and is safe to be used when
SEV-SNP is enabled. Also implement similar handling for the VMCB/VMSA
pages of nested guests.

  [ mdr: Squash in nested guest handling from Ashish, commit msg fixups. ]

Reported-by: Alper Gun <alpergun@google.com> # for nested VMSA case
Signed-off-by: default avatarBrijesh Singh <brijesh.singh@amd.com>
Co-developed-by: default avatarMarc Orr <marcorr@google.com>
Signed-off-by: default avatarMarc Orr <marcorr@google.com>
Co-developed-by: default avatarAshish Kalra <ashish.kalra@amd.com>
Signed-off-by: default avatarAshish Kalra <ashish.kalra@amd.com>
Signed-off-by: default avatarMichael Roth <michael.roth@amd.com>
Signed-off-by: default avatarBorislav Petkov (AMD) <bp@alien8.de>
Acked-by: default avatarVlastimil Babka <vbabka@suse.cz>
Acked-by: default avatarPaolo Bonzini <pbonzini@redhat.com>
Link: https://lore.kernel.org/r/20240126041126.1927228-22-michael.roth@amd.com
parent 8ef97958
...@@ -138,6 +138,7 @@ KVM_X86_OP(complete_emulated_msr) ...@@ -138,6 +138,7 @@ KVM_X86_OP(complete_emulated_msr)
KVM_X86_OP(vcpu_deliver_sipi_vector) KVM_X86_OP(vcpu_deliver_sipi_vector)
KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons); KVM_X86_OP_OPTIONAL_RET0(vcpu_get_apicv_inhibit_reasons);
KVM_X86_OP_OPTIONAL(get_untagged_addr) KVM_X86_OP_OPTIONAL(get_untagged_addr)
KVM_X86_OP_OPTIONAL(alloc_apic_backing_page)
#undef KVM_X86_OP #undef KVM_X86_OP
#undef KVM_X86_OP_OPTIONAL #undef KVM_X86_OP_OPTIONAL
......
...@@ -1794,6 +1794,7 @@ struct kvm_x86_ops { ...@@ -1794,6 +1794,7 @@ struct kvm_x86_ops {
unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu); unsigned long (*vcpu_get_apicv_inhibit_reasons)(struct kvm_vcpu *vcpu);
gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags); gva_t (*get_untagged_addr)(struct kvm_vcpu *vcpu, gva_t gva, unsigned int flags);
void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu);
}; };
struct kvm_x86_nested_ops { struct kvm_x86_nested_ops {
......
...@@ -2815,7 +2815,10 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) ...@@ -2815,7 +2815,10 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
vcpu->arch.apic = apic; vcpu->arch.apic = apic;
apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); if (kvm_x86_ops.alloc_apic_backing_page)
apic->regs = static_call(kvm_x86_alloc_apic_backing_page)(vcpu);
else
apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
if (!apic->regs) { if (!apic->regs) {
printk(KERN_ERR "malloc apic regs error for vcpu %x\n", printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
vcpu->vcpu_id); vcpu->vcpu_id);
......
...@@ -1181,7 +1181,7 @@ int svm_allocate_nested(struct vcpu_svm *svm) ...@@ -1181,7 +1181,7 @@ int svm_allocate_nested(struct vcpu_svm *svm)
if (svm->nested.initialized) if (svm->nested.initialized)
return 0; return 0;
vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); vmcb02_page = snp_safe_alloc_page(&svm->vcpu);
if (!vmcb02_page) if (!vmcb02_page)
return -ENOMEM; return -ENOMEM;
svm->nested.vmcb02.ptr = page_address(vmcb02_page); svm->nested.vmcb02.ptr = page_address(vmcb02_page);
......
...@@ -3163,3 +3163,35 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) ...@@ -3163,3 +3163,35 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1); ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
} }
struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu)
{
unsigned long pfn;
struct page *p;
if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP))
return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
/*
* Allocate an SNP-safe page to workaround the SNP erratum where
* the CPU will incorrectly signal an RMP violation #PF if a
* hugepage (2MB or 1GB) collides with the RMP entry of a
* 2MB-aligned VMCB, VMSA, or AVIC backing page.
*
* Allocate one extra page, choose a page which is not
* 2MB-aligned, and free the other.
*/
p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1);
if (!p)
return NULL;
split_page(p, 1);
pfn = page_to_pfn(p);
if (IS_ALIGNED(pfn, PTRS_PER_PMD))
__free_page(p++);
else
__free_page(p + 1);
return p;
}
...@@ -703,7 +703,7 @@ static int svm_cpu_init(int cpu) ...@@ -703,7 +703,7 @@ static int svm_cpu_init(int cpu)
int ret = -ENOMEM; int ret = -ENOMEM;
memset(sd, 0, sizeof(struct svm_cpu_data)); memset(sd, 0, sizeof(struct svm_cpu_data));
sd->save_area = alloc_page(GFP_KERNEL | __GFP_ZERO); sd->save_area = snp_safe_alloc_page(NULL);
if (!sd->save_area) if (!sd->save_area)
return ret; return ret;
...@@ -1421,7 +1421,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu) ...@@ -1421,7 +1421,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
svm = to_svm(vcpu); svm = to_svm(vcpu);
err = -ENOMEM; err = -ENOMEM;
vmcb01_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); vmcb01_page = snp_safe_alloc_page(vcpu);
if (!vmcb01_page) if (!vmcb01_page)
goto out; goto out;
...@@ -1430,7 +1430,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu) ...@@ -1430,7 +1430,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
* SEV-ES guests require a separate VMSA page used to contain * SEV-ES guests require a separate VMSA page used to contain
* the encrypted register state of the guest. * the encrypted register state of the guest.
*/ */
vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); vmsa_page = snp_safe_alloc_page(vcpu);
if (!vmsa_page) if (!vmsa_page)
goto error_free_vmcb_page; goto error_free_vmcb_page;
...@@ -4900,6 +4900,16 @@ static int svm_vm_init(struct kvm *kvm) ...@@ -4900,6 +4900,16 @@ static int svm_vm_init(struct kvm *kvm)
return 0; return 0;
} }
static void *svm_alloc_apic_backing_page(struct kvm_vcpu *vcpu)
{
struct page *page = snp_safe_alloc_page(vcpu);
if (!page)
return NULL;
return page_address(page);
}
static struct kvm_x86_ops svm_x86_ops __initdata = { static struct kvm_x86_ops svm_x86_ops __initdata = {
.name = KBUILD_MODNAME, .name = KBUILD_MODNAME,
...@@ -5031,6 +5041,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { ...@@ -5031,6 +5041,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector, .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
.vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons, .vcpu_get_apicv_inhibit_reasons = avic_vcpu_get_apicv_inhibit_reasons,
.alloc_apic_backing_page = svm_alloc_apic_backing_page,
}; };
/* /*
......
...@@ -694,6 +694,7 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm); ...@@ -694,6 +694,7 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm);
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa); void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
void sev_es_unmap_ghcb(struct vcpu_svm *svm); void sev_es_unmap_ghcb(struct vcpu_svm *svm);
struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu);
/* vmenter.S */ /* vmenter.S */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment