KVM: SVM: use NPT page attributes

Right now, NPT page attributes are not used, and the final page attribute depends solely on gPAT (which however is not synced correctly), the guest MTRRs and the guest page attributes. However, we can do better by mimicking what is done for VMX. In the absence of PCI passthrough, the guest PAT can be ignored and the page attributes can be just WB. If passthrough is being used, instead, keep respecting the guest PAT, and emulate the guest MTRRs through the PAT field of the nested page tables. The only snag is that WP memory cannot be emulated correctly, because Linux's default PAT setting only includes the other types. Tested-by: Joerg Roedel <jroedel@suse.de> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

KVM: SVM: use NPT page attributes
Right now, NPT page attributes are not used, and the final page attribute depends solely on gPAT (which however is not synced correctly), the guest MTRRs and the guest page attributes. However, we can do better by mimicking what is done for VMX. In the absence of PCI passthrough, the guest PAT can be ignored and the page attributes can be just WB. If passthrough is being used, instead, keep respecting the guest PAT, and emulate the guest MTRRs through the PAT field of the nested page tables. The only snag is that WP memory cannot be emulated correctly, because Linux's default PAT setting only includes the other types. Tested-by: Joerg Roedel <jroedel@suse.de> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
3c2e7f7d · Paolo Bonzini · 5544eb9b · 3c2e7f7d
Commit 3c2e7f7d authored Jul 07, 2015 by Paolo Bonzini
Show whitespace changes
Inline Side-by-side

Showing with 96 additions and 5 deletions

arch/x86/kvm/svm.c arch/x86/kvm/svm.c +96 -5

No files found.
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -865,6 +865,64 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
 	set_msr_interception(msrpm, MSR_IA32_LASTINTTOIP, 0, 0);
 }

+#define MTRR_TYPE_UC_MINUS	7
+#define MTRR2PROTVAL_INVALID 0xff
+
+static u8 mtrr2protval[8];
+
+static u8 fallback_mtrr_type(int mtrr)
+{
+	/*
+	 * WT and WP aren't always available in the host PAT.  Treat
+	 * them as UC and UC- respectively.  Everything else should be
+	 * there.
+	 */
+	switch (mtrr)
+	{
+	case MTRR_TYPE_WRTHROUGH:
+		return MTRR_TYPE_UNCACHABLE;
+	case MTRR_TYPE_WRPROT:
+		return MTRR_TYPE_UC_MINUS;
+	default:
+		BUG();
+	}
+}
+
+static void build_mtrr2protval(void)
+{
+	int i;
+	u64 pat;
+
+	for (i = 0; i < 8; i++)
+		mtrr2protval[i] = MTRR2PROTVAL_INVALID;
+
+	/* Ignore the invalid MTRR types.  */
+	mtrr2protval[2] = 0;
+	mtrr2protval[3] = 0;
+
+	/*
+	 * Use host PAT value to figure out the mapping from guest MTRR
+	 * values to nested page table PAT/PCD/PWT values.  We do not
+	 * want to change the host PAT value every time we enter the
+	 * guest.
+	 */
+	rdmsrl(MSR_IA32_CR_PAT, pat);
+	for (i = 0; i < 8; i++) {
+		u8 mtrr = pat >> (8 * i);
+
+		if (mtrr2protval[mtrr] == MTRR2PROTVAL_INVALID)
+			mtrr2protval[mtrr] = __cm_idx2pte(i);
+	}
+
+	for (i = 0; i < 8; i++) {
+		if (mtrr2protval[i] == MTRR2PROTVAL_INVALID) {
+			u8 fallback = fallback_mtrr_type(i);
+			mtrr2protval[i] = mtrr2protval[fallback];
+			BUG_ON(mtrr2protval[i] == MTRR2PROTVAL_INVALID);
+		}
+	}
+}
+
 static __init int svm_hardware_setup(void)
 {
 	int cpu;
@@ -931,6 +989,7 @@ static __init int svm_hardware_setup(void)
 	} else
 		kvm_disable_tdp();

+	build_mtrr2protval();
 	return 0;

 err:
@@ -1085,6 +1144,42 @@ static u64 svm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 	return target_tsc - tsc;
 }

+static void svm_set_guest_pat(struct vcpu_svm *svm, u64 *g_pat)
+{
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+
+	/* Unlike Intel, AMD takes the guest's CR0.CD into account.
+	 *
+	 * AMD does not have IPAT.  To emulate it for the case of guests
+	 * with no assigned devices, just set everything to WB.  If guests
+	 * have assigned devices, however, we cannot force WB for RAM
+	 * pages only, so use the guest PAT directly.
+	 */
+	if (!kvm_arch_has_assigned_device(vcpu->kvm))
+		*g_pat = 0x0606060606060606;
+	else
+		*g_pat = vcpu->arch.pat;
+}
+
+static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
+{
+	u8 mtrr;
+
+	/*
+	 * 1. MMIO: always map as UC
+	 * 2. No passthrough: always map as WB, and force guest PAT to WB as well
+	 * 3. Passthrough: can't guarantee the result, try to trust guest.
+	 */
+	if (is_mmio)
+		return _PAGE_NOCACHE;
+
+	if (!kvm_arch_has_assigned_device(vcpu->kvm))
+		return 0;
+
+	mtrr = kvm_mtrr_get_guest_memory_type(vcpu, gfn);
+	return mtrr2protval[mtrr];
+}
+
 static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 {
 	struct vmcb_control_area *control = &svm->vmcb->control;
@@ -1180,6 +1275,7 @@ static void init_vmcb(struct vcpu_svm *svm, bool init_event)
 		clr_cr_intercept(svm, INTERCEPT_CR3_READ);
 		clr_cr_intercept(svm, INTERCEPT_CR3_WRITE);
 		save->g_pat = svm->vcpu.arch.pat;
+		svm_set_guest_pat(svm, &save->g_pat);
 		save->cr3 = 0;
 		save->cr4 = 0;
 	}
@@ -4088,11 +4184,6 @@ static bool svm_has_high_real_mode_segbase(void)
 	return true;
 }

-static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
-{
-	return 0;
-}
-
 static void svm_cpuid_update(struct kvm_vcpu *vcpu)
 {
 }