Commit 36b68d36 authored by Paolo Bonzini's avatar Paolo Bonzini

Merge tag 'kvm-x86-misc-6.5' of https://github.com/kvm-x86/linux into HEAD

KVM x86 changes for 6.5:

* Move handling of PAT out of MTRR code and dedup SVM+VMX code

* Fix output of PIC poll command emulation when there's an interrupt

* Add a maintainer's handbook to document KVM x86 processes, preferred coding
  style, testing expectations, etc.

* Misc cleanups
parents d74669eb 63e2f55c
......@@ -17,3 +17,4 @@ Contents:
maintainer-tip
maintainer-netdev
maintainer-kvm-x86
This diff is collapsed.
......@@ -452,6 +452,8 @@ and can be added to an existing kernel config by running:
Some of these options are x86-specific and can be left out when testing
on other architectures.
.. _maintainer-tip-coding-style:
Coding style notes
------------------
......
......@@ -205,7 +205,7 @@ Shadow pages contain the following information:
role.passthrough:
The page is not backed by a guest page table, but its first entry
points to one. This is set if NPT uses 5-level page tables (host
CR4.LA57=1) and is shadowing L1's 4-level NPT (L1 CR4.LA57=1).
CR4.LA57=1) and is shadowing L1's 4-level NPT (L1 CR4.LA57=0).
gfn:
Either the guest page table containing the translations shadowed by this
page, or the base page frame for linear translations. See role.direct.
......
......@@ -11436,6 +11436,7 @@ M: Sean Christopherson <seanjc@google.com>
M: Paolo Bonzini <pbonzini@redhat.com>
L: kvm@vger.kernel.org
S: Supported
P: Documentation/process/maintainer-kvm-x86.rst
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
F: arch/x86/include/asm/kvm*
F: arch/x86/include/asm/svm.h
......
......@@ -501,20 +501,15 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
struct kvm_cpuid2 *cpuid,
struct kvm_cpuid_entry2 __user *entries)
{
int r;
r = -E2BIG;
if (cpuid->nent < vcpu->arch.cpuid_nent)
goto out;
r = -EFAULT;
return -E2BIG;
if (copy_to_user(entries, vcpu->arch.cpuid_entries,
vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
goto out;
return 0;
return -EFAULT;
out:
cpuid->nent = vcpu->arch.cpuid_nent;
return r;
return 0;
}
/* Mask kvm_cpu_caps for @leaf with the raw CPUID capabilities of this CPU. */
......
......@@ -411,7 +411,10 @@ static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1)
pic_clear_isr(s, ret);
if (addr1 >> 7 || ret != 2)
pic_update_irq(s->pics_state);
/* Bit 7 is 1, means there's an interrupt */
ret |= 0x80;
} else {
/* Bit 7 is 0, means there's no interrupt */
ret = 0x07;
pic_update_irq(s->pics_state);
}
......
......@@ -51,11 +51,6 @@
#define mod_64(x, y) ((x) % (y))
#endif
#define PRId64 "d"
#define PRIx64 "llx"
#define PRIu64 "u"
#define PRIo64 "o"
/* 14 is the version for Xeon and Pentium 8.4.8*/
#define APIC_VERSION 0x14UL
#define LAPIC_MMIO_LENGTH (1 << 12)
......
......@@ -25,10 +25,24 @@
#define IA32_MTRR_DEF_TYPE_FE (1ULL << 10)
#define IA32_MTRR_DEF_TYPE_TYPE_MASK (0xff)
static bool is_mtrr_base_msr(unsigned int msr)
{
/* MTRR base MSRs use even numbers, masks use odd numbers. */
return !(msr & 0x1);
}
static struct kvm_mtrr_range *var_mtrr_msr_to_range(struct kvm_vcpu *vcpu,
unsigned int msr)
{
int index = (msr - MTRRphysBase_MSR(0)) / 2;
return &vcpu->arch.mtrr_state.var_ranges[index];
}
static bool msr_mtrr_valid(unsigned msr)
{
switch (msr) {
case 0x200 ... 0x200 + 2 * KVM_NR_VAR_MTRR - 1:
case MTRRphysBase_MSR(0) ... MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1):
case MSR_MTRRfix64K_00000:
case MSR_MTRRfix16K_80000:
case MSR_MTRRfix16K_A0000:
......@@ -41,7 +55,6 @@ static bool msr_mtrr_valid(unsigned msr)
case MSR_MTRRfix4K_F0000:
case MSR_MTRRfix4K_F8000:
case MSR_MTRRdefType:
case MSR_IA32_CR_PAT:
return true;
}
return false;
......@@ -52,7 +65,7 @@ static bool valid_mtrr_type(unsigned t)
return t < 8 && (1 << t) & 0x73; /* 0, 1, 4, 5, 6 */
}
bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
static bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
int i;
u64 mask;
......@@ -60,9 +73,7 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
if (!msr_mtrr_valid(msr))
return false;
if (msr == MSR_IA32_CR_PAT) {
return kvm_pat_valid(data);
} else if (msr == MSR_MTRRdefType) {
if (msr == MSR_MTRRdefType) {
if (data & ~0xcff)
return false;
return valid_mtrr_type(data & 0xff);
......@@ -74,7 +85,8 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
}
/* variable MTRRs */
WARN_ON(!(msr >= 0x200 && msr < 0x200 + 2 * KVM_NR_VAR_MTRR));
WARN_ON(!(msr >= MTRRphysBase_MSR(0) &&
msr <= MTRRphysMask_MSR(KVM_NR_VAR_MTRR - 1)));
mask = kvm_vcpu_reserved_gpa_bits_raw(vcpu);
if ((msr & 1) == 0) {
......@@ -88,7 +100,6 @@ bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data)
return (data & mask) == 0;
}
EXPORT_SYMBOL_GPL(kvm_mtrr_valid);
static bool mtrr_is_enabled(struct kvm_mtrr *mtrr_state)
{
......@@ -308,10 +319,8 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
{
struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
gfn_t start, end;
int index;
if (msr == MSR_IA32_CR_PAT || !tdp_enabled ||
!kvm_arch_has_noncoherent_dma(vcpu->kvm))
if (!tdp_enabled || !kvm_arch_has_noncoherent_dma(vcpu->kvm))
return;
if (!mtrr_is_enabled(mtrr_state) && msr != MSR_MTRRdefType)
......@@ -326,8 +335,7 @@ static void update_mtrr(struct kvm_vcpu *vcpu, u32 msr)
end = ~0ULL;
} else {
/* variable range MTRRs. */
index = (msr - 0x200) / 2;
var_mtrr_range(&mtrr_state->var_ranges[index], &start, &end);
var_mtrr_range(var_mtrr_msr_to_range(vcpu, msr), &start, &end);
}
kvm_zap_gfn_range(vcpu->kvm, gpa_to_gfn(start), gpa_to_gfn(end));
......@@ -342,21 +350,18 @@ static void set_var_mtrr_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
{
struct kvm_mtrr *mtrr_state = &vcpu->arch.mtrr_state;
struct kvm_mtrr_range *tmp, *cur;
int index, is_mtrr_mask;
index = (msr - 0x200) / 2;
is_mtrr_mask = msr - 0x200 - 2 * index;
cur = &mtrr_state->var_ranges[index];
cur = var_mtrr_msr_to_range(vcpu, msr);
/* remove the entry if it's in the list. */
if (var_mtrr_range_is_valid(cur))
list_del(&mtrr_state->var_ranges[index].node);
list_del(&cur->node);
/*
* Set all illegal GPA bits in the mask, since those bits must
* implicitly be 0. The bits are then cleared when reading them.
*/
if (!is_mtrr_mask)
if (is_mtrr_base_msr(msr))
cur->base = data;
else
cur->mask = data | kvm_vcpu_reserved_gpa_bits_raw(vcpu);
......@@ -382,8 +387,6 @@ int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data)
*(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index] = data;
else if (msr == MSR_MTRRdefType)
vcpu->arch.mtrr_state.deftype = data;
else if (msr == MSR_IA32_CR_PAT)
vcpu->arch.pat = data;
else
set_var_mtrr_msr(vcpu, msr, data);
......@@ -411,21 +414,16 @@ int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
return 1;
index = fixed_msr_to_range_index(msr);
if (index >= 0)
if (index >= 0) {
*pdata = *(u64 *)&vcpu->arch.mtrr_state.fixed_ranges[index];
else if (msr == MSR_MTRRdefType)
} else if (msr == MSR_MTRRdefType) {
*pdata = vcpu->arch.mtrr_state.deftype;
else if (msr == MSR_IA32_CR_PAT)
*pdata = vcpu->arch.pat;
else { /* Variable MTRRs */
int is_mtrr_mask;
index = (msr - 0x200) / 2;
is_mtrr_mask = msr - 0x200 - 2 * index;
if (!is_mtrr_mask)
*pdata = vcpu->arch.mtrr_state.var_ranges[index].base;
} else {
/* Variable MTRRs */
if (is_mtrr_base_msr(msr))
*pdata = var_mtrr_msr_to_range(vcpu, msr)->base;
else
*pdata = vcpu->arch.mtrr_state.var_ranges[index].mask;
*pdata = var_mtrr_msr_to_range(vcpu, msr)->mask;
*pdata &= ~kvm_vcpu_reserved_gpa_bits_raw(vcpu);
}
......
......@@ -752,7 +752,7 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
BUG_ON(offset == MSR_INVALID);
return !!test_bit(bit_write, &tmp);
return test_bit(bit_write, &tmp);
}
static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm,
......@@ -2939,9 +2939,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
case MSR_IA32_CR_PAT:
if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
return 1;
vcpu->arch.pat = data;
ret = kvm_set_msr_common(vcpu, msr);
if (ret)
break;
svm->vmcb01.ptr->save.g_pat = data;
if (is_guest_mode(vcpu))
nested_vmcb02_compute_g_pat(svm);
......
......@@ -2287,19 +2287,16 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
goto find_uret_msr;
case MSR_IA32_CR_PAT:
if (!kvm_pat_valid(data))
return 1;
ret = kvm_set_msr_common(vcpu, msr_info);
if (ret)
break;
if (is_guest_mode(vcpu) &&
get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
get_vmcs12(vcpu)->guest_ia32_pat = data;
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT)
vmcs_write64(GUEST_IA32_PAT, data);
vcpu->arch.pat = data;
break;
}
ret = kvm_set_msr_common(vcpu, msr_info);
break;
case MSR_IA32_MCG_EXT_CTL:
if ((!msr_info->host_initiated &&
......
......@@ -1017,13 +1017,11 @@ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu)
wrmsrl(MSR_IA32_XSS, vcpu->arch.ia32_xss);
}
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (static_cpu_has(X86_FEATURE_PKU) &&
if (cpu_feature_enabled(X86_FEATURE_PKU) &&
vcpu->arch.pkru != vcpu->arch.host_pkru &&
((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE)))
write_pkru(vcpu->arch.pkru);
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
}
EXPORT_SYMBOL_GPL(kvm_load_guest_xsave_state);
......@@ -1032,15 +1030,13 @@ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu)
if (vcpu->arch.guest_state_protected)
return;
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (static_cpu_has(X86_FEATURE_PKU) &&
if (cpu_feature_enabled(X86_FEATURE_PKU) &&
((vcpu->arch.xcr0 & XFEATURE_MASK_PKRU) ||
kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE))) {
vcpu->arch.pkru = rdpkru();
if (vcpu->arch.pkru != vcpu->arch.host_pkru)
write_pkru(vcpu->arch.host_pkru);
}
#endif /* CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS */
if (kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE)) {
......@@ -1427,15 +1423,14 @@ int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
/*
* List of msr numbers which we expose to userspace through KVM_GET_MSRS
* and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
*
* The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features)
* extract the supported MSRs from the related const lists.
* msrs_to_save is selected from the msrs_to_save_all to reflect the
* capabilities of the host cpu. This capabilities test skips MSRs that are
* kvm-specific. Those are put in emulated_msrs_all; filtering of emulated_msrs
* may depend on host virtualization features rather than host cpu features.
* The three MSR lists(msrs_to_save, emulated_msrs, msr_based_features) track
* the set of MSRs that KVM exposes to userspace through KVM_GET_MSRS,
* KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST. msrs_to_save holds MSRs that
* require host support, i.e. should be probed via RDMSR. emulated_msrs holds
* MSRs that KVM emulates without strictly requiring host support.
* msr_based_features holds MSRs that enumerate features, i.e. are effectively
* CPUID leafs. Note, msr_based_features isn't mutually exclusive with
* msrs_to_save and emulated_msrs.
*/
static const u32 msrs_to_save_base[] = {
......@@ -1531,11 +1526,11 @@ static const u32 emulated_msrs_all[] = {
MSR_IA32_UCODE_REV,
/*
* The following list leaves out MSRs whose values are determined
* by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
* We always support the "true" VMX control MSRs, even if the host
* processor does not, so I am putting these registers here rather
* than in msrs_to_save_all.
* KVM always supports the "true" VMX control MSRs, even if the host
* does not. The VMX MSRs as a whole are considered "emulated" as KVM
* doesn't strictly require them to exist in the host (ignoring that
* KVM would refuse to load in the first place if the core set of MSRs
* aren't supported).
*/
MSR_IA32_VMX_BASIC,
MSR_IA32_VMX_TRUE_PINBASED_CTLS,
......@@ -1631,7 +1626,7 @@ static u64 kvm_get_arch_capabilities(void)
* If we're doing cache flushes (either "always" or "cond")
* we will do one whenever the guest does a vmlaunch/vmresume.
* If an outer hypervisor is doing the cache flush for us
* (VMENTER_L1D_FLUSH_NESTED_VM), we can safely pass that
* (ARCH_CAP_SKIP_VMENTRY_L1DFLUSH), we can safely pass that
* capability to the guest too, and if EPT is disabled we're not
* vulnerable. Overall, only VMENTER_L1D_FLUSH_NEVER will
* require a nested hypervisor to do a flush of its own.
......@@ -1809,7 +1804,7 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
unsigned long *bitmap = ranges[i].bitmap;
if ((index >= start) && (index < end) && (flags & type)) {
allowed = !!test_bit(index - start, bitmap);
allowed = test_bit(index - start, bitmap);
break;
}
}
......@@ -3702,8 +3697,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
}
break;
case 0x200 ... MSR_IA32_MC0_CTL2 - 1:
case MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) ... 0x2ff:
case MSR_IA32_CR_PAT:
if (!kvm_pat_valid(data))
return 1;
vcpu->arch.pat = data;
break;
case MTRRphysBase_MSR(0) ... MSR_MTRRfix4K_F8000:
case MSR_MTRRdefType:
return kvm_mtrr_set_msr(vcpu, msr, data);
case MSR_IA32_APICBASE:
return kvm_set_apic_base(vcpu, msr_info);
......@@ -4110,9 +4111,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = kvm_scale_tsc(rdtsc(), ratio) + offset;
break;
}
case MSR_IA32_CR_PAT:
msr_info->data = vcpu->arch.pat;
break;
case MSR_MTRRcap:
case 0x200 ... MSR_IA32_MC0_CTL2 - 1:
case MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) ... 0x2ff:
case MTRRphysBase_MSR(0) ... MSR_MTRRfix4K_F8000:
case MSR_MTRRdefType:
return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
case 0xcd: /* fsb frequency */
msr_info->data = 3;
......
......@@ -309,7 +309,6 @@ void kvm_deliver_exception_payload(struct kvm_vcpu *vcpu,
void kvm_vcpu_mtrr_init(struct kvm_vcpu *vcpu);
u8 kvm_mtrr_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_mtrr_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data);
int kvm_mtrr_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata);
bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn,
......
......@@ -163,6 +163,25 @@ static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
ent->eax = eax;
}
static void test_get_cpuid2(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1);
int i, r;
vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent,
"KVM didn't update nent on success, wanted %u, got %u\n",
vcpu->cpuid->nent, cpuid->nent);
for (i = 0; i < vcpu->cpuid->nent; i++) {
cpuid->nent = i;
r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r));
TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure");
}
free(cpuid);
}
int main(void)
{
struct kvm_vcpu *vcpu;
......@@ -183,5 +202,7 @@ int main(void)
set_cpuid_after_run(vcpu);
test_get_cpuid2(vcpu);
kvm_vm_free(vm);
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment