Commit 281b5278, authored by Jue Wang, committed by Paolo Bonzini

KVM: x86: Add emulation for MSR_IA32_MCx_CTL2 MSRs.

This patch adds the emulation of IA32_MCi_CTL2 registers to KVM. A
separate mci_ctl2_banks array is used to keep the existing mce_banks
register layout intact.

In Machine Check Architecture, in addition to MCG_CMCI_P, bit 30 of
the per-bank register IA32_MCi_CTL2 controls whether Corrected Machine
Check error reporting is enabled.
Signed-off-by: Jue Wang <juew@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220610171134.772566-7-juew@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 087acc4e
...@@ -826,6 +826,7 @@ struct kvm_vcpu_arch { ...@@ -826,6 +826,7 @@ struct kvm_vcpu_arch {
u64 mcg_ctl; u64 mcg_ctl;
u64 mcg_ext_ctl; u64 mcg_ext_ctl;
u64 *mce_banks; u64 *mce_banks;
u64 *mci_ctl2_banks;
/* Cache MMIO info */ /* Cache MMIO info */
u64 mmio_gva; u64 mmio_gva;
......
...@@ -3191,6 +3191,16 @@ static void kvmclock_sync_fn(struct work_struct *work) ...@@ -3191,6 +3191,16 @@ static void kvmclock_sync_fn(struct work_struct *work)
KVMCLOCK_SYNC_PERIOD); KVMCLOCK_SYNC_PERIOD);
} }
/* These helpers are safe iff @msr is known to be an MCx bank MSR. */
static bool is_mci_control_msr(u32 msr)
{
	/* Each bank spans 4 MSRs; the CTL register occupies slot 0. */
	return !(msr & 0x3);
}
/* True iff @msr is the STATUS register (slot 1) of its MCx bank. */
static bool is_mci_status_msr(u32 msr)
{
	return (msr & 0x3) == 1;
}
/* /*
* On AMD, HWCR[McStatusWrEn] controls whether setting MCi_STATUS results in #GP. * On AMD, HWCR[McStatusWrEn] controls whether setting MCi_STATUS results in #GP.
*/ */
...@@ -3209,6 +3219,7 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -3209,6 +3219,7 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
unsigned bank_num = mcg_cap & 0xff; unsigned bank_num = mcg_cap & 0xff;
u32 msr = msr_info->index; u32 msr = msr_info->index;
u64 data = msr_info->data; u64 data = msr_info->data;
u32 offset, last_msr;
switch (msr) { switch (msr) {
case MSR_IA32_MCG_STATUS: case MSR_IA32_MCG_STATUS:
...@@ -3222,35 +3233,53 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -3222,35 +3233,53 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1; return 1;
vcpu->arch.mcg_ctl = data; vcpu->arch.mcg_ctl = data;
break; break;
default: case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
if (msr >= MSR_IA32_MC0_CTL && last_msr = MSR_IA32_MCx_CTL2(bank_num) - 1;
msr < MSR_IA32_MCx_CTL(bank_num)) { if (msr > last_msr)
u32 offset = array_index_nospec( return 1;
msr - MSR_IA32_MC0_CTL,
MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL); if (!(mcg_cap & MCG_CMCI_P) && (data || !msr_info->host_initiated))
return 1;
/* only 0 or all 1s can be written to IA32_MCi_CTL /* An attempt to write a 1 to a reserved bit raises #GP */
* some Linux kernels though clear bit 10 in bank 4 to if (data & ~(MCI_CTL2_CMCI_EN | MCI_CTL2_CMCI_THRESHOLD_MASK))
* workaround a BIOS/GART TBL issue on AMD K8s, ignore return 1;
* this to avoid an uncatched #GP in the guest. offset = array_index_nospec(msr - MSR_IA32_MC0_CTL2,
last_msr + 1 - MSR_IA32_MC0_CTL2);
vcpu->arch.mci_ctl2_banks[offset] = data;
break;
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
last_msr = MSR_IA32_MCx_CTL(bank_num) - 1;
if (msr > last_msr)
return 1;
/*
* Only 0 or all 1s can be written to IA32_MCi_CTL, all other
* values are architecturally undefined. But, some Linux
* kernels clear bit 10 in bank 4 to workaround a BIOS/GART TLB
* issue on AMD K8s, allow bit 10 to be clear when setting all
* other bits in order to avoid an uncaught #GP in the guest.
* *
* UNIXWARE clears bit 0 of MC1_CTL to ignore * UNIXWARE clears bit 0 of MC1_CTL to ignore
* correctable, single-bit ECC data errors. * correctable, single-bit ECC data errors.
*/ */
if ((offset & 0x3) == 0 && if (is_mci_control_msr(msr) &&
data != 0 && (data | (1 << 10) | 1) != ~(u64)0) data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
return -1; return 1;
/* MCi_STATUS */
if (!msr_info->host_initiated &&
(offset & 0x3) == 1 && data != 0) {
if (!can_set_mci_status(vcpu))
return -1;
}
vcpu->arch.mce_banks[offset] = data; /*
break; * All CPUs allow writing 0 to MCi_STATUS MSRs to clear the MSR.
} * AMD-based CPUs allow non-zero values, but if and only if
* HWCR[McStatusWrEn] is set.
*/
if (!msr_info->host_initiated && is_mci_status_msr(msr) &&
data != 0 && !can_set_mci_status(vcpu))
return 1;
offset = array_index_nospec(msr - MSR_IA32_MC0_CTL,
last_msr + 1 - MSR_IA32_MC0_CTL);
vcpu->arch.mce_banks[offset] = data;
break;
default:
return 1; return 1;
} }
return 0; return 0;
...@@ -3534,7 +3563,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -3534,7 +3563,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1; return 1;
} }
break; break;
case 0x200 ... 0x2ff: case 0x200 ... MSR_IA32_MC0_CTL2 - 1:
case MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) ... 0x2ff:
return kvm_mtrr_set_msr(vcpu, msr, data); return kvm_mtrr_set_msr(vcpu, msr, data);
case MSR_IA32_APICBASE: case MSR_IA32_APICBASE:
return kvm_set_apic_base(vcpu, msr_info); return kvm_set_apic_base(vcpu, msr_info);
...@@ -3704,6 +3734,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -3704,6 +3734,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS: case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
return set_msr_mce(vcpu, msr_info); return set_msr_mce(vcpu, msr_info);
case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3: case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
...@@ -3819,6 +3850,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) ...@@ -3819,6 +3850,7 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
u64 data; u64 data;
u64 mcg_cap = vcpu->arch.mcg_cap; u64 mcg_cap = vcpu->arch.mcg_cap;
unsigned bank_num = mcg_cap & 0xff; unsigned bank_num = mcg_cap & 0xff;
u32 offset, last_msr;
switch (msr) { switch (msr) {
case MSR_IA32_P5_MC_ADDR: case MSR_IA32_P5_MC_ADDR:
...@@ -3836,16 +3868,27 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) ...@@ -3836,16 +3868,27 @@ static int get_msr_mce(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
case MSR_IA32_MCG_STATUS: case MSR_IA32_MCG_STATUS:
data = vcpu->arch.mcg_status; data = vcpu->arch.mcg_status;
break; break;
default: case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
if (msr >= MSR_IA32_MC0_CTL && last_msr = MSR_IA32_MCx_CTL2(bank_num) - 1;
msr < MSR_IA32_MCx_CTL(bank_num)) { if (msr > last_msr)
u32 offset = array_index_nospec( return 1;
msr - MSR_IA32_MC0_CTL,
MSR_IA32_MCx_CTL(bank_num) - MSR_IA32_MC0_CTL);
data = vcpu->arch.mce_banks[offset]; if (!(mcg_cap & MCG_CMCI_P) && !host)
break; return 1;
} offset = array_index_nospec(msr - MSR_IA32_MC0_CTL2,
last_msr + 1 - MSR_IA32_MC0_CTL2);
data = vcpu->arch.mci_ctl2_banks[offset];
break;
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
last_msr = MSR_IA32_MCx_CTL(bank_num) - 1;
if (msr > last_msr)
return 1;
offset = array_index_nospec(msr - MSR_IA32_MC0_CTL,
last_msr + 1 - MSR_IA32_MC0_CTL);
data = vcpu->arch.mce_banks[offset];
break;
default:
return 1; return 1;
} }
*pdata = data; *pdata = data;
...@@ -3949,7 +3992,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -3949,7 +3992,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break; break;
} }
case MSR_MTRRcap: case MSR_MTRRcap:
case 0x200 ... 0x2ff: case 0x200 ... MSR_IA32_MC0_CTL2 - 1:
case MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) ... 0x2ff:
return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data); return kvm_mtrr_get_msr(vcpu, msr_info->index, &msr_info->data);
case 0xcd: /* fsb frequency */ case 0xcd: /* fsb frequency */
msr_info->data = 3; msr_info->data = 3;
...@@ -4065,6 +4109,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) ...@@ -4065,6 +4109,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_MCG_CTL: case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS: case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1: case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
case MSR_IA32_MC0_CTL2 ... MSR_IA32_MCx_CTL2(KVM_MAX_MCE_BANKS) - 1:
return get_msr_mce(vcpu, msr_info->index, &msr_info->data, return get_msr_mce(vcpu, msr_info->index, &msr_info->data,
msr_info->host_initiated); msr_info->host_initiated);
case MSR_IA32_XSS: case MSR_IA32_XSS:
...@@ -4842,9 +4887,12 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu, ...@@ -4842,9 +4887,12 @@ static int kvm_vcpu_ioctl_x86_setup_mce(struct kvm_vcpu *vcpu,
/* Init IA32_MCG_CTL to all 1s */ /* Init IA32_MCG_CTL to all 1s */
if (mcg_cap & MCG_CTL_P) if (mcg_cap & MCG_CTL_P)
vcpu->arch.mcg_ctl = ~(u64)0; vcpu->arch.mcg_ctl = ~(u64)0;
/* Init IA32_MCi_CTL to all 1s */ /* Init IA32_MCi_CTL to all 1s, IA32_MCi_CTL2 to all 0s */
for (bank = 0; bank < bank_num; bank++) for (bank = 0; bank < bank_num; bank++) {
vcpu->arch.mce_banks[bank*4] = ~(u64)0; vcpu->arch.mce_banks[bank*4] = ~(u64)0;
if (mcg_cap & MCG_CMCI_P)
vcpu->arch.mci_ctl2_banks[bank] = 0;
}
vcpu->arch.apic->nr_lvt_entries = vcpu->arch.apic->nr_lvt_entries =
KVM_APIC_MAX_NR_LVT_ENTRIES - !(mcg_cap & MCG_CMCI_P); KVM_APIC_MAX_NR_LVT_ENTRIES - !(mcg_cap & MCG_CMCI_P);
...@@ -11449,7 +11497,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) ...@@ -11449,7 +11497,9 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
vcpu->arch.mce_banks = kcalloc(KVM_MAX_MCE_BANKS * 4, sizeof(u64), vcpu->arch.mce_banks = kcalloc(KVM_MAX_MCE_BANKS * 4, sizeof(u64),
GFP_KERNEL_ACCOUNT); GFP_KERNEL_ACCOUNT);
if (!vcpu->arch.mce_banks) vcpu->arch.mci_ctl2_banks = kcalloc(KVM_MAX_MCE_BANKS, sizeof(u64),
GFP_KERNEL_ACCOUNT);
if (!vcpu->arch.mce_banks || !vcpu->arch.mci_ctl2_banks)
goto fail_free_pio_data; goto fail_free_pio_data;
vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS; vcpu->arch.mcg_cap = KVM_MAX_MCE_BANKS;
...@@ -11503,6 +11553,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) ...@@ -11503,6 +11553,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); free_cpumask_var(vcpu->arch.wbinvd_dirty_mask);
fail_free_mce_banks: fail_free_mce_banks:
kfree(vcpu->arch.mce_banks); kfree(vcpu->arch.mce_banks);
kfree(vcpu->arch.mci_ctl2_banks);
fail_free_pio_data: fail_free_pio_data:
free_page((unsigned long)vcpu->arch.pio_data); free_page((unsigned long)vcpu->arch.pio_data);
fail_free_lapic: fail_free_lapic:
...@@ -11548,6 +11599,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) ...@@ -11548,6 +11599,7 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
kvm_hv_vcpu_uninit(vcpu); kvm_hv_vcpu_uninit(vcpu);
kvm_pmu_destroy(vcpu); kvm_pmu_destroy(vcpu);
kfree(vcpu->arch.mce_banks); kfree(vcpu->arch.mce_banks);
kfree(vcpu->arch.mci_ctl2_banks);
kvm_free_lapic(vcpu); kvm_free_lapic(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu); idx = srcu_read_lock(&vcpu->kvm->srcu);
kvm_mmu_destroy(vcpu); kvm_mmu_destroy(vcpu);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment