Commit 27461da3 authored by Like Xu, committed by Paolo Bonzini

KVM: x86/pmu: Support full width counting

Intel CPUs have a new alternative MSR range (starting from MSR_IA32_PMC0)
for GP counters that allows writing the full counter width. Enable this
range from a new capability bit (IA32_PERF_CAPABILITIES.FW_WRITE[bit 13]).

The guest queries CPUID to get the counter width and sign-extends the
counter values as needed. The traditional MSRs are always limited to
32 bits, even though the counter is internally wider (48 or 57 bits).

When the new capability is set, use the alternative range, which does not
have this restriction. This lowers the overhead of perf stat slightly
because fewer interrupts are needed to accumulate the counter value.
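
To make the behaviour described above concrete, here is a small illustrative sketch (not part of the commit) contrasting a legacy counter-MSR write, which takes only the low 32 bits and sign-extends them, with a full-width write through the MSR_IA32_PMC0 range, which accepts every implemented counter bit. The 48-bit width is an assumption for the example; real hardware reports the actual width via CPUID. A second sketch after the diff shows how a guest might probe the capability bit.

/*
 * Illustrative sketch only (not KVM code): contrast the legacy and the
 * full-width counter write semantics described in the commit message.
 * The 48-bit counter width is an assumption for this example.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t counter_mask(unsigned int width)
{
	return (width < 64) ? ((1ULL << width) - 1) : ~0ULL;
}

/* Legacy MSRs (MSR_IA32_PERFCTR0 range): low 32 bits, sign-extended. */
static uint64_t legacy_write(uint64_t data, unsigned int width)
{
	return ((uint64_t)(int64_t)(int32_t)data) & counter_mask(width);
}

/* Full-width MSRs (MSR_IA32_PMC0 range): all implemented bits accepted. */
static uint64_t full_width_write(uint64_t data, unsigned int width)
{
	return data & counter_mask(width);
}

int main(void)
{
	uint64_t val = 0x0000123480000000ULL;

	printf("legacy:     %#llx\n", (unsigned long long)legacy_write(val, 48));
	printf("full width: %#llx\n", (unsigned long long)full_width_write(val, 48));
	return 0;
}

With a 48-bit counter, the legacy path turns 0x123480000000 into 0xffff80000000, while the full-width path preserves the value unchanged.
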
Signed-off-by: Like Xu <like.xu@linux.intel.com>
Message-Id: <20200529074347.124619-3-like.xu@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent cbd71758
...
@@ -601,6 +601,7 @@ struct kvm_vcpu_arch {
 	u64 ia32_xss;
 	u64 microcode_version;
 	u64 arch_capabilities;
+	u64 perf_capabilities;

 	/*
 	 * Paging state of the vcpu
...
...
@@ -296,7 +296,7 @@ void kvm_set_cpu_caps(void)
 		F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
 		0 /* DS-CPL, VMX, SMX, EST */ |
 		0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
-		F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
+		F(FMA) | F(CX16) | 0 /* xTPR Update */ | F(PDCM) |
 		F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
 		F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
 		0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |
...
...
@@ -18,6 +18,8 @@ extern int __read_mostly pt_mode;
 #define PT_MODE_SYSTEM		0
 #define PT_MODE_HOST_GUEST	1

+#define PMU_CAP_FW_WRITES	(1ULL << 13)
+
 struct nested_vmx_msrs {
 	/*
 	 * We only store the "true" versions of the VMX capability MSRs. We
...
@@ -367,4 +369,13 @@ static inline bool vmx_pt_mode_is_host_guest(void)
 	return pt_mode == PT_MODE_HOST_GUEST;
 }

+static inline u64 vmx_get_perf_capabilities(void)
+{
+	/*
+	 * Since counters are virtualized, KVM would support full
+	 * width counting unconditionally, even if the host lacks it.
+	 */
+	return PMU_CAP_FW_WRITES;
+}
+
 #endif /* __KVM_X86_VMX_CAPS_H */
...
@@ -18,6 +18,8 @@
 #include "nested.h"
 #include "pmu.h"

+#define MSR_PMC_FULL_WIDTH_BIT	(MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
+
 static struct kvm_event_hw_type_mapping intel_arch_events[] = {
 	/* Index must match CPUID 0x0A.EBX bit vector */
 	[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
...
@@ -150,6 +152,22 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
 	return &counters[array_index_nospec(idx, num_counters)];
 }

+static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
+{
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
+		return false;
+
+	return vcpu->arch.perf_capabilities & PMU_CAP_FW_WRITES;
+}
+
+static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
+{
+	if (!fw_writes_is_enabled(pmu_to_vcpu(pmu)))
+		return NULL;
+
+	return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
+}
+
 static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
...
@@ -162,10 +180,13 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 		ret = pmu->version > 1;
 		break;
+	case MSR_IA32_PERF_CAPABILITIES:
+		ret = guest_cpuid_has(vcpu, X86_FEATURE_PDCM);
+		break;
 	default:
 		ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
 			get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
-			get_fixed_pmc(pmu, msr);
+			get_fixed_pmc(pmu, msr) || get_fw_gp_pmc(pmu, msr);
 		break;
 	}
...
@@ -203,8 +224,15 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 		msr_info->data = pmu->global_ovf_ctrl;
 		return 0;
+	case MSR_IA32_PERF_CAPABILITIES:
+		if (!msr_info->host_initiated &&
+		    !guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
+			return 1;
+		msr_info->data = vcpu->arch.perf_capabilities;
+		return 0;
 	default:
-		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) {
+		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
+		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
 			u64 val = pmc_read_counter(pmc);
 			msr_info->data =
 				val & pmu->counter_bitmask[KVM_PMC_GP];
...
@@ -261,9 +289,22 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 0;
 		}
 		break;
+	case MSR_IA32_PERF_CAPABILITIES:
+		if (!msr_info->host_initiated)
+			return 1;
+		if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) ?
+		    (data & ~vmx_get_perf_capabilities()) : data)
+			return 1;
+		vcpu->arch.perf_capabilities = data;
+		return 0;
 	default:
-		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) {
-			if (!msr_info->host_initiated)
-				data = (s64)(s32)data;
+		if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
+		    (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
+			if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
+			    (data & ~pmu->counter_bitmask[KVM_PMC_GP]))
+				return 1;
+			if (!msr_info->host_initiated &&
+			    !(msr & MSR_PMC_FULL_WIDTH_BIT))
+				data = (s64)(s32)data;
 			pmc->counter += data - pmc_read_counter(pmc);
 			if (pmc->perf_event)
...
@@ -303,6 +344,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
 	pmu->version = 0;
 	pmu->reserved_bits = 0xffffffff00200000ull;
+	vcpu->arch.perf_capabilities = 0;

 	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
 	if (!entry)
...
@@ -315,6 +357,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 		return;

 	perf_get_x86_pmu_capability(&x86_pmu);
+	if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
+		vcpu->arch.perf_capabilities = vmx_get_perf_capabilities();

 	pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
 					 x86_pmu.num_counters_gp);
...
...
@@ -1788,6 +1788,9 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
 		if (!nested)
 			return 1;
 		return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
+	case MSR_IA32_PERF_CAPABILITIES:
+		msr->data = vmx_get_perf_capabilities();
+		return 0;
 	default:
 		return 1;
 	}
...
...
@@ -1253,6 +1253,7 @@ static const u32 emulated_msrs_all[] = {
 	MSR_IA32_TSC_ADJUST,
 	MSR_IA32_TSCDEADLINE,
 	MSR_IA32_ARCH_CAPABILITIES,
+	MSR_IA32_PERF_CAPABILITIES,
 	MSR_IA32_MISC_ENABLE,
 	MSR_IA32_MCG_STATUS,
 	MSR_IA32_MCG_CTL,
...
@@ -1319,6 +1320,7 @@ static const u32 msr_based_features_all[] = {
 	MSR_F10H_DECFG,
 	MSR_IA32_UCODE_REV,
 	MSR_IA32_ARCH_CAPABILITIES,
+	MSR_IA32_PERF_CAPABILITIES,
 };

 static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];
...
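
As a closing illustration, the sketch below (again not part of the commit) shows how a Linux guest could probe the new capability through the msr driver before relying on the MSR_IA32_PMC0 range. The MSR index 0x345 (IA32_PERF_CAPABILITIES) and bit 13 (FW_WRITE) match the commit; the probing method is just one possible way to test the feature and assumes the msr module is loaded and the program runs as root.

/*
 * Illustrative sketch only: probe IA32_PERF_CAPABILITIES.FW_WRITE from a
 * Linux guest via the msr driver. MSR index 0x345 and bit 13 come from
 * the commit; the rest is an assumption about how one might test it.
 */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

#define MSR_IA32_PERF_CAPABILITIES	0x345
#define PMU_CAP_FW_WRITES		(1ULL << 13)

int main(void)
{
	uint64_t caps = 0;
	int fd = open("/dev/cpu/0/msr", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/cpu/0/msr");
		return 1;
	}

	/* The msr driver maps the file offset to the MSR index. */
	if (pread(fd, &caps, sizeof(caps), MSR_IA32_PERF_CAPABILITIES) !=
	    sizeof(caps)) {
		/* rdmsr faults if PDCM is not exposed to the guest. */
		printf("IA32_PERF_CAPABILITIES not readable (no PDCM?)\n");
		close(fd);
		return 0;
	}
	close(fd);

	printf("full-width counter writes %ssupported\n",
	       (caps & PMU_CAP_FW_WRITES) ? "" : "not ");
	return 0;
}

If CPUID does not advertise PDCM, the rdmsr simply faults and the read fails, which the sketch reports as the capability being unavailable.
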