Commit caa057a2, authored by Wanpeng Li and committed by Paolo Bonzini

KVM: X86: Provide a capability to disable HLT intercepts

If host CPUs are dedicated to a VM, we can avoid VM exits on HLT.
This patch adds the per-VM capability to disable them.

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Jan H. Schönherr <jschoenh@amazon.de>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
parent 4d5422ce
...@@ -4367,6 +4367,7 @@ Returns: 0 on success, -EINVAL when args[0] contains invalid exits ...@@ -4367,6 +4367,7 @@ Returns: 0 on success, -EINVAL when args[0] contains invalid exits
Valid bits in args[0] are Valid bits in args[0] are
#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
Enabling this capability on a VM provides userspace with a way to no Enabling this capability on a VM provides userspace with a way to no
longer intercept some instructions for improved latency in some longer intercept some instructions for improved latency in some
...@@ -4375,6 +4376,7 @@ physical CPUs. More bits can be added in the future; userspace can ...@@ -4375,6 +4376,7 @@ physical CPUs. More bits can be added in the future; userspace can
just pass the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP to disable just pass the KVM_CHECK_EXTENSION result to KVM_ENABLE_CAP to disable
all such vmexits. all such vmexits.
Do not enable KVM_FEATURE_PV_UNHALT if you disable HLT exits.
8. Other capabilities. 8. Other capabilities.
---------------------- ----------------------
......
...@@ -812,6 +812,7 @@ struct kvm_arch { ...@@ -812,6 +812,7 @@ struct kvm_arch {
gpa_t wall_clock; gpa_t wall_clock;
bool mwait_in_guest; bool mwait_in_guest;
bool hlt_in_guest;
bool ept_identity_pagetable_done; bool ept_identity_pagetable_done;
gpa_t ept_identity_map_addr; gpa_t ept_identity_map_addr;
......
...@@ -135,6 +135,11 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) ...@@ -135,6 +135,11 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
return -EINVAL; return -EINVAL;
} }
best = kvm_find_cpuid_entry(vcpu, KVM_CPUID_FEATURES, 0);
if (kvm_hlt_in_guest(vcpu->kvm) && best &&
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
/* Update physical-address width */ /* Update physical-address width */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
kvm_mmu_reset_context(vcpu); kvm_mmu_reset_context(vcpu);
......
...@@ -1380,7 +1380,6 @@ static void init_vmcb(struct vcpu_svm *svm) ...@@ -1380,7 +1380,6 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_RDPMC); set_intercept(svm, INTERCEPT_RDPMC);
set_intercept(svm, INTERCEPT_CPUID); set_intercept(svm, INTERCEPT_CPUID);
set_intercept(svm, INTERCEPT_INVD); set_intercept(svm, INTERCEPT_INVD);
set_intercept(svm, INTERCEPT_HLT);
set_intercept(svm, INTERCEPT_INVLPG); set_intercept(svm, INTERCEPT_INVLPG);
set_intercept(svm, INTERCEPT_INVLPGA); set_intercept(svm, INTERCEPT_INVLPGA);
set_intercept(svm, INTERCEPT_IOIO_PROT); set_intercept(svm, INTERCEPT_IOIO_PROT);
...@@ -1403,6 +1402,9 @@ static void init_vmcb(struct vcpu_svm *svm) ...@@ -1403,6 +1402,9 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_MWAIT); set_intercept(svm, INTERCEPT_MWAIT);
} }
if (!kvm_hlt_in_guest(svm->vcpu.kvm))
set_intercept(svm, INTERCEPT_HLT);
control->iopm_base_pa = __sme_set(iopm_base); control->iopm_base_pa = __sme_set(iopm_base);
control->msrpm_base_pa = __sme_set(__pa(svm->msrpm)); control->msrpm_base_pa = __sme_set(__pa(svm->msrpm));
control->int_ctl = V_INTR_MASKING_MASK; control->int_ctl = V_INTR_MASKING_MASK;
......
...@@ -2556,6 +2556,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit ...@@ -2556,6 +2556,19 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit
return 0; return 0;
} }
/*
 * Reset the VMCS guest activity state from HLT back to ACTIVE.
 *
 * This only does anything when the VM runs HLT without exiting
 * (kvm_hlt_in_guest()) and the VMCS currently records GUEST_ACTIVITY_HLT.
 * It is called from the exception/IRQ/NMI injection paths, from vCPU reset
 * on an INIT event, and on SMM entry.
 *
 * The HLT instruction itself does not have to be skipped: when the HLT
 * activity state is set, the instruction is already executing and RIP has
 * already been advanced past it.
 */
static void vmx_clear_hlt(struct kvm_vcpu *vcpu)
{
	/* HLT is still intercepted for this VM: nothing to undo. */
	if (!kvm_hlt_in_guest(vcpu->kvm))
		return;

	/* Only touch the field when the vCPU is actually halted. */
	if (vmcs_read32(GUEST_ACTIVITY_STATE) != GUEST_ACTIVITY_HLT)
		return;

	vmcs_write32(GUEST_ACTIVITY_STATE, GUEST_ACTIVITY_ACTIVE);
}
static void vmx_queue_exception(struct kvm_vcpu *vcpu) static void vmx_queue_exception(struct kvm_vcpu *vcpu)
{ {
struct vcpu_vmx *vmx = to_vmx(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu);
...@@ -2586,6 +2599,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) ...@@ -2586,6 +2599,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
intr_info |= INTR_TYPE_HARD_EXCEPTION; intr_info |= INTR_TYPE_HARD_EXCEPTION;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info); vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr_info);
vmx_clear_hlt(vcpu);
} }
static bool vmx_rdtscp_supported(void) static bool vmx_rdtscp_supported(void)
...@@ -5545,6 +5560,8 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx) ...@@ -5545,6 +5560,8 @@ static u32 vmx_exec_control(struct vcpu_vmx *vmx)
if (kvm_mwait_in_guest(vmx->vcpu.kvm)) if (kvm_mwait_in_guest(vmx->vcpu.kvm))
exec_control &= ~(CPU_BASED_MWAIT_EXITING | exec_control &= ~(CPU_BASED_MWAIT_EXITING |
CPU_BASED_MONITOR_EXITING); CPU_BASED_MONITOR_EXITING);
if (kvm_hlt_in_guest(vmx->vcpu.kvm))
exec_control &= ~CPU_BASED_HLT_EXITING;
return exec_control; return exec_control;
} }
...@@ -5906,6 +5923,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) ...@@ -5906,6 +5923,8 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
update_exception_bitmap(vcpu); update_exception_bitmap(vcpu);
vpid_sync_context(vmx->vpid); vpid_sync_context(vmx->vpid);
if (init_event)
vmx_clear_hlt(vcpu);
} }
/* /*
...@@ -5976,6 +5995,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu) ...@@ -5976,6 +5995,8 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu)
} else } else
intr |= INTR_TYPE_EXT_INTR; intr |= INTR_TYPE_EXT_INTR;
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr); vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, intr);
vmx_clear_hlt(vcpu);
} }
static void vmx_inject_nmi(struct kvm_vcpu *vcpu) static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
...@@ -6006,6 +6027,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) ...@@ -6006,6 +6027,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR); INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR);
vmx_clear_hlt(vcpu);
} }
static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
...@@ -12347,6 +12370,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) ...@@ -12347,6 +12370,7 @@ static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
vmx->nested.smm.vmxon = vmx->nested.vmxon; vmx->nested.smm.vmxon = vmx->nested.vmxon;
vmx->nested.vmxon = false; vmx->nested.vmxon = false;
vmx_clear_hlt(vcpu);
return 0; return 0;
} }
......
...@@ -2878,6 +2878,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) ...@@ -2878,6 +2878,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_CLOCK_TSC_STABLE; r = KVM_CLOCK_TSC_STABLE;
break; break;
case KVM_CAP_X86_DISABLE_EXITS: case KVM_CAP_X86_DISABLE_EXITS:
r |= KVM_X86_DISABLE_EXITS_HTL;
if(kvm_can_mwait_in_guest()) if(kvm_can_mwait_in_guest())
r |= KVM_X86_DISABLE_EXITS_MWAIT; r |= KVM_X86_DISABLE_EXITS_MWAIT;
break; break;
...@@ -4232,6 +4233,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, ...@@ -4232,6 +4233,8 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) && if ((cap->args[0] & KVM_X86_DISABLE_EXITS_MWAIT) &&
kvm_can_mwait_in_guest()) kvm_can_mwait_in_guest())
kvm->arch.mwait_in_guest = true; kvm->arch.mwait_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_HTL)
kvm->arch.hlt_in_guest = true;
r = 0; r = 0;
break; break;
default: default:
......
...@@ -265,11 +265,18 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec) ...@@ -265,11 +265,18 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
}) })
#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT) #define KVM_X86_DISABLE_EXITS_HTL (1 << 1)
#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
KVM_X86_DISABLE_EXITS_HTL)
static inline bool kvm_mwait_in_guest(struct kvm *kvm) static inline bool kvm_mwait_in_guest(struct kvm *kvm)
{ {
return kvm->arch.mwait_in_guest; return kvm->arch.mwait_in_guest;
} }
/*
 * Report whether HLT exits have been disabled for this VM.
 *
 * Returns the per-VM kvm->arch.hlt_in_guest flag, which
 * kvm_vm_ioctl_enable_cap() sets when userspace requests the
 * KVM_X86_DISABLE_EXITS_HTL bit (spelled "HTL" in this header; the
 * documented name is KVM_X86_DISABLE_EXITS_HLT).
 */
static inline bool kvm_hlt_in_guest(struct kvm *kvm)
{
	bool hlt_exits_disabled = kvm->arch.hlt_in_guest;

	return hlt_exits_disabled;
}
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment