/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * tools/testing/selftests/kvm/include/x86_64/processor.h
 *
 * Copyright (C) 2018, Google LLC.
 */

#ifndef SELFTEST_KVM_PROCESSOR_H
#define SELFTEST_KVM_PROCESSOR_H

#include <assert.h>
#include <stdint.h>
#include <syscall.h>

#include <asm/msr-index.h>
#include <asm/prctl.h>

#include <linux/kvm_para.h>
#include <linux/stringify.h>

#include "../kvm_util.h"

extern bool host_cpu_is_intel;
extern bool host_cpu_is_amd;

/* Forced emulation prefix, used to invoke the emulator unconditionally. */
#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
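/*
 * Usage sketch (illustrative): when KVM's forced emulation is enabled, the
 * prefix is simply prepended to the instruction that should be emulated, e.g.
 *
 *	asm volatile(KVM_FEP "nop");
 *
 * See kvm_is_forced_emulation_enabled() later in this header for the runtime
 * check.
 */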

#define NMI_VECTOR		0x02

#define X86_EFLAGS_FIXED	 (1u << 1)

#define X86_CR4_VME		(1ul << 0)
#define X86_CR4_PVI		(1ul << 1)
#define X86_CR4_TSD		(1ul << 2)
#define X86_CR4_DE		(1ul << 3)
#define X86_CR4_PSE		(1ul << 4)
#define X86_CR4_PAE		(1ul << 5)
#define X86_CR4_MCE		(1ul << 6)
#define X86_CR4_PGE		(1ul << 7)
#define X86_CR4_PCE		(1ul << 8)
#define X86_CR4_OSFXSR		(1ul << 9)
#define X86_CR4_OSXMMEXCPT	(1ul << 10)
#define X86_CR4_UMIP		(1ul << 11)
#define X86_CR4_LA57		(1ul << 12)
#define X86_CR4_VMXE		(1ul << 13)
#define X86_CR4_SMXE		(1ul << 14)
#define X86_CR4_FSGSBASE	(1ul << 16)
#define X86_CR4_PCIDE		(1ul << 17)
#define X86_CR4_OSXSAVE		(1ul << 18)
#define X86_CR4_SMEP		(1ul << 20)
#define X86_CR4_SMAP		(1ul << 21)
#define X86_CR4_PKE		(1ul << 22)

struct xstate_header {
	u64				xstate_bv;
	u64				xcomp_bv;
	u64				reserved[6];
} __attribute__((packed));

struct xstate {
	u8				i387[512];
	struct xstate_header		header;
	u8				extended_state_area[0];
} __attribute__ ((packed, aligned (64)));

#define XFEATURE_MASK_FP		BIT_ULL(0)
#define XFEATURE_MASK_SSE		BIT_ULL(1)
#define XFEATURE_MASK_YMM		BIT_ULL(2)
#define XFEATURE_MASK_BNDREGS		BIT_ULL(3)
#define XFEATURE_MASK_BNDCSR		BIT_ULL(4)
#define XFEATURE_MASK_OPMASK		BIT_ULL(5)
#define XFEATURE_MASK_ZMM_Hi256		BIT_ULL(6)
#define XFEATURE_MASK_Hi16_ZMM		BIT_ULL(7)
#define XFEATURE_MASK_PT		BIT_ULL(8)
#define XFEATURE_MASK_PKRU		BIT_ULL(9)
#define XFEATURE_MASK_PASID		BIT_ULL(10)
#define XFEATURE_MASK_CET_USER		BIT_ULL(11)
#define XFEATURE_MASK_CET_KERNEL	BIT_ULL(12)
#define XFEATURE_MASK_LBR		BIT_ULL(15)
#define XFEATURE_MASK_XTILE_CFG		BIT_ULL(17)
#define XFEATURE_MASK_XTILE_DATA	BIT_ULL(18)

#define XFEATURE_MASK_AVX512		(XFEATURE_MASK_OPMASK | \
					 XFEATURE_MASK_ZMM_Hi256 | \
					 XFEATURE_MASK_Hi16_ZMM)
#define XFEATURE_MASK_XTILE		(XFEATURE_MASK_XTILE_DATA | \
					 XFEATURE_MASK_XTILE_CFG)

/* Note, these are ordered alphabetically to match kvm_cpuid_entry2.  Eww. */
enum cpuid_output_regs {
	KVM_CPUID_EAX,
	KVM_CPUID_EBX,
	KVM_CPUID_ECX,
	KVM_CPUID_EDX
};

/*
 * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be
 * passed by value with no overhead.
 */
struct kvm_x86_cpu_feature {
	u32	function;
	u16	index;
	u8	reg;
	u8	bit;
};
#define	KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit)				\
({										\
	struct kvm_x86_cpu_feature feature = {					\
		.function = fn,							\
		.index = idx,							\
		.reg = KVM_CPUID_##gpr,						\
		.bit = __bit,							\
	};									\
										\
	kvm_static_assert((fn & 0xc0000000) == 0 ||				\
			  (fn & 0xc0000000) == 0x40000000 ||			\
			  (fn & 0xc0000000) == 0x80000000 ||			\
			  (fn & 0xc0000000) == 0xc0000000);			\
	kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE));	\
	feature;								\
})
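/*
 * Illustrative usage: the X86_FEATURE_* encodings below are consumed by
 * helpers defined later in this header (this_cpu_has() in the guest,
 * kvm_cpu_has() on the host), e.g.
 *
 *	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
 *
 * TEST_REQUIRE() comes from test_util.h; the specific feature checked here is
 * just an example.
 */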

/*
 * Basic Leafs, a.k.a. Intel defined
 */
#define	X86_FEATURE_MWAIT		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3)
#define	X86_FEATURE_VMX			KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5)
#define	X86_FEATURE_SMX			KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6)
#define	X86_FEATURE_PDCM		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15)
#define	X86_FEATURE_PCID		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17)
#define X86_FEATURE_X2APIC		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21)
#define	X86_FEATURE_MOVBE		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22)
#define	X86_FEATURE_TSC_DEADLINE_TIMER	KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24)
#define	X86_FEATURE_XSAVE		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
#define	X86_FEATURE_OSXSAVE		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
#define	X86_FEATURE_RDRAND		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
#define	X86_FEATURE_HYPERVISOR		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31)
#define X86_FEATURE_PAE			KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6)
#define	X86_FEATURE_MCE			KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
#define	X86_FEATURE_APIC		KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
#define	X86_FEATURE_CLFLUSH		KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19)
#define	X86_FEATURE_XMM			KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25)
#define	X86_FEATURE_XMM2		KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26)
#define	X86_FEATURE_FSGSBASE		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0)
#define	X86_FEATURE_TSC_ADJUST		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1)
#define	X86_FEATURE_SGX			KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2)
#define	X86_FEATURE_HLE			KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4)
#define	X86_FEATURE_SMEP	        KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7)
#define	X86_FEATURE_INVPCID		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10)
#define	X86_FEATURE_RTM			KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11)
#define	X86_FEATURE_MPX			KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14)
#define	X86_FEATURE_SMAP		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20)
#define	X86_FEATURE_PCOMMIT		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22)
#define	X86_FEATURE_CLFLUSHOPT		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23)
#define	X86_FEATURE_CLWB		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
#define	X86_FEATURE_UMIP		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
#define	X86_FEATURE_PKU			KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
#define	X86_FEATURE_OSPKE		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
#define	X86_FEATURE_LA57		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
#define	X86_FEATURE_RDPID		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
#define	X86_FEATURE_SGX_LC		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
#define	X86_FEATURE_SHSTK		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7)
#define	X86_FEATURE_IBT			KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20)
#define	X86_FEATURE_AMX_TILE		KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24)
#define	X86_FEATURE_SPEC_CTRL		KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26)
#define	X86_FEATURE_ARCH_CAPABILITIES	KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29)
#define	X86_FEATURE_PKS			KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31)
#define	X86_FEATURE_XTILECFG		KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17)
#define	X86_FEATURE_XTILEDATA		KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18)
#define	X86_FEATURE_XSAVES		KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3)
#define	X86_FEATURE_XFD			KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4)
#define X86_FEATURE_XTILEDATA_XFD	KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2)

/*
 * Extended Leafs, a.k.a. AMD defined
 */
#define	X86_FEATURE_SVM			KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
#define	X86_FEATURE_NX			KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
#define	X86_FEATURE_GBPAGES		KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
#define	X86_FEATURE_RDTSCP		KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
#define	X86_FEATURE_LM			KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29)
#define	X86_FEATURE_INVTSC		KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8)
#define	X86_FEATURE_RDPRU		KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4)
#define	X86_FEATURE_AMD_IBPB		KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12)
#define	X86_FEATURE_NPT			KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0)
#define	X86_FEATURE_LBRV		KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1)
#define	X86_FEATURE_NRIPS		KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3)
#define X86_FEATURE_TSCRATEMSR          KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
#define X86_FEATURE_PAUSEFILTER         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
#define X86_FEATURE_PFTHRESHOLD         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
#define	X86_FEATURE_VGIF		KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
#define X86_FEATURE_SEV			KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
#define X86_FEATURE_SEV_ES		KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)

/*
 * KVM defined paravirt features.
 */
#define X86_FEATURE_KVM_CLOCKSOURCE	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0)
#define X86_FEATURE_KVM_NOP_IO_DELAY	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1)
#define X86_FEATURE_KVM_MMU_OP		KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2)
#define X86_FEATURE_KVM_CLOCKSOURCE2	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3)
#define X86_FEATURE_KVM_ASYNC_PF	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4)
#define X86_FEATURE_KVM_STEAL_TIME	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5)
#define X86_FEATURE_KVM_PV_EOI		KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6)
#define X86_FEATURE_KVM_PV_UNHALT	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7)
/* Bit 8 apparently isn't used?!?! */
#define X86_FEATURE_KVM_PV_TLB_FLUSH	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9)
#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10)
#define X86_FEATURE_KVM_PV_SEND_IPI	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11)
#define X86_FEATURE_KVM_POLL_CONTROL	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12)
#define X86_FEATURE_KVM_PV_SCHED_YIELD	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13)
#define X86_FEATURE_KVM_ASYNC_PF_INT	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14)
#define X86_FEATURE_KVM_MSI_EXT_DEST_ID	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15)
#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16)
#define X86_FEATURE_KVM_MIGRATION_CONTROL	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17)

/*
 * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit
 * value/property as opposed to a single-bit feature.  Again, pack the info
 * into a 64-bit value to pass by value with no overhead.
 */
struct kvm_x86_cpu_property {
	u32	function;
	u8	index;
	u8	reg;
	u8	lo_bit;
	u8	hi_bit;
};
#define	KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit)			\
({										\
	struct kvm_x86_cpu_property property = {				\
		.function = fn,							\
		.index = idx,							\
		.reg = KVM_CPUID_##gpr,						\
		.lo_bit = low_bit,						\
		.hi_bit = high_bit,						\
	};									\
										\
	kvm_static_assert(low_bit < high_bit);					\
	kvm_static_assert((fn & 0xc0000000) == 0 ||				\
			  (fn & 0xc0000000) == 0x40000000 ||			\
			  (fn & 0xc0000000) == 0x80000000 ||			\
			  (fn & 0xc0000000) == 0xc0000000);			\
	kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE));	\
	property;								\
})
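/*
 * Illustrative usage: properties are multi-bit fields, read via
 * this_cpu_property()/kvm_cpu_property() defined later in this header, e.g.
 *
 *	uint32_t maxphyaddr = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
 *
 * The variable name is illustrative only.
 */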

#define X86_PROPERTY_MAX_BASIC_LEAF		KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
#define X86_PROPERTY_PMU_VERSION		KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
#define X86_PROPERTY_PMU_NR_GP_COUNTERS		KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH	KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH	KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
#define X86_PROPERTY_PMU_EVENTS_MASK		KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK	KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS	KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH	KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)

#define X86_PROPERTY_SUPPORTED_XCR0_LO		KVM_X86_CPU_PROPERTY(0xd,  0, EAX,  0, 31)
#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0	KVM_X86_CPU_PROPERTY(0xd,  0, EBX,  0, 31)
#define X86_PROPERTY_XSTATE_MAX_SIZE		KVM_X86_CPU_PROPERTY(0xd,  0, ECX,  0, 31)
#define X86_PROPERTY_SUPPORTED_XCR0_HI		KVM_X86_CPU_PROPERTY(0xd,  0, EDX,  0, 31)

#define X86_PROPERTY_XSTATE_TILE_SIZE		KVM_X86_CPU_PROPERTY(0xd, 18, EAX,  0, 31)
#define X86_PROPERTY_XSTATE_TILE_OFFSET		KVM_X86_CPU_PROPERTY(0xd, 18, EBX,  0, 31)
#define X86_PROPERTY_AMX_MAX_PALETTE_TABLES	KVM_X86_CPU_PROPERTY(0x1d, 0, EAX,  0, 31)
#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES	KVM_X86_CPU_PROPERTY(0x1d, 1, EAX,  0, 15)
#define X86_PROPERTY_AMX_BYTES_PER_TILE		KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31)
#define X86_PROPERTY_AMX_BYTES_PER_ROW		KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0,  15)
#define X86_PROPERTY_AMX_NR_TILE_REGS		KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31)
#define X86_PROPERTY_AMX_MAX_ROWS		KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0,  15)

#define X86_PROPERTY_MAX_KVM_LEAF		KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31)

#define X86_PROPERTY_MAX_EXT_LEAF		KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
#define X86_PROPERTY_MAX_PHY_ADDR		KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
#define X86_PROPERTY_MAX_VIRT_ADDR		KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
#define X86_PROPERTY_SEV_C_BIT			KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
#define X86_PROPERTY_PHYS_ADDR_REDUCTION	KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)

#define X86_PROPERTY_MAX_CENTAUR_LEAF		KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)

/*
 * Intel's architectural PMU events are bizarre.  They have a "feature" bit
 * that indicates the feature is _not_ supported, and a property that states
 * the length of the bit mask of unsupported features.  A feature is supported
 * if the size of the bit mask is larger than the "unavailable" bit, and said
 * bit is not set.  Fixed counters also have bizarre enumeration, but inverted
 * from arch events for general purpose counters.  Fixed counters are supported
 * if a feature flag is set **OR** the total number of fixed counters is
 * greater than the index of the counter.
 *
 * Wrap the events for general purpose and fixed counters to simplify checking
 * whether or not a given architectural event is supported.
 */
struct kvm_x86_pmu_feature {
	struct kvm_x86_cpu_feature f;
};
#define	KVM_X86_PMU_FEATURE(__reg, __bit)				\
({									\
	struct kvm_x86_pmu_feature feature = {				\
		.f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit),		\
	};								\
									\
	kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX ||		\
			  KVM_CPUID_##__reg == KVM_CPUID_ECX);		\
	feature;							\
})

#define X86_PMU_FEATURE_CPU_CYCLES			KVM_X86_PMU_FEATURE(EBX, 0)
#define X86_PMU_FEATURE_INSNS_RETIRED			KVM_X86_PMU_FEATURE(EBX, 1)
#define X86_PMU_FEATURE_REFERENCE_CYCLES		KVM_X86_PMU_FEATURE(EBX, 2)
#define X86_PMU_FEATURE_LLC_REFERENCES			KVM_X86_PMU_FEATURE(EBX, 3)
#define X86_PMU_FEATURE_LLC_MISSES			KVM_X86_PMU_FEATURE(EBX, 4)
#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED		KVM_X86_PMU_FEATURE(EBX, 5)
#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED		KVM_X86_PMU_FEATURE(EBX, 6)
#define X86_PMU_FEATURE_TOPDOWN_SLOTS			KVM_X86_PMU_FEATURE(EBX, 7)

#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED		KVM_X86_PMU_FEATURE(ECX, 0)
#define X86_PMU_FEATURE_CPU_CYCLES_FIXED		KVM_X86_PMU_FEATURE(ECX, 1)
#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED	KVM_X86_PMU_FEATURE(ECX, 2)
#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED		KVM_X86_PMU_FEATURE(ECX, 3)
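/*
 * Illustrative usage: this_pmu_has()/kvm_pmu_has(), defined later in this
 * header, hide the inverted "unavailable bit" semantics described above, e.g.
 *
 *	TEST_REQUIRE(kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED));
 *
 * TEST_REQUIRE() comes from test_util.h.
 */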

static inline unsigned int x86_family(unsigned int eax)
{
	unsigned int x86;

	x86 = (eax >> 8) & 0xf;

	if (x86 == 0xf)
		x86 += (eax >> 20) & 0xff;

	return x86;
}

static inline unsigned int x86_model(unsigned int eax)
{
	return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
}
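/*
 * Worked example (illustrative): for CPUID.0x1 EAX (FMS) = 0x806ec, the family
 * is (eax >> 8) & 0xf = 0x6 (not 0xf, so the extended family is not added) and
 * the model is ((eax >> 12) & 0xf0) | ((eax >> 4) & 0xf) = 0x80 | 0xe = 0x8e.
 */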

/* Page table bitfield declarations */
#define PTE_PRESENT_MASK        BIT_ULL(0)
#define PTE_WRITABLE_MASK       BIT_ULL(1)
#define PTE_USER_MASK           BIT_ULL(2)
#define PTE_ACCESSED_MASK       BIT_ULL(5)
#define PTE_DIRTY_MASK          BIT_ULL(6)
#define PTE_LARGE_MASK          BIT_ULL(7)
#define PTE_GLOBAL_MASK         BIT_ULL(8)
#define PTE_NX_MASK             BIT_ULL(63)

#define PHYSICAL_PAGE_MASK      GENMASK_ULL(51, 12)

#define PAGE_SHIFT		12
#define PAGE_SIZE		(1ULL << PAGE_SHIFT)
#define PAGE_MASK		(~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK)

#define HUGEPAGE_SHIFT(x)	(PAGE_SHIFT + (((x) - 1) * 9))
#define HUGEPAGE_SIZE(x)	(1UL << HUGEPAGE_SHIFT(x))
#define HUGEPAGE_MASK(x)	(~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK)
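/*
 * E.g. HUGEPAGE_SHIFT(2) = 12 + 9 = 21 (2MiB pages) and HUGEPAGE_SHIFT(3) =
 * 12 + 18 = 30 (1GiB pages), matching the x86-64 paging levels.
 */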

#define PTE_GET_PA(pte)		((pte) & PHYSICAL_PAGE_MASK)
#define PTE_GET_PFN(pte)        (PTE_GET_PA(pte) >> PAGE_SHIFT)

/* General Registers in 64-Bit Mode */
struct gpr64_regs {
	u64 rax;
	u64 rcx;
	u64 rdx;
	u64 rbx;
	u64 rsp;
	u64 rbp;
	u64 rsi;
	u64 rdi;
	u64 r8;
	u64 r9;
	u64 r10;
	u64 r11;
	u64 r12;
	u64 r13;
	u64 r14;
	u64 r15;
};

struct desc64 {
	uint16_t limit0;
	uint16_t base0;
	unsigned base1:8, type:4, s:1, dpl:2, p:1;
	unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
	uint32_t base3;
	uint32_t zero1;
} __attribute__((packed));

struct desc_ptr {
	uint16_t size;
	uint64_t address;
} __attribute__((packed));

struct kvm_x86_state {
	struct kvm_xsave *xsave;
	struct kvm_vcpu_events events;
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	struct kvm_xcrs xcrs;
	struct kvm_sregs sregs;
	struct kvm_debugregs debugregs;
	union {
		struct kvm_nested_state nested;
		char nested_[16384];
	};
	struct kvm_msrs msrs;
};

static inline uint64_t get_desc64_base(const struct desc64 *desc)
{
	return ((uint64_t)desc->base3 << 32) |
		(desc->base0 | ((desc->base1) << 16) | ((desc->base2) << 24));
}

static inline uint64_t rdtsc(void)
{
	uint32_t eax, edx;
	uint64_t tsc_val;
	/*
	 * The lfence is to wait (on Intel CPUs) until all previous
	 * instructions have been executed.  If software requires RDTSC to be
	 * executed prior to execution of any subsequent instruction, it can
	 * execute LFENCE immediately after RDTSC.
	 */
	__asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
	tsc_val = ((uint64_t)edx) << 32 | eax;
	return tsc_val;
}

static inline uint64_t rdtscp(uint32_t *aux)
{
	uint32_t eax, edx;

	__asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
	return ((uint64_t)edx) << 32 | eax;
}

static inline uint64_t rdmsr(uint32_t msr)
{
	uint32_t a, d;

	__asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");

	return a | ((uint64_t) d << 32);
}

static inline void wrmsr(uint32_t msr, uint64_t value)
{
	uint32_t a = value;
	uint32_t d = value >> 32;

	__asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
}
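/*
 * Guest usage sketch (illustrative): the MSR accessors are thin instruction
 * wrappers, so a read-modify-write looks like
 *
 *	wrmsr(MSR_IA32_MISC_ENABLE, rdmsr(MSR_IA32_MISC_ENABLE) | BIT_ULL(0));
 *
 * MSR_IA32_MISC_ENABLE comes from <asm/msr-index.h>; the MSR and bit chosen
 * here are purely illustrative.
 */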


static inline uint16_t inw(uint16_t port)
{
	uint16_t tmp;

	__asm__ __volatile__("in %%dx, %%ax"
		: /* output */ "=a" (tmp)
		: /* input */ "d" (port));

	return tmp;
}

static inline uint16_t get_es(void)
{
	uint16_t es;

	__asm__ __volatile__("mov %%es, %[es]"
			     : /* output */ [es]"=rm"(es));
	return es;
}

static inline uint16_t get_cs(void)
{
	uint16_t cs;

	__asm__ __volatile__("mov %%cs, %[cs]"
			     : /* output */ [cs]"=rm"(cs));
	return cs;
}

static inline uint16_t get_ss(void)
{
	uint16_t ss;

	__asm__ __volatile__("mov %%ss, %[ss]"
			     : /* output */ [ss]"=rm"(ss));
	return ss;
}

static inline uint16_t get_ds(void)
{
	uint16_t ds;

	__asm__ __volatile__("mov %%ds, %[ds]"
			     : /* output */ [ds]"=rm"(ds));
	return ds;
}

static inline uint16_t get_fs(void)
{
	uint16_t fs;

	__asm__ __volatile__("mov %%fs, %[fs]"
			     : /* output */ [fs]"=rm"(fs));
	return fs;
}

static inline uint16_t get_gs(void)
{
	uint16_t gs;

	__asm__ __volatile__("mov %%gs, %[gs]"
			     : /* output */ [gs]"=rm"(gs));
	return gs;
}

static inline uint16_t get_tr(void)
{
	uint16_t tr;

	__asm__ __volatile__("str %[tr]"
			     : /* output */ [tr]"=rm"(tr));
	return tr;
}

static inline uint64_t get_cr0(void)
{
	uint64_t cr0;

	__asm__ __volatile__("mov %%cr0, %[cr0]"
			     : /* output */ [cr0]"=r"(cr0));
	return cr0;
}

static inline uint64_t get_cr3(void)
{
	uint64_t cr3;

	__asm__ __volatile__("mov %%cr3, %[cr3]"
			     : /* output */ [cr3]"=r"(cr3));
	return cr3;
}

static inline uint64_t get_cr4(void)
{
	uint64_t cr4;

	__asm__ __volatile__("mov %%cr4, %[cr4]"
			     : /* output */ [cr4]"=r"(cr4));
	return cr4;
}

static inline void set_cr4(uint64_t val)
{
	__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}

static inline u64 xgetbv(u32 index)
{
	u32 eax, edx;

	__asm__ __volatile__("xgetbv;"
		     : "=a" (eax), "=d" (edx)
		     : "c" (index));
	return eax | ((u64)edx << 32);
}

static inline void xsetbv(u32 index, u64 value)
{
	u32 eax = value;
	u32 edx = value >> 32;

	__asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
}

static inline void wrpkru(u32 pkru)
{
	/* Note, ECX and EDX are architecturally required to be '0'. */
	asm volatile(".byte 0x0f,0x01,0xef\n\t"
		     : : "a" (pkru), "c"(0), "d"(0));
}

static inline struct desc_ptr get_gdt(void)
{
	struct desc_ptr gdt;
	__asm__ __volatile__("sgdt %[gdt]"
			     : /* output */ [gdt]"=m"(gdt));
	return gdt;
}

static inline struct desc_ptr get_idt(void)
{
	struct desc_ptr idt;
	__asm__ __volatile__("sidt %[idt]"
			     : /* output */ [idt]"=m"(idt));
	return idt;
}

static inline void outl(uint16_t port, uint32_t value)
{
	__asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
}

static inline void __cpuid(uint32_t function, uint32_t index,
			   uint32_t *eax, uint32_t *ebx,
			   uint32_t *ecx, uint32_t *edx)
{
	*eax = function;
	*ecx = index;

	asm volatile("cpuid"
	    : "=a" (*eax),
	      "=b" (*ebx),
	      "=c" (*ecx),
	      "=d" (*edx)
	    : "0" (*eax), "2" (*ecx)
	    : "memory");
}

static inline void cpuid(uint32_t function,
			 uint32_t *eax, uint32_t *ebx,
			 uint32_t *ecx, uint32_t *edx)
{
	return __cpuid(function, 0, eax, ebx, ecx, edx);
}

static inline uint32_t this_cpu_fms(void)
{
	uint32_t eax, ebx, ecx, edx;

	cpuid(1, &eax, &ebx, &ecx, &edx);
	return eax;
}

static inline uint32_t this_cpu_family(void)
{
	return x86_family(this_cpu_fms());
}

static inline uint32_t this_cpu_model(void)
{
	return x86_model(this_cpu_fms());
}

static inline bool this_cpu_vendor_string_is(const char *vendor)
{
	const uint32_t *chunk = (const uint32_t *)vendor;
	uint32_t eax, ebx, ecx, edx;

	cpuid(0, &eax, &ebx, &ecx, &edx);
	return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}

static inline bool this_cpu_is_intel(void)
{
	return this_cpu_vendor_string_is("GenuineIntel");
}

/*
 * Exclude early K5 samples with a vendor string of "AMDisbetter!"
 */
static inline bool this_cpu_is_amd(void)
{
	return this_cpu_vendor_string_is("AuthenticAMD");
}

static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
				      uint8_t reg, uint8_t lo, uint8_t hi)
{
	uint32_t gprs[4];

	__cpuid(function, index,
		&gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX],
		&gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]);

	return (gprs[reg] & GENMASK(hi, lo)) >> lo;
}

static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
{
	return __this_cpu_has(feature.function, feature.index,
			      feature.reg, feature.bit, feature.bit);
}

static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property)
{
	return __this_cpu_has(property.function, property.index,
			      property.reg, property.lo_bit, property.hi_bit);
}

static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
{
	uint32_t max_leaf;

	switch (property.function & 0xc0000000) {
	case 0:
		max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
		break;
	case 0x40000000:
		max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
		break;
	case 0x80000000:
		max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
		break;
	case 0xc0000000:
		max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
	}
	return max_leaf >= property.function;
}

static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
{
	uint32_t nr_bits;

	if (feature.f.reg == KVM_CPUID_EBX) {
		nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
		return nr_bits > feature.f.bit && !this_cpu_has(feature.f);
	}

	GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX);
	nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
	return nr_bits > feature.f.bit || this_cpu_has(feature.f);
}

static __always_inline uint64_t this_cpu_supported_xcr0(void)
{
	if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
		return 0;

	return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
	       ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
}

typedef u32		__attribute__((vector_size(16))) sse128_t;
#define __sse128_u	union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
#define sse128_lo(x)	({ __sse128_u t; t.vec = x; t.as_u64[0]; })
#define sse128_hi(x)	({ __sse128_u t; t.vec = x; t.as_u64[1]; })

static inline void read_sse_reg(int reg, sse128_t *data)
{
	switch (reg) {
	case 0:
		asm("movdqa %%xmm0, %0" : "=m"(*data));
		break;
	case 1:
		asm("movdqa %%xmm1, %0" : "=m"(*data));
		break;
	case 2:
		asm("movdqa %%xmm2, %0" : "=m"(*data));
		break;
	case 3:
		asm("movdqa %%xmm3, %0" : "=m"(*data));
		break;
	case 4:
		asm("movdqa %%xmm4, %0" : "=m"(*data));
		break;
	case 5:
		asm("movdqa %%xmm5, %0" : "=m"(*data));
		break;
	case 6:
		asm("movdqa %%xmm6, %0" : "=m"(*data));
		break;
	case 7:
		asm("movdqa %%xmm7, %0" : "=m"(*data));
		break;
	default:
		BUG();
	}
}

static inline void write_sse_reg(int reg, const sse128_t *data)
{
	switch (reg) {
	case 0:
		asm("movdqa %0, %%xmm0" : : "m"(*data));
		break;
	case 1:
		asm("movdqa %0, %%xmm1" : : "m"(*data));
		break;
	case 2:
		asm("movdqa %0, %%xmm2" : : "m"(*data));
		break;
	case 3:
		asm("movdqa %0, %%xmm3" : : "m"(*data));
		break;
	case 4:
		asm("movdqa %0, %%xmm4" : : "m"(*data));
		break;
	case 5:
		asm("movdqa %0, %%xmm5" : : "m"(*data));
		break;
	case 6:
		asm("movdqa %0, %%xmm6" : : "m"(*data));
		break;
	case 7:
		asm("movdqa %0, %%xmm7" : : "m"(*data));
		break;
	default:
		BUG();
	}
}

static inline void cpu_relax(void)
{
	asm volatile("rep; nop" ::: "memory");
}

#define ud2()			\
	__asm__ __volatile__(	\
		"ud2\n"	\
		)

#define hlt()			\
	__asm__ __volatile__(	\
		"hlt\n"	\
		)

struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu);
void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state);
void kvm_x86_state_cleanup(struct kvm_x86_state *state);

const struct kvm_msr_list *kvm_get_msr_index_list(void);
const struct kvm_msr_list *kvm_get_feature_msr_index_list(void);
bool kvm_msr_is_in_save_restore_list(uint32_t msr_index);
uint64_t kvm_get_feature_msr(uint64_t msr_index);

static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu,
				 struct kvm_msrs *msrs)
{
	int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs);

	TEST_ASSERT(r == msrs->nmsrs,
		    "KVM_GET_MSRS failed, r: %i (failed on MSR %x)",
		    r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
}

static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs)
{
	int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs);

	TEST_ASSERT(r == msrs->nmsrs,
		    "KVM_SET_MSRS failed, r: %i (failed on MSR %x)",
		    r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
}

static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu,
				      struct kvm_debugregs *debugregs)
{
	vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs);
}

static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu,
				      struct kvm_debugregs *debugregs)
{
	vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs);
}

static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu,
				  struct kvm_xsave *xsave)
{
	vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave);
}

static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu,
				   struct kvm_xsave *xsave)
{
	vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave);
}

static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu,
				  struct kvm_xsave *xsave)
{
	vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave);
}

static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu,
				 struct kvm_xcrs *xcrs)
{
	vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs);
}

static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
{
	vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs);
}

const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
					       uint32_t function, uint32_t index);
const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu);

static inline uint32_t kvm_cpu_fms(void)
{
	return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax;
}

static inline uint32_t kvm_cpu_family(void)
{
	return x86_family(kvm_cpu_fms());
}

static inline uint32_t kvm_cpu_model(void)
{
	return x86_model(kvm_cpu_fms());
}

bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
		   struct kvm_x86_cpu_feature feature);

static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature)
{
	return kvm_cpuid_has(kvm_get_supported_cpuid(), feature);
}

uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
			    struct kvm_x86_cpu_property property);

static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property)
{
	return kvm_cpuid_property(kvm_get_supported_cpuid(), property);
}

static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
{
	uint32_t max_leaf;

	switch (property.function & 0xc0000000) {
	case 0:
		max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
		break;
	case 0x40000000:
		max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
		break;
	case 0x80000000:
		max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
		break;
	case 0xc0000000:
		max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
	}
	return max_leaf >= property.function;
}

static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
{
	uint32_t nr_bits;

	if (feature.f.reg == KVM_CPUID_EBX) {
		nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
		return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f);
	}

	TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX);
	nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
	return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
}

static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
{
	if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
		return 0;

	return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
	       ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
}

static inline size_t kvm_cpuid2_size(int nr_entries)
{
	return sizeof(struct kvm_cpuid2) +
	       sizeof(struct kvm_cpuid_entry2) * nr_entries;
}

/*
 * Allocate a "struct kvm_cpuid2" instance, with the 0-length array of
 * entries sized to hold @nr_entries.  The caller is responsible for freeing
 * the struct.
 */
static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
{
	struct kvm_cpuid2 *cpuid;

	cpuid = malloc(kvm_cpuid2_size(nr_entries));
	TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2");

	cpuid->nent = nr_entries;

	return cpuid;
}
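/*
 * Usage sketch (illustrative): callers typically size the entry array
 * generously and free the struct when done, e.g.
 *
 *	struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(100);
 *
 *	vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
 *	free(cpuid);
 *
 * The entry count and the ioctl shown are illustrative only.
 */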

void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu);

static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
							      uint32_t function,
							      uint32_t index)
{
	return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
							  function, index);
}

static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
							    uint32_t function)
{
	return __vcpu_get_cpuid_entry(vcpu, function, 0);
}

static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
{
	int r;

	TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
	r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
	if (r)
		return r;

	/* On success, refresh the cache to pick up adjustments made by KVM. */
	vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
	return 0;
}

static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
{
	TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
	vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);

	/* Refresh the cache to pick up adjustments made by KVM. */
	vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
}

void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
			     struct kvm_x86_cpu_property property,
			     uint32_t value);
void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);

void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);

static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
				  struct kvm_x86_cpu_feature feature)
{
	struct kvm_cpuid_entry2 *entry;

	entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
	return *((&entry->eax) + feature.reg) & BIT(feature.bit);
}

void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
				     struct kvm_x86_cpu_feature feature,
				     bool set);

static inline void vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu,
					  struct kvm_x86_cpu_feature feature)
{
	vcpu_set_or_clear_cpuid_feature(vcpu, feature, true);

}

static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
					    struct kvm_x86_cpu_feature feature)
{
	vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
}

uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);

/*
 * Assert on an MSR access(es) and pretty print the MSR name when possible.
 * Note, the caller provides the stringified name so that the name of the macro
 * printed, not the value the macro resolves to (due to macro expansion).
 */
#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...)				\
do {										\
	if (__builtin_constant_p(msr)) {					\
		TEST_ASSERT(cond, fmt, str, args);				\
	} else if (!(cond)) {							\
		char buf[16];							\
										\
		snprintf(buf, sizeof(buf), "MSR 0x%x", msr);			\
		TEST_ASSERT(cond, fmt, buf, args);				\
	}									\
} while (0)

/*
 * Returns true if KVM should return the last written value when reading an MSR
 * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
 * is changing, etc.  This is NOT an exhaustive list!  The intent is to filter
 * out MSRs that are not durable _and_ that a selftest wants to write.
 */
static inline bool is_durable_msr(uint32_t msr)
{
	return msr != MSR_IA32_TSC;
}

#define vcpu_set_msr(vcpu, msr, val)							\
do {											\
	uint64_t r, v = val;								\
											\
	TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1,				\
			"KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v);	\
	if (!is_durable_msr(msr))							\
		break;									\
	r = vcpu_get_msr(vcpu, msr);							\
	TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\
} while (0)

void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
void kvm_init_vm_address_properties(struct kvm_vm *vm);
bool vm_is_unrestricted_guest(struct kvm_vm *vm);

struct ex_regs {
	uint64_t rax, rcx, rdx, rbx;
	uint64_t rbp, rsi, rdi;
	uint64_t r8, r9, r10, r11;
	uint64_t r12, r13, r14, r15;
	uint64_t vector;
	uint64_t error_code;
	uint64_t rip;
	uint64_t cs;
	uint64_t rflags;
};

struct idt_entry {
	uint16_t offset0;
	uint16_t selector;
	uint16_t ist : 3;
	uint16_t : 5;
	uint16_t type : 4;
	uint16_t : 1;
	uint16_t dpl : 2;
	uint16_t p : 1;
	uint16_t offset1;
	uint32_t offset2;
	uint32_t reserved;
};

void vm_init_descriptor_tables(struct kvm_vm *vm);
void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
void vm_install_exception_handler(struct kvm_vm *vm, int vector,
			void (*handler)(struct ex_regs *));

/* If a toddler were to say "abracadabra". */
#define KVM_EXCEPTION_MAGIC 0xabacadabaULL

/*
 * KVM selftest exception fixup uses registers to coordinate with the exception
 * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory
 * per-CPU data.  Using only registers avoids having to map memory into the
 * guest, doesn't require a valid, stable GS.base, and reduces the risk of
 * recursive faults when accessing memory in the handler.  The downside to
 * using registers is that it restricts what registers can be used by the actual
 * instruction.  But, selftests are 64-bit only, making register pressure a
 * minor concern.  Use r9-r11 as they are volatile, i.e. don't need to be saved
 * by the callee, and except for r11 are not implicit parameters to any
 * instructions.  Ideally, fixup would use r8-r10 and thus avoid implicit
 * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V
 * is higher priority than testing non-faulting SYSCALL/SYSRET.
 *
 * Note, the fixup handler deliberately does not handle #DE, i.e. the vector
 * is guaranteed to be non-zero on fault.
 *
 * REGISTER INPUTS:
 * r9  = MAGIC
 * r10 = RIP
 * r11 = new RIP on fault
 *
 * REGISTER OUTPUTS:
 * r9  = exception vector (non-zero)
 * r10 = error code
 */
#define __KVM_ASM_SAFE(insn, fep)				\
	"mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t"	\
	"lea 1f(%%rip), %%r10\n\t"				\
	"lea 2f(%%rip), %%r11\n\t"				\
	fep "1: " insn "\n\t"					\
	"xor %%r9, %%r9\n\t"					\
	"2:\n\t"						\
	"mov  %%r9b, %[vector]\n\t"				\
	"mov  %%r10, %[error_code]\n\t"

#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "")
#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP)

#define KVM_ASM_SAFE_OUTPUTS(v, ec)	[vector] "=qm"(v), [error_code] "=rm"(ec)
#define KVM_ASM_SAFE_CLOBBERS	"r9", "r10", "r11"

#define kvm_asm_safe(insn, inputs...)					\
({									\
	uint64_t ign_error_code;					\
	uint8_t vector;							\
									\
	asm volatile(KVM_ASM_SAFE(insn)					\
		     : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)	\
		     : inputs						\
		     : KVM_ASM_SAFE_CLOBBERS);				\
	vector;								\
})

#define kvm_asm_safe_ec(insn, error_code, inputs...)			\
({									\
	uint8_t vector;							\
									\
	asm volatile(KVM_ASM_SAFE(insn)					\
		     : KVM_ASM_SAFE_OUTPUTS(vector, error_code)		\
		     : inputs						\
		     : KVM_ASM_SAFE_CLOBBERS);				\
	vector;								\
})
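/*
 * Usage sketch (illustrative): execute an instruction that may fault and check
 * which vector, if any, it raised.  For a hypothetical MSR index "msr":
 *
 *	uint8_t vector = kvm_asm_safe("wrmsr", "a"(0u), "d"(0u), "c"(msr));
 *
 *	GUEST_ASSERT(vector == 13);
 *
 * where 13 is the architectural #GP vector; a vector of 0 means the
 * instruction did not fault.
 */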

#define kvm_asm_safe_fep(insn, inputs...)				\
({									\
	uint64_t ign_error_code;					\
	uint8_t vector;							\
									\
	asm volatile(KVM_ASM_SAFE_FEP(insn)				\
		     : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)	\
		     : inputs						\
		     : KVM_ASM_SAFE_CLOBBERS);				\
	vector;								\
})

#define kvm_asm_safe_ec_fep(insn, error_code, inputs...)		\
({									\
	uint8_t vector;							\
									\
	asm volatile(KVM_ASM_SAFE_FEP(insn)				\
		     : KVM_ASM_SAFE_OUTPUTS(vector, error_code)		\
		     : inputs						\
		     : KVM_ASM_SAFE_CLOBBERS);				\
	vector;								\
})

#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)			\
static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val)	\
{									\
	uint64_t error_code;						\
	uint8_t vector;							\
	uint32_t a, d;							\
									\
	asm volatile(KVM_ASM_SAFE##_FEP(#insn)				\
		     : "=a"(a), "=d"(d),				\
		       KVM_ASM_SAFE_OUTPUTS(vector, error_code)		\
		     : "c"(idx)						\
		     : KVM_ASM_SAFE_CLOBBERS);				\
									\
	*val = (uint64_t)a | ((uint64_t)d << 32);			\
	return vector;							\
}

/*
 * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that
 * use ECX as an input index, and EDX:EAX as a 64-bit output.
 */
#define BUILD_READ_U64_SAFE_HELPERS(insn)				\
	BUILD_READ_U64_SAFE_HELPER(insn, , )				\
	BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)			\

BUILD_READ_U64_SAFE_HELPERS(rdmsr)
BUILD_READ_U64_SAFE_HELPERS(rdpmc)
BUILD_READ_U64_SAFE_HELPERS(xgetbv)

static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
{
	return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
}

static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
{
	u32 eax = value;
	u32 edx = value >> 32;

	return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index));
}

bool kvm_is_tdp_enabled(void);

static inline bool kvm_is_pmu_enabled(void)
{
	return get_kvm_param_bool("enable_pmu");
}

static inline bool kvm_is_forced_emulation_enabled(void)
{
	return !!get_kvm_param_integer("force_emulation_prefix");
}

uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
				    int *level);
uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);

uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
		       uint64_t a3);
uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);

static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa,
						     uint64_t size, uint64_t flags)
{
	return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0);
}

static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
					       uint64_t flags)
{
	uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags);

	GUEST_ASSERT(!ret);
}

void __vm_xsave_require_permission(uint64_t xfeature, const char *name);

#define vm_xsave_require_permission(xfeature)	\
	__vm_xsave_require_permission(xfeature, #xfeature)

enum pg_level {
	PG_LEVEL_NONE,
	PG_LEVEL_4K,
	PG_LEVEL_2M,
	PG_LEVEL_1G,
	PG_LEVEL_512G,
	PG_LEVEL_NUM
};

#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))

#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)

void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
		    uint64_t nr_bytes, int level);
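/*
 * Usage sketch (illustrative): map a single 2MiB hugepage at a hypothetical
 * guest virtual/physical address pair:
 *
 *	__virt_pg_map(vm, gva, gpa, PG_LEVEL_2M);
 *
 * "vm", "gva" and "gpa" stand in for a test's own variables.
 */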

/*
 * Basic CPU control in CR0
 */
#define X86_CR0_PE          (1UL<<0) /* Protection Enable */
#define X86_CR0_MP          (1UL<<1) /* Monitor Coprocessor */
#define X86_CR0_EM          (1UL<<2) /* Emulation */
#define X86_CR0_TS          (1UL<<3) /* Task Switched */
#define X86_CR0_ET          (1UL<<4) /* Extension Type */
#define X86_CR0_NE          (1UL<<5) /* Numeric Error */
#define X86_CR0_WP          (1UL<<16) /* Write Protect */
#define X86_CR0_AM          (1UL<<18) /* Alignment Mask */
#define X86_CR0_NW          (1UL<<29) /* Not Write-through */
#define X86_CR0_CD          (1UL<<30) /* Cache Disable */
#define X86_CR0_PG          (1UL<<31) /* Paging */

#define PFERR_PRESENT_BIT 0
#define PFERR_WRITE_BIT 1
#define PFERR_USER_BIT 2
#define PFERR_RSVD_BIT 3
#define PFERR_FETCH_BIT 4
#define PFERR_PK_BIT 5
#define PFERR_SGX_BIT 15
#define PFERR_GUEST_FINAL_BIT 32
#define PFERR_GUEST_PAGE_BIT 33
#define PFERR_IMPLICIT_ACCESS_BIT 48

#define PFERR_PRESENT_MASK	BIT(PFERR_PRESENT_BIT)
#define PFERR_WRITE_MASK	BIT(PFERR_WRITE_BIT)
#define PFERR_USER_MASK		BIT(PFERR_USER_BIT)
#define PFERR_RSVD_MASK		BIT(PFERR_RSVD_BIT)
#define PFERR_FETCH_MASK	BIT(PFERR_FETCH_BIT)
#define PFERR_PK_MASK		BIT(PFERR_PK_BIT)
#define PFERR_SGX_MASK		BIT(PFERR_SGX_BIT)
#define PFERR_GUEST_FINAL_MASK	BIT_ULL(PFERR_GUEST_FINAL_BIT)
#define PFERR_GUEST_PAGE_MASK	BIT_ULL(PFERR_GUEST_PAGE_BIT)
#define PFERR_IMPLICIT_ACCESS	BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
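/*
 * E.g. a user-mode write to a present, read-only page yields an error code of
 * PFERR_PRESENT_MASK | PFERR_WRITE_MASK | PFERR_USER_MASK (0x7).
 */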

bool sys_clocksource_is_based_on_tsc(void);

#endif /* SELFTEST_KVM_PROCESSOR_H */