Commit 92a0610b authored by Linus Torvalds

Merge tag 'x86_cpu_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 cpu updates from Borislav Petkov:

 - Add support for hardware-enforced cache coherency on AMD which
   obviates the need to flush cachelines before changing the PTE
   encryption bit (Krish Sadhukhan)

 - Add Centaur initialization support for families >= 7 (Tony W Wang-oc)

 - Add a feature flag for, and expose TSX suspend load tracking feature
   to KVM (Cathy Zhang)

 - Emulate SLDT and STR so that Windows programs don't crash on UMIP
   machines (Brendan Shanks and Ricardo Neri)

 - Use the new SERIALIZE insn on Intel hardware which supports it
   (Ricardo Neri)

 - Misc cleanups and fixes

* tag 'x86_cpu_for_v5.10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  KVM: SVM: Don't flush cache if hardware enforces cache coherency across encryption domains
  x86/mm/pat: Don't flush cache if hardware enforces cache coherency across encryption domains
  x86/cpu: Add hardware-enforced cache coherency as a CPUID feature
  x86/cpu/centaur: Add Centaur family >=7 CPUs initialization support
  x86/cpu/centaur: Replace two-condition switch-case with an if statement
  x86/kvm: Expose TSX Suspend Load Tracking feature
  x86/cpufeatures: Enumerate TSX suspend load address tracking instructions
  x86/umip: Add emulation/spoofing for SLDT and STR instructions
  x86/cpu: Fix typos and improve the comments in sync_core()
  x86/cpu: Use XGETBV and XSETBV mnemonics in fpu/internal.h
  x86/cpu: Use SERIALIZE in sync_core() when available
parents ca1b6692 e1ebb2b4
...@@ -96,7 +96,7 @@ ...@@ -96,7 +96,7 @@
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
/* free ( 3*32+17) */ #define X86_FEATURE_SME_COHERENT ( 3*32+17) /* "" AMD hardware-enforced cache coherency */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */ #define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
...@@ -368,6 +368,7 @@ ...@@ -368,6 +368,7 @@
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */ #define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */ #define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */ #define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
#define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
......
...@@ -602,9 +602,7 @@ static inline u64 xgetbv(u32 index) ...@@ -602,9 +602,7 @@ static inline u64 xgetbv(u32 index)
{ {
u32 eax, edx; u32 eax, edx;
asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ asm volatile("xgetbv" : "=a" (eax), "=d" (edx) : "c" (index));
: "=a" (eax), "=d" (edx)
: "c" (index));
return eax + ((u64)edx << 32); return eax + ((u64)edx << 32);
} }
...@@ -613,8 +611,7 @@ static inline void xsetbv(u32 index, u64 value) ...@@ -613,8 +611,7 @@ static inline void xsetbv(u32 index, u64 value)
u32 eax = value; u32 eax = value;
u32 edx = value >> 32; u32 edx = value >> 32;
asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */ asm volatile("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
: : "a" (eax), "d" (edx), "c" (index));
} }
#endif /* _ASM_X86_FPU_INTERNAL_H */ #endif /* _ASM_X86_FPU_INTERNAL_H */
...@@ -234,6 +234,12 @@ static inline void clwb(volatile void *__p) ...@@ -234,6 +234,12 @@ static inline void clwb(volatile void *__p)
#define nop() asm volatile ("nop") #define nop() asm volatile ("nop")
/*
 * serialize() - execute the SERIALIZE instruction.
 *
 * Takes no arguments and returns nothing. The "memory" clobber makes the
 * asm statement act as a compiler barrier as well.
 *
 * This helper does not test for CPU support itself; sync_core() checks
 * X86_FEATURE_SERIALIZE before calling it.
 * NOTE(review): presumably every other caller must do the same - confirm.
 */
static inline void serialize(void)
{
/*
 * Instruction opcode for SERIALIZE, emitted as raw bytes; the mnemonic
 * is only supported in binutils >= 2.35.
 */
asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");
}
#endif /* __KERNEL__ */ #endif /* __KERNEL__ */
#endif /* _ASM_X86_SPECIAL_INSNS_H */ #endif /* _ASM_X86_SPECIAL_INSNS_H */
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#include <linux/preempt.h> #include <linux/preempt.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm/cpufeature.h> #include <asm/cpufeature.h>
#include <asm/special_insns.h>
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
static inline void iret_to_self(void) static inline void iret_to_self(void)
...@@ -46,22 +47,34 @@ static inline void iret_to_self(void) ...@@ -46,22 +47,34 @@ static inline void iret_to_self(void)
* *
* b) Text was modified on a different CPU, may subsequently be * b) Text was modified on a different CPU, may subsequently be
* executed on this CPU, and you want to make sure the new version * executed on this CPU, and you want to make sure the new version
* gets executed. This generally means you're calling this in a IPI. * gets executed. This generally means you're calling this in an IPI.
* *
* If you're calling this for a different reason, you're probably doing * If you're calling this for a different reason, you're probably doing
* it wrong. * it wrong.
*
* Like all of Linux's memory ordering operations, this is a
* compiler barrier as well.
*/ */
static inline void sync_core(void) static inline void sync_core(void)
{ {
/* /*
* There are quite a few ways to do this. IRET-to-self is nice * The SERIALIZE instruction is the most straightforward way to
* because it works on every CPU, at any CPL (so it's compatible * do this, but it is not universally available.
* with paravirtualization), and it never exits to a hypervisor. */
* The only down sides are that it's a bit slow (it seems to be if (static_cpu_has(X86_FEATURE_SERIALIZE)) {
* a bit more than 2x slower than the fastest options) and that serialize();
* it unmasks NMIs. The "push %cs" is needed because, in return;
* paravirtual environments, __KERNEL_CS may not be a valid CS }
* value when we do IRET directly.
/*
* For all other processors, there are quite a few ways to do this.
* IRET-to-self is nice because it works on every CPU, at any CPL
* (so it's compatible with paravirtualization), and it never exits
* to a hypervisor. The only downsides are that it's a bit slow
* (it seems to be a bit more than 2x slower than the fastest
* options) and that it unmasks NMIs. The "push %cs" is needed,
* because in paravirtual environments __KERNEL_CS may not be a
* valid CS value when we do IRET directly.
* *
* In case NMI unmasking or performance ever becomes a problem, * In case NMI unmasking or performance ever becomes a problem,
* the next best option appears to be MOV-to-CR2 and an * the next best option appears to be MOV-to-CR2 and an
...@@ -71,9 +84,6 @@ static inline void sync_core(void) ...@@ -71,9 +84,6 @@ static inline void sync_core(void)
* CPUID is the conventional way, but it's nasty: it doesn't * CPUID is the conventional way, but it's nasty: it doesn't
* exist on some 486-like CPUs, and it usually exits to a * exist on some 486-like CPUs, and it usually exits to a
* hypervisor. * hypervisor.
*
* Like all of Linux's memory ordering operations, this is a
* compiler barrier as well.
*/ */
iret_to_self(); iret_to_self();
} }
......
...@@ -65,6 +65,9 @@ static void init_c3(struct cpuinfo_x86 *c) ...@@ -65,6 +65,9 @@ static void init_c3(struct cpuinfo_x86 *c)
c->x86_cache_alignment = c->x86_clflush_size * 2; c->x86_cache_alignment = c->x86_clflush_size * 2;
set_cpu_cap(c, X86_FEATURE_REP_GOOD); set_cpu_cap(c, X86_FEATURE_REP_GOOD);
} }
if (c->x86 >= 7)
set_cpu_cap(c, X86_FEATURE_REP_GOOD);
} }
enum { enum {
...@@ -90,18 +93,15 @@ enum { ...@@ -90,18 +93,15 @@ enum {
static void early_init_centaur(struct cpuinfo_x86 *c) static void early_init_centaur(struct cpuinfo_x86 *c)
{ {
switch (c->x86) {
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
case 5:
/* Emulate MTRRs using Centaur's MCR. */ /* Emulate MTRRs using Centaur's MCR. */
if (c->x86 == 5)
set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR); set_cpu_cap(c, X86_FEATURE_CENTAUR_MCR);
break;
#endif #endif
case 6: if ((c->x86 == 6 && c->x86_model >= 0xf) ||
if (c->x86_model >= 0xf) (c->x86 >= 7))
set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
break;
}
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_SYSENTER32); set_cpu_cap(c, X86_FEATURE_SYSENTER32);
#endif #endif
...@@ -145,9 +145,8 @@ static void init_centaur(struct cpuinfo_x86 *c) ...@@ -145,9 +145,8 @@ static void init_centaur(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
} }
switch (c->x86) {
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
case 5: if (c->x86 == 5) {
switch (c->x86_model) { switch (c->x86_model) {
case 4: case 4:
name = "C6"; name = "C6";
...@@ -207,12 +206,10 @@ static void init_centaur(struct cpuinfo_x86 *c) ...@@ -207,12 +206,10 @@ static void init_centaur(struct cpuinfo_x86 *c)
c->x86_cache_size = (cc>>24)+(dd>>24); c->x86_cache_size = (cc>>24)+(dd>>24);
} }
sprintf(c->x86_model_id, "WinChip %s", name); sprintf(c->x86_model_id, "WinChip %s", name);
break; }
#endif #endif
case 6: if (c->x86 == 6 || c->x86 >= 7)
init_c3(c); init_c3(c);
break;
}
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
#endif #endif
......
...@@ -41,6 +41,7 @@ static const struct cpuid_bit cpuid_bits[] = { ...@@ -41,6 +41,7 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 }, { X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
{ X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 }, { X86_FEATURE_SME, CPUID_EAX, 0, 0x8000001f, 0 },
{ X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 }, { X86_FEATURE_SEV, CPUID_EAX, 1, 0x8000001f, 0 },
{ X86_FEATURE_SME_COHERENT, CPUID_EAX, 10, 0x8000001f, 0 },
{ 0, 0, 0, 0, 0 } { 0, 0, 0, 0, 0 }
}; };
......
...@@ -45,11 +45,12 @@ ...@@ -45,11 +45,12 @@
* value that lies close to the top of the kernel memory. The limit for the GDT * value that lies close to the top of the kernel memory. The limit for the GDT
* and the IDT are set to zero. * and the IDT are set to zero.
* *
* Given that SLDT and STR are not commonly used in programs that run on WineHQ * The instruction SMSW is emulated to return the value that the register CR0
* or DOSEMU2, they are not emulated.
*
* The instruction smsw is emulated to return the value that the register CR0
* has at boot time as set in the head_32. * has at boot time as set in the head_32.
* SLDT and STR are emulated to return the values that the kernel programmatically
* assigns:
* - SLDT returns (GDT_ENTRY_LDT * 8) if an LDT has been set, 0 if not.
* - STR returns (GDT_ENTRY_TSS * 8).
* *
* Emulation is provided for both 32-bit and 64-bit processes. * Emulation is provided for both 32-bit and 64-bit processes.
* *
...@@ -244,16 +245,34 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst, ...@@ -244,16 +245,34 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst,
*data_size += UMIP_GDT_IDT_LIMIT_SIZE; *data_size += UMIP_GDT_IDT_LIMIT_SIZE;
memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE); memcpy(data, &dummy_limit, UMIP_GDT_IDT_LIMIT_SIZE);
} else if (umip_inst == UMIP_INST_SMSW) { } else if (umip_inst == UMIP_INST_SMSW || umip_inst == UMIP_INST_SLDT ||
unsigned long dummy_value = CR0_STATE; umip_inst == UMIP_INST_STR) {
unsigned long dummy_value;
if (umip_inst == UMIP_INST_SMSW) {
dummy_value = CR0_STATE;
} else if (umip_inst == UMIP_INST_STR) {
dummy_value = GDT_ENTRY_TSS * 8;
} else if (umip_inst == UMIP_INST_SLDT) {
#ifdef CONFIG_MODIFY_LDT_SYSCALL
down_read(&current->mm->context.ldt_usr_sem);
if (current->mm->context.ldt)
dummy_value = GDT_ENTRY_LDT * 8;
else
dummy_value = 0;
up_read(&current->mm->context.ldt_usr_sem);
#else
dummy_value = 0;
#endif
}
/* /*
* Even though the CR0 register has 4 bytes, the number * For these 3 instructions, the number
* of bytes to be copied in the result buffer is determined * of bytes to be copied in the result buffer is determined
* by whether the operand is a register or a memory location. * by whether the operand is a register or a memory location.
* If operand is a register, return as many bytes as the operand * If operand is a register, return as many bytes as the operand
* size. If operand is memory, return only the two least * size. If operand is memory, return only the two least
* significant bytes of CR0. * significant bytes.
*/ */
if (X86_MODRM_MOD(insn->modrm.value) == 3) if (X86_MODRM_MOD(insn->modrm.value) == 3)
*data_size = insn->opnd_bytes; *data_size = insn->opnd_bytes;
...@@ -261,7 +280,6 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst, ...@@ -261,7 +280,6 @@ static int emulate_umip_insn(struct insn *insn, int umip_inst,
*data_size = 2; *data_size = 2;
memcpy(data, &dummy_value, *data_size); memcpy(data, &dummy_value, *data_size);
/* STR and SLDT are not emulated */
} else { } else {
return -EINVAL; return -EINVAL;
} }
...@@ -383,10 +401,6 @@ bool fixup_umip_exception(struct pt_regs *regs) ...@@ -383,10 +401,6 @@ bool fixup_umip_exception(struct pt_regs *regs)
umip_pr_warn(regs, "%s instruction cannot be used by applications.\n", umip_pr_warn(regs, "%s instruction cannot be used by applications.\n",
umip_insns[umip_inst]); umip_insns[umip_inst]);
/* Do not emulate (spoof) SLDT or STR. */
if (umip_inst == UMIP_INST_STR || umip_inst == UMIP_INST_SLDT)
return false;
umip_pr_warn(regs, "For now, expensive software emulation returns the result.\n"); umip_pr_warn(regs, "For now, expensive software emulation returns the result.\n");
if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size, if (emulate_umip_insn(&insn, umip_inst, dummy_data, &dummy_data_size,
......
...@@ -371,7 +371,7 @@ void kvm_set_cpu_caps(void) ...@@ -371,7 +371,7 @@ void kvm_set_cpu_caps(void)
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) | F(MD_CLEAR) | F(AVX512_VP2INTERSECT) | F(FSRM) |
F(SERIALIZE) F(SERIALIZE) | F(TSXLDTRK)
); );
/* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */ /* TSC_ADJUST and ARCH_CAPABILITIES are emulated in software. */
......
...@@ -384,7 +384,8 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages) ...@@ -384,7 +384,8 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
uint8_t *page_virtual; uint8_t *page_virtual;
unsigned long i; unsigned long i;
if (npages == 0 || pages == NULL) if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
pages == NULL)
return; return;
for (i = 0; i < npages; i++) { for (i = 0; i < npages; i++) {
......
...@@ -1999,7 +1999,7 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) ...@@ -1999,7 +1999,7 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
/* /*
* Before changing the encryption attribute, we need to flush caches. * Before changing the encryption attribute, we need to flush caches.
*/ */
cpa_flush(&cpa, 1); cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
ret = __change_page_attr_set_clr(&cpa, 1); ret = __change_page_attr_set_clr(&cpa, 1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment