Commit 22b8cc3e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'x86_mm_for_6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 LAM (Linear Address Masking) support from Dave Hansen:
 "Add support for the new Linear Address Masking CPU feature.

  This is similar to ARM's Top Byte Ignore and allows userspace to store
  metadata in some bits of pointers without masking it out before use"

* tag 'x86_mm_for_6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm/iommu/sva: Do not allow to set FORCE_TAGGED_SVA bit from outside
  x86/mm/iommu/sva: Fix error code for LAM enabling failure due to SVA
  selftests/x86/lam: Add test cases for LAM vs thread creation
  selftests/x86/lam: Add ARCH_FORCE_TAGGED_SVA test cases for linear-address masking
  selftests/x86/lam: Add inherit test cases for linear-address masking
  selftests/x86/lam: Add io_uring test cases for linear-address masking
  selftests/x86/lam: Add mmap and SYSCALL test cases for linear-address masking
  selftests/x86/lam: Add malloc and tag-bits test cases for linear-address masking
  x86/mm/iommu/sva: Make LAM and SVA mutually exclusive
  iommu/sva: Replace pasid_valid() helper with mm_valid_pasid()
  mm: Expose untagging mask in /proc/$PID/status
  x86/mm: Provide arch_prctl() interface for LAM
  x86/mm: Reduce untagged_addr() overhead for systems without LAM
  x86/uaccess: Provide untagged_addr() and remove tags before address check
  mm: Introduce untagged_addr_remote()
  x86/mm: Handle LAM on context switch
  x86: CPUID and CR3/CR4 flags for Linear Address Masking
  x86: Allow atomic MM_CONTEXT flags setting
  x86/mm: Rework address range check in get_user() and put_user()
parents 7b664cc3 97740266
...@@ -288,6 +288,12 @@ void post_ttbr_update_workaround(void); ...@@ -288,6 +288,12 @@ void post_ttbr_update_workaround(void);
unsigned long arm64_mm_context_get(struct mm_struct *mm); unsigned long arm64_mm_context_get(struct mm_struct *mm);
void arm64_mm_context_put(struct mm_struct *mm); void arm64_mm_context_put(struct mm_struct *mm);
#define mm_untag_mask mm_untag_mask
/*
 * arm64 Top Byte Ignore (TBI): the top 8 bits of a user pointer may carry
 * a tag, so the mask of address-significant bits keeps only the low 56.
 */
static inline unsigned long mm_untag_mask(struct mm_struct *mm)
{
return -1UL >> 8;
}
#include <asm-generic/mmu_context.h> #include <asm-generic/mmu_context.h>
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
......
...@@ -185,6 +185,12 @@ static inline void finish_arch_post_lock_switch(void) ...@@ -185,6 +185,12 @@ static inline void finish_arch_post_lock_switch(void)
} }
} }
#define mm_untag_mask mm_untag_mask
/*
 * sparc64 ADI: the top adi_nbits() bits of a user pointer hold the version
 * tag, so drop that many high bits from the significant-address mask.
 */
static inline unsigned long mm_untag_mask(struct mm_struct *mm)
{
return -1UL >> adi_nbits();
}
#include <asm-generic/mmu_context.h> #include <asm-generic/mmu_context.h>
#endif /* !(__ASSEMBLY__) */ #endif /* !(__ASSEMBLY__) */
......
...@@ -8,8 +8,10 @@ ...@@ -8,8 +8,10 @@
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/mm_types.h>
#include <asm/asi.h> #include <asm/asi.h>
#include <asm/spitfire.h> #include <asm/spitfire.h>
#include <asm/pgtable.h>
#include <asm/processor.h> #include <asm/processor.h>
#include <asm-generic/access_ok.h> #include <asm-generic/access_ok.h>
......
...@@ -2290,6 +2290,17 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING ...@@ -2290,6 +2290,17 @@ config RANDOMIZE_MEMORY_PHYSICAL_PADDING
If unsure, leave at the default value. If unsure, leave at the default value.
config ADDRESS_MASKING
bool "Linear Address Masking support"
depends on X86_64
help
Linear Address Masking (LAM) modifies the checking that is applied
to 64-bit linear addresses, allowing software to use the
untranslated address bits for metadata.
The capability can be used for efficient address sanitizers (ASAN)
implementation and for optimizations in JITs.
config HOTPLUG_CPU config HOTPLUG_CPU
def_bool y def_bool y
depends on SMP depends on SMP
......
...@@ -317,7 +317,7 @@ static struct vm_area_struct gate_vma __ro_after_init = { ...@@ -317,7 +317,7 @@ static struct vm_area_struct gate_vma __ro_after_init = {
struct vm_area_struct *get_gate_vma(struct mm_struct *mm) struct vm_area_struct *get_gate_vma(struct mm_struct *mm)
{ {
#ifdef CONFIG_COMPAT #ifdef CONFIG_COMPAT
if (!mm || !(mm->context.flags & MM_CONTEXT_HAS_VSYSCALL)) if (!mm || !test_bit(MM_CONTEXT_HAS_VSYSCALL, &mm->context.flags))
return NULL; return NULL;
#endif #endif
if (vsyscall_mode == NONE) if (vsyscall_mode == NONE)
......
...@@ -321,6 +321,7 @@ ...@@ -321,6 +321,7 @@
#define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */ #define X86_FEATURE_LKGS (12*32+18) /* "" Load "kernel" (userspace) GS */
#define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */ #define X86_FEATURE_AMX_FP16 (12*32+21) /* "" AMX fp16 Support */
#define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */ #define X86_FEATURE_AVX_IFMA (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
#define X86_FEATURE_LAM (12*32+26) /* Linear Address Masking */
/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
#define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */
......
...@@ -75,6 +75,12 @@ ...@@ -75,6 +75,12 @@
# define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31)) # define DISABLE_CALL_DEPTH_TRACKING (1 << (X86_FEATURE_CALL_DEPTH & 31))
#endif #endif
#ifdef CONFIG_ADDRESS_MASKING
# define DISABLE_LAM 0
#else
# define DISABLE_LAM (1 << (X86_FEATURE_LAM & 31))
#endif
#ifdef CONFIG_INTEL_IOMMU_SVM #ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0 # define DISABLE_ENQCMD 0
#else #else
...@@ -115,7 +121,7 @@ ...@@ -115,7 +121,7 @@
#define DISABLED_MASK10 0 #define DISABLED_MASK10 0
#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \ #define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
DISABLE_CALL_DEPTH_TRACKING) DISABLE_CALL_DEPTH_TRACKING)
#define DISABLED_MASK12 0 #define DISABLED_MASK12 (DISABLE_LAM)
#define DISABLED_MASK13 0 #define DISABLED_MASK13 0
#define DISABLED_MASK14 0 #define DISABLED_MASK14 0
#define DISABLED_MASK15 0 #define DISABLED_MASK15 0
......
...@@ -9,9 +9,13 @@ ...@@ -9,9 +9,13 @@
#include <linux/bits.h> #include <linux/bits.h>
/* Uprobes on this MM assume 32-bit code */ /* Uprobes on this MM assume 32-bit code */
#define MM_CONTEXT_UPROBE_IA32 BIT(0) #define MM_CONTEXT_UPROBE_IA32 0
/* vsyscall page is accessible on this MM */ /* vsyscall page is accessible on this MM */
#define MM_CONTEXT_HAS_VSYSCALL BIT(1) #define MM_CONTEXT_HAS_VSYSCALL 1
/* Do not allow changing LAM mode */
#define MM_CONTEXT_LOCK_LAM 2
/* Allow LAM and SVA coexisting */
#define MM_CONTEXT_FORCE_TAGGED_SVA 3
/* /*
* x86 has arch-specific MMU state beyond what lives in mm_struct. * x86 has arch-specific MMU state beyond what lives in mm_struct.
...@@ -39,7 +43,15 @@ typedef struct { ...@@ -39,7 +43,15 @@ typedef struct {
#endif #endif
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
unsigned short flags; unsigned long flags;
#endif
#ifdef CONFIG_ADDRESS_MASKING
/* Active LAM mode: X86_CR3_LAM_U48 or X86_CR3_LAM_U57 or 0 (disabled) */
unsigned long lam_cr3_mask;
/* Significant bits of the virtual address. Excludes tag bits. */
u64 untag_mask;
#endif #endif
struct mutex lock; struct mutex lock;
......
...@@ -85,6 +85,51 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next) ...@@ -85,6 +85,51 @@ static inline void switch_ldt(struct mm_struct *prev, struct mm_struct *next)
} }
#endif #endif
#ifdef CONFIG_ADDRESS_MASKING
/* CR3 bits to enable the mm's active LAM mode (0 if LAM is disabled). */
static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
{
return mm->context.lam_cr3_mask;
}
/* Copy LAM state to a child mm on fork (see arch_dup_mmap()). */
static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
{
mm->context.lam_cr3_mask = oldmm->context.lam_cr3_mask;
mm->context.untag_mask = oldmm->context.untag_mask;
}
#define mm_untag_mask mm_untag_mask
/* Mask of address-significant (non-tag) bits for this mm. */
static inline unsigned long mm_untag_mask(struct mm_struct *mm)
{
return mm->context.untag_mask;
}
/* All bits significant: the no-LAM default, set at init/exec time. */
static inline void mm_reset_untag_mask(struct mm_struct *mm)
{
mm->context.untag_mask = -1UL;
}
#define arch_pgtable_dma_compat arch_pgtable_dma_compat
/*
 * DMA-compatible unless LAM is active without the user having opted in
 * to LAM/SVA coexistence via ARCH_FORCE_TAGGED_SVA.
 */
static inline bool arch_pgtable_dma_compat(struct mm_struct *mm)
{
return !mm_lam_cr3_mask(mm) ||
test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags);
}
#else
/* !CONFIG_ADDRESS_MASKING stubs: LAM never enabled, nothing to copy/reset. */
static inline unsigned long mm_lam_cr3_mask(struct mm_struct *mm)
{
return 0;
}
static inline void dup_lam(struct mm_struct *oldmm, struct mm_struct *mm)
{
}
static inline void mm_reset_untag_mask(struct mm_struct *mm)
{
}
#endif
#define enter_lazy_tlb enter_lazy_tlb #define enter_lazy_tlb enter_lazy_tlb
extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); extern void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);
...@@ -109,6 +154,7 @@ static inline int init_new_context(struct task_struct *tsk, ...@@ -109,6 +154,7 @@ static inline int init_new_context(struct task_struct *tsk,
mm->context.execute_only_pkey = -1; mm->context.execute_only_pkey = -1;
} }
#endif #endif
mm_reset_untag_mask(mm);
init_new_context_ldt(mm); init_new_context_ldt(mm);
return 0; return 0;
} }
...@@ -162,6 +208,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) ...@@ -162,6 +208,7 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
{ {
arch_dup_pkeys(oldmm, mm); arch_dup_pkeys(oldmm, mm);
paravirt_enter_mmap(mm); paravirt_enter_mmap(mm);
dup_lam(oldmm, mm);
return ldt_dup_context(oldmm, mm); return ldt_dup_context(oldmm, mm);
} }
...@@ -175,7 +222,7 @@ static inline void arch_exit_mmap(struct mm_struct *mm) ...@@ -175,7 +222,7 @@ static inline void arch_exit_mmap(struct mm_struct *mm)
static inline bool is_64bit_mm(struct mm_struct *mm) static inline bool is_64bit_mm(struct mm_struct *mm)
{ {
return !IS_ENABLED(CONFIG_IA32_EMULATION) || return !IS_ENABLED(CONFIG_IA32_EMULATION) ||
!(mm->context.flags & MM_CONTEXT_UPROBE_IA32); !test_bit(MM_CONTEXT_UPROBE_IA32, &mm->context.flags);
} }
#else #else
static inline bool is_64bit_mm(struct mm_struct *mm) static inline bool is_64bit_mm(struct mm_struct *mm)
......
...@@ -28,6 +28,8 @@ ...@@ -28,6 +28,8 @@
* On systems with SME, one bit (in a variable position!) is stolen to indicate * On systems with SME, one bit (in a variable position!) is stolen to indicate
* that the top-level paging structure is encrypted. * that the top-level paging structure is encrypted.
* *
 * On systems with LAM, bits 61 and 62 are used to indicate LAM mode.
*
* All of the remaining bits indicate the physical address of the top-level * All of the remaining bits indicate the physical address of the top-level
* paging structure. * paging structure.
* *
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#ifndef _ASM_X86_TLBFLUSH_H #ifndef _ASM_X86_TLBFLUSH_H
#define _ASM_X86_TLBFLUSH_H #define _ASM_X86_TLBFLUSH_H
#include <linux/mm.h> #include <linux/mm_types.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <asm/processor.h> #include <asm/processor.h>
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
#include <asm/invpcid.h> #include <asm/invpcid.h>
#include <asm/pti.h> #include <asm/pti.h>
#include <asm/processor-flags.h> #include <asm/processor-flags.h>
#include <asm/pgtable.h>
void __flush_tlb_all(void); void __flush_tlb_all(void);
...@@ -53,6 +54,15 @@ static inline void cr4_clear_bits(unsigned long mask) ...@@ -53,6 +54,15 @@ static inline void cr4_clear_bits(unsigned long mask)
local_irq_restore(flags); local_irq_restore(flags);
} }
#ifdef CONFIG_ADDRESS_MASKING
DECLARE_PER_CPU(u64, tlbstate_untag_mask);
static inline u64 current_untag_mask(void)
{
return this_cpu_read(tlbstate_untag_mask);
}
#endif
#ifndef MODULE #ifndef MODULE
/* /*
* 6 because 6 should be plenty and struct tlb_state will fit in two cache * 6 because 6 should be plenty and struct tlb_state will fit in two cache
...@@ -101,6 +111,16 @@ struct tlb_state { ...@@ -101,6 +111,16 @@ struct tlb_state {
*/ */
bool invalidate_other; bool invalidate_other;
#ifdef CONFIG_ADDRESS_MASKING
/*
* Active LAM mode.
*
* X86_CR3_LAM_U57/U48 shifted right by X86_CR3_LAM_U57_BIT or 0 if LAM
* disabled.
*/
u8 lam;
#endif
/* /*
* Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate * Mask that contains TLB_NR_DYN_ASIDS+1 bits to indicate
* the corresponding user PCID needs a flush next time we * the corresponding user PCID needs a flush next time we
...@@ -357,6 +377,32 @@ static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd) ...@@ -357,6 +377,32 @@ static inline bool huge_pmd_needs_flush(pmd_t oldpmd, pmd_t newpmd)
} }
#define huge_pmd_needs_flush huge_pmd_needs_flush #define huge_pmd_needs_flush huge_pmd_needs_flush
#ifdef CONFIG_ADDRESS_MASKING
/*
 * Reconstruct this CPU's LAM CR3 bits from the compact per-CPU copy
 * (cpu_tlbstate.lam stores the mask shifted down by X86_CR3_LAM_U57_BIT).
 */
static inline u64 tlbstate_lam_cr3_mask(void)
{
u64 lam = this_cpu_read(cpu_tlbstate.lam);
return lam << X86_CR3_LAM_U57_BIT;
}
/*
 * Cache the mm's LAM mode and untag mask in per-CPU state, e.g. on
 * context switch, so untagged_addr() can read them via %gs without
 * touching the mm.
 */
static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
{
this_cpu_write(cpu_tlbstate.lam,
mm->context.lam_cr3_mask >> X86_CR3_LAM_U57_BIT);
this_cpu_write(tlbstate_untag_mask, mm->context.untag_mask);
}
#else
/* !CONFIG_ADDRESS_MASKING: no LAM bits in CR3, nothing to cache. */
static inline u64 tlbstate_lam_cr3_mask(void)
{
return 0;
}
static inline void set_tlbstate_lam_mode(struct mm_struct *mm)
{
}
#endif
#endif /* !MODULE */ #endif /* !MODULE */
static inline void __native_tlb_flush_global(unsigned long cr4) static inline void __native_tlb_flush_global(unsigned long cr4)
......
...@@ -7,11 +7,14 @@ ...@@ -7,11 +7,14 @@
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/instrumented.h> #include <linux/instrumented.h>
#include <linux/kasan-checks.h> #include <linux/kasan-checks.h>
#include <linux/mm_types.h>
#include <linux/string.h> #include <linux/string.h>
#include <linux/mmap_lock.h>
#include <asm/asm.h> #include <asm/asm.h>
#include <asm/page.h> #include <asm/page.h>
#include <asm/smap.h> #include <asm/smap.h>
#include <asm/extable.h> #include <asm/extable.h>
#include <asm/tlbflush.h>
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
static inline bool pagefault_disabled(void); static inline bool pagefault_disabled(void);
...@@ -21,6 +24,57 @@ static inline bool pagefault_disabled(void); ...@@ -21,6 +24,57 @@ static inline bool pagefault_disabled(void);
# define WARN_ON_IN_IRQ() # define WARN_ON_IN_IRQ()
#endif #endif
#ifdef CONFIG_ADDRESS_MASKING
/*
* Mask out tag bits from the address.
*
* Magic with the 'sign' allows to untag userspace pointer without any branches
* while leaving kernel addresses intact.
*/
static inline unsigned long __untagged_addr(unsigned long addr)
{
long sign;
/*
 * Refer to tlbstate_untag_mask directly to avoid a RIP-relative
 * relocation inside the alternative instructions: such a relocation
 * would be wrong once the instructions are copied to their target
 * place by the alternatives patching.
 *
 * sign becomes 0 for a user pointer (bit 63 clear) and -1UL for a
 * kernel pointer, so OR-ing it into the mask leaves kernel addresses
 * intact while user addresses get their tag bits cleared.  With
 * !X86_FEATURE_LAM the alternative is empty and addr passes through.
 */
asm (ALTERNATIVE("",
"sar $63, %[sign]\n\t" /* user_ptr ? 0 : -1UL */
"or %%gs:tlbstate_untag_mask, %[sign]\n\t"
"and %[sign], %[addr]\n\t", X86_FEATURE_LAM)
: [addr] "+r" (addr), [sign] "=r" (sign)
: "m" (tlbstate_untag_mask), "[sign]" (addr));
return addr;
}
#define untagged_addr(addr) ({ \
unsigned long __addr = (__force unsigned long)(addr); \
(__force __typeof__(addr))__untagged_addr(__addr); \
})
/*
 * Untag an address belonging to a possibly-foreign mm.  The per-CPU copy
 * of the untag mask only tracks the current mm, so read it from the mm
 * itself.  The mask is written under the mmap write lock (see
 * prctl_enable_tagged_addr()), hence the locking assertion below.  As in
 * __untagged_addr(), the sign extension keeps kernel addresses intact.
 */
static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
unsigned long addr)
{
long sign = addr >> 63;
mmap_assert_locked(mm);
addr &= (mm)->context.untag_mask | sign;
return addr;
}
#define untagged_addr_remote(mm, addr) ({ \
unsigned long __addr = (__force unsigned long)(addr); \
(__force __typeof__(addr))__untagged_addr_remote(mm, __addr); \
})
#else
#define untagged_addr(addr) (addr)
#endif
/** /**
* access_ok - Checks if a user space pointer is valid * access_ok - Checks if a user space pointer is valid
* @addr: User space pointer to start of block to check * @addr: User space pointer to start of block to check
...@@ -38,10 +92,10 @@ static inline bool pagefault_disabled(void); ...@@ -38,10 +92,10 @@ static inline bool pagefault_disabled(void);
* Return: true (nonzero) if the memory block may be valid, false (zero) * Return: true (nonzero) if the memory block may be valid, false (zero)
* if it is definitely invalid. * if it is definitely invalid.
*/ */
#define access_ok(addr, size) \ #define access_ok(addr, size) \
({ \ ({ \
WARN_ON_IN_IRQ(); \ WARN_ON_IN_IRQ(); \
likely(__access_ok(addr, size)); \ likely(__access_ok(untagged_addr(addr), size)); \
}) })
#include <asm-generic/access_ok.h> #include <asm-generic/access_ok.h>
......
...@@ -23,4 +23,9 @@ ...@@ -23,4 +23,9 @@
#define ARCH_MAP_VDSO_32 0x2002 #define ARCH_MAP_VDSO_32 0x2002
#define ARCH_MAP_VDSO_64 0x2003 #define ARCH_MAP_VDSO_64 0x2003
#define ARCH_GET_UNTAG_MASK 0x4001
#define ARCH_ENABLE_TAGGED_ADDR 0x4002
#define ARCH_GET_MAX_TAG_BITS 0x4003
#define ARCH_FORCE_TAGGED_SVA 0x4004
#endif /* _ASM_X86_PRCTL_H */ #endif /* _ASM_X86_PRCTL_H */
...@@ -82,6 +82,10 @@ ...@@ -82,6 +82,10 @@
#define X86_CR3_PCID_BITS 12 #define X86_CR3_PCID_BITS 12
#define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL)) #define X86_CR3_PCID_MASK (_AC((1UL << X86_CR3_PCID_BITS) - 1, UL))
#define X86_CR3_LAM_U57_BIT 61 /* Activate LAM for userspace, 62:57 bits masked */
#define X86_CR3_LAM_U57 _BITULL(X86_CR3_LAM_U57_BIT)
#define X86_CR3_LAM_U48_BIT 62 /* Activate LAM for userspace, 62:48 bits masked */
#define X86_CR3_LAM_U48 _BITULL(X86_CR3_LAM_U48_BIT)
#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */ #define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT) #define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
...@@ -132,6 +136,8 @@ ...@@ -132,6 +136,8 @@
#define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT) #define X86_CR4_PKE _BITUL(X86_CR4_PKE_BIT)
#define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */ #define X86_CR4_CET_BIT 23 /* enable Control-flow Enforcement Technology */
#define X86_CR4_CET _BITUL(X86_CR4_CET_BIT) #define X86_CR4_CET _BITUL(X86_CR4_CET_BIT)
#define X86_CR4_LAM_SUP_BIT 28 /* LAM for supervisor pointers */
#define X86_CR4_LAM_SUP _BITUL(X86_CR4_LAM_SUP_BIT)
/* /*
* x86-64 Task Priority Register, CR8 * x86-64 Task Priority Register, CR8
......
...@@ -48,6 +48,7 @@ ...@@ -48,6 +48,7 @@
#include <asm/frame.h> #include <asm/frame.h>
#include <asm/unwind.h> #include <asm/unwind.h>
#include <asm/tdx.h> #include <asm/tdx.h>
#include <asm/mmu_context.h>
#include "process.h" #include "process.h"
...@@ -162,6 +163,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) ...@@ -162,6 +163,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
savesegment(es, p->thread.es); savesegment(es, p->thread.es);
savesegment(ds, p->thread.ds); savesegment(ds, p->thread.ds);
if (p->mm && (clone_flags & (CLONE_VM | CLONE_VFORK)) == CLONE_VM)
set_bit(MM_CONTEXT_LOCK_LAM, &p->mm->context.flags);
#else #else
p->thread.sp0 = (unsigned long) (childregs + 1); p->thread.sp0 = (unsigned long) (childregs + 1);
savesegment(gs, p->thread.gs); savesegment(gs, p->thread.gs);
...@@ -368,6 +372,8 @@ void arch_setup_new_exec(void) ...@@ -368,6 +372,8 @@ void arch_setup_new_exec(void)
task_clear_spec_ssb_noexec(current); task_clear_spec_ssb_noexec(current);
speculation_ctrl_update(read_thread_flags()); speculation_ctrl_update(read_thread_flags());
} }
mm_reset_untag_mask(current->mm);
} }
#ifdef CONFIG_X86_IOPL_IOPERM #ifdef CONFIG_X86_IOPL_IOPERM
......
...@@ -671,7 +671,7 @@ void set_personality_64bit(void) ...@@ -671,7 +671,7 @@ void set_personality_64bit(void)
task_pt_regs(current)->orig_ax = __NR_execve; task_pt_regs(current)->orig_ax = __NR_execve;
current_thread_info()->status &= ~TS_COMPAT; current_thread_info()->status &= ~TS_COMPAT;
if (current->mm) if (current->mm)
current->mm->context.flags = MM_CONTEXT_HAS_VSYSCALL; __set_bit(MM_CONTEXT_HAS_VSYSCALL, &current->mm->context.flags);
/* TBD: overwrites user setup. Should have two bits. /* TBD: overwrites user setup. Should have two bits.
But 64bit processes have always behaved this way, But 64bit processes have always behaved this way,
...@@ -708,7 +708,7 @@ static void __set_personality_ia32(void) ...@@ -708,7 +708,7 @@ static void __set_personality_ia32(void)
* uprobes applied to this MM need to know this and * uprobes applied to this MM need to know this and
* cannot use user_64bit_mode() at that time. * cannot use user_64bit_mode() at that time.
*/ */
current->mm->context.flags = MM_CONTEXT_UPROBE_IA32; __set_bit(MM_CONTEXT_UPROBE_IA32, &current->mm->context.flags);
} }
current->personality |= force_personality32; current->personality |= force_personality32;
...@@ -743,6 +743,52 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr) ...@@ -743,6 +743,52 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
} }
#endif #endif
#ifdef CONFIG_ADDRESS_MASKING
#define LAM_U57_BITS 6
/*
 * Handle ARCH_ENABLE_TAGGED_ADDR: enable LAM for the current process.
 *
 * @mm:      must be current->mm (enabling via ptrace is not allowed)
 * @nr_bits: number of tag bits requested; 1..LAM_U57_BITS selects LAM_U57
 *
 * Returns 0 on success; -ENODEV without LAM hardware, -EBUSY if the LAM
 * mode is already locked for this mm, -EINTR if the mmap lock wait was
 * interrupted, -EINVAL otherwise.
 */
static int prctl_enable_tagged_addr(struct mm_struct *mm, unsigned long nr_bits)
{
if (!cpu_feature_enabled(X86_FEATURE_LAM))
return -ENODEV;
/* PTRACE_ARCH_PRCTL */
if (current->mm != mm)
return -EINVAL;
/* SVA and LAM are mutually exclusive unless explicitly forced. */
if (mm_valid_pasid(mm) &&
!test_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &mm->context.flags))
return -EINVAL;
if (mmap_write_lock_killable(mm))
return -EINTR;
/* LAM mode may only be set once per mm (locked by a prior enable). */
if (test_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags)) {
mmap_write_unlock(mm);
return -EBUSY;
}
if (!nr_bits) {
mmap_write_unlock(mm);
return -EINVAL;
} else if (nr_bits <= LAM_U57_BITS) {
/* LAM_U57: bits 62:57 become tag bits, excluded from untag_mask. */
mm->context.lam_cr3_mask = X86_CR3_LAM_U57;
mm->context.untag_mask = ~GENMASK(62, 57);
} else {
mmap_write_unlock(mm);
return -EINVAL;
}
/* Activate LAM on this CPU and publish the per-CPU untag state. */
write_cr3(__read_cr3() | mm->context.lam_cr3_mask);
set_tlbstate_lam_mode(mm);
set_bit(MM_CONTEXT_LOCK_LAM, &mm->context.flags);
mmap_write_unlock(mm);
return 0;
}
#endif
long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{ {
int ret = 0; int ret = 0;
...@@ -830,7 +876,23 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2) ...@@ -830,7 +876,23 @@ long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
case ARCH_MAP_VDSO_64: case ARCH_MAP_VDSO_64:
return prctl_map_vdso(&vdso_image_64, arg2); return prctl_map_vdso(&vdso_image_64, arg2);
#endif #endif
#ifdef CONFIG_ADDRESS_MASKING
case ARCH_GET_UNTAG_MASK:
return put_user(task->mm->context.untag_mask,
(unsigned long __user *)arg2);
case ARCH_ENABLE_TAGGED_ADDR:
return prctl_enable_tagged_addr(task->mm, arg2);
case ARCH_FORCE_TAGGED_SVA:
if (current != task)
return -EINVAL;
set_bit(MM_CONTEXT_FORCE_TAGGED_SVA, &task->mm->context.flags);
return 0;
case ARCH_GET_MAX_TAG_BITS:
if (!cpu_feature_enabled(X86_FEATURE_LAM))
return put_user(0, (unsigned long __user *)arg2);
else
return put_user(LAM_U57_BITS, (unsigned long __user *)arg2);
#endif
default: default:
ret = -EINVAL; ret = -EINVAL;
break; break;
......
...@@ -671,15 +671,15 @@ static bool try_fixup_enqcmd_gp(void) ...@@ -671,15 +671,15 @@ static bool try_fixup_enqcmd_gp(void)
if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) if (!cpu_feature_enabled(X86_FEATURE_ENQCMD))
return false; return false;
pasid = current->mm->pasid;
/* /*
* If the mm has not been allocated a * If the mm has not been allocated a
* PASID, the #GP can not be fixed up. * PASID, the #GP can not be fixed up.
*/ */
if (!pasid_valid(pasid)) if (!mm_valid_pasid(current->mm))
return false; return false;
pasid = current->mm->pasid;
/* /*
* Did this thread already have its PASID activated? * Did this thread already have its PASID activated?
* If so, the #GP must be from something else. * If so, the #GP must be from something else.
......
...@@ -37,22 +37,22 @@ ...@@ -37,22 +37,22 @@
#define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC #define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC
#ifdef CONFIG_X86_5LEVEL .macro check_range size:req
#define LOAD_TASK_SIZE_MINUS_N(n) \ .if IS_ENABLED(CONFIG_X86_64)
ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rdx), \ mov %rax, %rdx
__stringify(mov $((1 << 56) - 4096 - (n)),%rdx), X86_FEATURE_LA57 sar $63, %rdx
#else or %rdx, %rax
#define LOAD_TASK_SIZE_MINUS_N(n) \ .else
mov $(TASK_SIZE_MAX - (n)),%_ASM_DX cmp $TASK_SIZE_MAX-\size+1, %eax
#endif jae .Lbad_get_user
sbb %edx, %edx /* array_index_mask_nospec() */
and %edx, %eax
.endif
.endm
.text .text
SYM_FUNC_START(__get_user_1) SYM_FUNC_START(__get_user_1)
LOAD_TASK_SIZE_MINUS_N(0) check_range size=1
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC ASM_STAC
1: movzbl (%_ASM_AX),%edx 1: movzbl (%_ASM_AX),%edx
xor %eax,%eax xor %eax,%eax
...@@ -62,11 +62,7 @@ SYM_FUNC_END(__get_user_1) ...@@ -62,11 +62,7 @@ SYM_FUNC_END(__get_user_1)
EXPORT_SYMBOL(__get_user_1) EXPORT_SYMBOL(__get_user_1)
SYM_FUNC_START(__get_user_2) SYM_FUNC_START(__get_user_2)
LOAD_TASK_SIZE_MINUS_N(1) check_range size=2
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC ASM_STAC
2: movzwl (%_ASM_AX),%edx 2: movzwl (%_ASM_AX),%edx
xor %eax,%eax xor %eax,%eax
...@@ -76,11 +72,7 @@ SYM_FUNC_END(__get_user_2) ...@@ -76,11 +72,7 @@ SYM_FUNC_END(__get_user_2)
EXPORT_SYMBOL(__get_user_2) EXPORT_SYMBOL(__get_user_2)
SYM_FUNC_START(__get_user_4) SYM_FUNC_START(__get_user_4)
LOAD_TASK_SIZE_MINUS_N(3) check_range size=4
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC ASM_STAC
3: movl (%_ASM_AX),%edx 3: movl (%_ASM_AX),%edx
xor %eax,%eax xor %eax,%eax
...@@ -90,30 +82,17 @@ SYM_FUNC_END(__get_user_4) ...@@ -90,30 +82,17 @@ SYM_FUNC_END(__get_user_4)
EXPORT_SYMBOL(__get_user_4) EXPORT_SYMBOL(__get_user_4)
SYM_FUNC_START(__get_user_8) SYM_FUNC_START(__get_user_8)
#ifdef CONFIG_X86_64 check_range size=8
LOAD_TASK_SIZE_MINUS_N(7)
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC ASM_STAC
#ifdef CONFIG_X86_64
4: movq (%_ASM_AX),%rdx 4: movq (%_ASM_AX),%rdx
xor %eax,%eax
ASM_CLAC
RET
#else #else
LOAD_TASK_SIZE_MINUS_N(7)
cmp %_ASM_DX,%_ASM_AX
jae bad_get_user_8
sbb %_ASM_DX, %_ASM_DX /* array_index_mask_nospec() */
and %_ASM_DX, %_ASM_AX
ASM_STAC
4: movl (%_ASM_AX),%edx 4: movl (%_ASM_AX),%edx
5: movl 4(%_ASM_AX),%ecx 5: movl 4(%_ASM_AX),%ecx
#endif
xor %eax,%eax xor %eax,%eax
ASM_CLAC ASM_CLAC
RET RET
#endif
SYM_FUNC_END(__get_user_8) SYM_FUNC_END(__get_user_8)
EXPORT_SYMBOL(__get_user_8) EXPORT_SYMBOL(__get_user_8)
...@@ -166,7 +145,7 @@ EXPORT_SYMBOL(__get_user_nocheck_8) ...@@ -166,7 +145,7 @@ EXPORT_SYMBOL(__get_user_nocheck_8)
SYM_CODE_START_LOCAL(.Lbad_get_user_clac) SYM_CODE_START_LOCAL(.Lbad_get_user_clac)
ASM_CLAC ASM_CLAC
bad_get_user: .Lbad_get_user:
xor %edx,%edx xor %edx,%edx
mov $(-EFAULT),%_ASM_AX mov $(-EFAULT),%_ASM_AX
RET RET
...@@ -184,23 +163,23 @@ SYM_CODE_END(.Lbad_get_user_8_clac) ...@@ -184,23 +163,23 @@ SYM_CODE_END(.Lbad_get_user_8_clac)
#endif #endif
/* get_user */ /* get_user */
_ASM_EXTABLE_UA(1b, .Lbad_get_user_clac) _ASM_EXTABLE(1b, .Lbad_get_user_clac)
_ASM_EXTABLE_UA(2b, .Lbad_get_user_clac) _ASM_EXTABLE(2b, .Lbad_get_user_clac)
_ASM_EXTABLE_UA(3b, .Lbad_get_user_clac) _ASM_EXTABLE(3b, .Lbad_get_user_clac)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
_ASM_EXTABLE_UA(4b, .Lbad_get_user_clac) _ASM_EXTABLE(4b, .Lbad_get_user_clac)
#else #else
_ASM_EXTABLE_UA(4b, .Lbad_get_user_8_clac) _ASM_EXTABLE(4b, .Lbad_get_user_8_clac)
_ASM_EXTABLE_UA(5b, .Lbad_get_user_8_clac) _ASM_EXTABLE(5b, .Lbad_get_user_8_clac)
#endif #endif
/* __get_user */ /* __get_user */
_ASM_EXTABLE_UA(6b, .Lbad_get_user_clac) _ASM_EXTABLE(6b, .Lbad_get_user_clac)
_ASM_EXTABLE_UA(7b, .Lbad_get_user_clac) _ASM_EXTABLE(7b, .Lbad_get_user_clac)
_ASM_EXTABLE_UA(8b, .Lbad_get_user_clac) _ASM_EXTABLE(8b, .Lbad_get_user_clac)
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
_ASM_EXTABLE_UA(9b, .Lbad_get_user_clac) _ASM_EXTABLE(9b, .Lbad_get_user_clac)
#else #else
_ASM_EXTABLE_UA(9b, .Lbad_get_user_8_clac) _ASM_EXTABLE(9b, .Lbad_get_user_8_clac)
_ASM_EXTABLE_UA(10b, .Lbad_get_user_8_clac) _ASM_EXTABLE(10b, .Lbad_get_user_8_clac)
#endif #endif
...@@ -33,20 +33,20 @@ ...@@ -33,20 +33,20 @@
* as they get called from within inline assembly. * as they get called from within inline assembly.
*/ */
#ifdef CONFIG_X86_5LEVEL .macro check_range size:req
#define LOAD_TASK_SIZE_MINUS_N(n) \ .if IS_ENABLED(CONFIG_X86_64)
ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rbx), \ mov %rcx, %rbx
__stringify(mov $((1 << 56) - 4096 - (n)),%rbx), X86_FEATURE_LA57 sar $63, %rbx
#else or %rbx, %rcx
#define LOAD_TASK_SIZE_MINUS_N(n) \ .else
mov $(TASK_SIZE_MAX - (n)),%_ASM_BX cmp $TASK_SIZE_MAX-\size+1, %ecx
#endif jae .Lbad_put_user
.endif
.endm
.text .text
SYM_FUNC_START(__put_user_1) SYM_FUNC_START(__put_user_1)
LOAD_TASK_SIZE_MINUS_N(0) check_range size=1
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
ASM_STAC ASM_STAC
1: movb %al,(%_ASM_CX) 1: movb %al,(%_ASM_CX)
xor %ecx,%ecx xor %ecx,%ecx
...@@ -66,9 +66,7 @@ SYM_FUNC_END(__put_user_nocheck_1) ...@@ -66,9 +66,7 @@ SYM_FUNC_END(__put_user_nocheck_1)
EXPORT_SYMBOL(__put_user_nocheck_1) EXPORT_SYMBOL(__put_user_nocheck_1)
SYM_FUNC_START(__put_user_2) SYM_FUNC_START(__put_user_2)
LOAD_TASK_SIZE_MINUS_N(1) check_range size=2
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
ASM_STAC ASM_STAC
3: movw %ax,(%_ASM_CX) 3: movw %ax,(%_ASM_CX)
xor %ecx,%ecx xor %ecx,%ecx
...@@ -88,9 +86,7 @@ SYM_FUNC_END(__put_user_nocheck_2) ...@@ -88,9 +86,7 @@ SYM_FUNC_END(__put_user_nocheck_2)
EXPORT_SYMBOL(__put_user_nocheck_2) EXPORT_SYMBOL(__put_user_nocheck_2)
SYM_FUNC_START(__put_user_4) SYM_FUNC_START(__put_user_4)
LOAD_TASK_SIZE_MINUS_N(3) check_range size=4
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
ASM_STAC ASM_STAC
5: movl %eax,(%_ASM_CX) 5: movl %eax,(%_ASM_CX)
xor %ecx,%ecx xor %ecx,%ecx
...@@ -110,9 +106,7 @@ SYM_FUNC_END(__put_user_nocheck_4) ...@@ -110,9 +106,7 @@ SYM_FUNC_END(__put_user_nocheck_4)
EXPORT_SYMBOL(__put_user_nocheck_4) EXPORT_SYMBOL(__put_user_nocheck_4)
SYM_FUNC_START(__put_user_8) SYM_FUNC_START(__put_user_8)
LOAD_TASK_SIZE_MINUS_N(7) check_range size=8
cmp %_ASM_BX,%_ASM_CX
jae .Lbad_put_user
ASM_STAC ASM_STAC
7: mov %_ASM_AX,(%_ASM_CX) 7: mov %_ASM_AX,(%_ASM_CX)
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
...@@ -144,15 +138,15 @@ SYM_CODE_START_LOCAL(.Lbad_put_user_clac) ...@@ -144,15 +138,15 @@ SYM_CODE_START_LOCAL(.Lbad_put_user_clac)
RET RET
SYM_CODE_END(.Lbad_put_user_clac) SYM_CODE_END(.Lbad_put_user_clac)
_ASM_EXTABLE_UA(1b, .Lbad_put_user_clac) _ASM_EXTABLE(1b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(2b, .Lbad_put_user_clac) _ASM_EXTABLE(2b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(3b, .Lbad_put_user_clac) _ASM_EXTABLE(3b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(4b, .Lbad_put_user_clac) _ASM_EXTABLE(4b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(5b, .Lbad_put_user_clac) _ASM_EXTABLE(5b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(6b, .Lbad_put_user_clac) _ASM_EXTABLE(6b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(7b, .Lbad_put_user_clac) _ASM_EXTABLE(7b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(9b, .Lbad_put_user_clac) _ASM_EXTABLE(9b, .Lbad_put_user_clac)
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
_ASM_EXTABLE_UA(8b, .Lbad_put_user_clac) _ASM_EXTABLE(8b, .Lbad_put_user_clac)
_ASM_EXTABLE_UA(10b, .Lbad_put_user_clac) _ASM_EXTABLE(10b, .Lbad_put_user_clac)
#endif #endif
...@@ -1048,6 +1048,11 @@ __visible DEFINE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate) = { ...@@ -1048,6 +1048,11 @@ __visible DEFINE_PER_CPU_ALIGNED(struct tlb_state, cpu_tlbstate) = {
.cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */
}; };
#ifdef CONFIG_ADDRESS_MASKING
DEFINE_PER_CPU(u64, tlbstate_untag_mask);
EXPORT_PER_CPU_SYMBOL(tlbstate_untag_mask);
#endif
void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache) void update_cache_mode_entry(unsigned entry, enum page_cache_mode cache)
{ {
/* entry 0 MUST be WB (hardwired to speed up translations) */ /* entry 0 MUST be WB (hardwired to speed up translations) */
......
...@@ -154,26 +154,30 @@ static inline u16 user_pcid(u16 asid) ...@@ -154,26 +154,30 @@ static inline u16 user_pcid(u16 asid)
return ret; return ret;
} }
static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) static inline unsigned long build_cr3(pgd_t *pgd, u16 asid, unsigned long lam)
{ {
unsigned long cr3 = __sme_pa(pgd) | lam;
if (static_cpu_has(X86_FEATURE_PCID)) { if (static_cpu_has(X86_FEATURE_PCID)) {
return __sme_pa(pgd) | kern_pcid(asid); VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
cr3 |= kern_pcid(asid);
} else { } else {
VM_WARN_ON_ONCE(asid != 0); VM_WARN_ON_ONCE(asid != 0);
return __sme_pa(pgd);
} }
return cr3;
} }
static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid,
unsigned long lam)
{ {
VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE);
/* /*
* Use boot_cpu_has() instead of this_cpu_has() as this function * Use boot_cpu_has() instead of this_cpu_has() as this function
* might be called during early boot. This should work even after * might be called during early boot. This should work even after
* boot because all CPU's the have same capabilities: * boot because all CPU's the have same capabilities:
*/ */
VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID)); VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID));
return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; return build_cr3(pgd, asid, lam) | CR3_NOFLUSH;
} }
/* /*
...@@ -274,15 +278,16 @@ static inline void invalidate_user_asid(u16 asid) ...@@ -274,15 +278,16 @@ static inline void invalidate_user_asid(u16 asid)
(unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask)); (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
} }
static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, bool need_flush) static void load_new_mm_cr3(pgd_t *pgdir, u16 new_asid, unsigned long lam,
bool need_flush)
{ {
unsigned long new_mm_cr3; unsigned long new_mm_cr3;
if (need_flush) { if (need_flush) {
invalidate_user_asid(new_asid); invalidate_user_asid(new_asid);
new_mm_cr3 = build_cr3(pgdir, new_asid); new_mm_cr3 = build_cr3(pgdir, new_asid, lam);
} else { } else {
new_mm_cr3 = build_cr3_noflush(pgdir, new_asid); new_mm_cr3 = build_cr3_noflush(pgdir, new_asid, lam);
} }
/* /*
...@@ -491,6 +496,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, ...@@ -491,6 +496,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
{ {
struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm); struct mm_struct *real_prev = this_cpu_read(cpu_tlbstate.loaded_mm);
u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid); u16 prev_asid = this_cpu_read(cpu_tlbstate.loaded_mm_asid);
unsigned long new_lam = mm_lam_cr3_mask(next);
bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy); bool was_lazy = this_cpu_read(cpu_tlbstate_shared.is_lazy);
unsigned cpu = smp_processor_id(); unsigned cpu = smp_processor_id();
u64 next_tlb_gen; u64 next_tlb_gen;
...@@ -520,7 +526,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, ...@@ -520,7 +526,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* isn't free. * isn't free.
*/ */
#ifdef CONFIG_DEBUG_VM #ifdef CONFIG_DEBUG_VM
if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid))) { if (WARN_ON_ONCE(__read_cr3() != build_cr3(real_prev->pgd, prev_asid,
tlbstate_lam_cr3_mask()))) {
/* /*
* If we were to BUG here, we'd be very likely to kill * If we were to BUG here, we'd be very likely to kill
* the system so hard that we don't see the call trace. * the system so hard that we don't see the call trace.
...@@ -552,9 +559,15 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, ...@@ -552,9 +559,15 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
* instruction. * instruction.
*/ */
if (real_prev == next) { if (real_prev == next) {
/* Not actually switching mm's */
VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) != VM_WARN_ON(this_cpu_read(cpu_tlbstate.ctxs[prev_asid].ctx_id) !=
next->context.ctx_id); next->context.ctx_id);
/*
* If this races with another thread that enables lam, 'new_lam'
* might not match tlbstate_lam_cr3_mask().
*/
/* /*
* Even in lazy TLB mode, the CPU should stay set in the * Even in lazy TLB mode, the CPU should stay set in the
* mm_cpumask. The TLB shootdown code can figure out from * mm_cpumask. The TLB shootdown code can figure out from
...@@ -622,15 +635,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, ...@@ -622,15 +635,16 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
barrier(); barrier();
} }
set_tlbstate_lam_mode(next);
if (need_flush) { if (need_flush) {
this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[new_asid].ctx_id, next->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen); this_cpu_write(cpu_tlbstate.ctxs[new_asid].tlb_gen, next_tlb_gen);
load_new_mm_cr3(next->pgd, new_asid, true); load_new_mm_cr3(next->pgd, new_asid, new_lam, true);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, TLB_FLUSH_ALL);
} else { } else {
/* The new ASID is already up to date. */ /* The new ASID is already up to date. */
load_new_mm_cr3(next->pgd, new_asid, false); load_new_mm_cr3(next->pgd, new_asid, new_lam, false);
trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0); trace_tlb_flush(TLB_FLUSH_ON_TASK_SWITCH, 0);
} }
...@@ -691,6 +705,10 @@ void initialize_tlbstate_and_flush(void) ...@@ -691,6 +705,10 @@ void initialize_tlbstate_and_flush(void)
/* Assert that CR3 already references the right mm. */ /* Assert that CR3 already references the right mm. */
WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd)); WARN_ON((cr3 & CR3_ADDR_MASK) != __pa(mm->pgd));
/* LAM expected to be disabled */
WARN_ON(cr3 & (X86_CR3_LAM_U48 | X86_CR3_LAM_U57));
WARN_ON(mm_lam_cr3_mask(mm));
/* /*
* Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization * Assert that CR4.PCIDE is set if needed. (CR4.PCIDE initialization
* doesn't work like other CR4 bits because it can only be set from * doesn't work like other CR4 bits because it can only be set from
...@@ -699,8 +717,8 @@ void initialize_tlbstate_and_flush(void) ...@@ -699,8 +717,8 @@ void initialize_tlbstate_and_flush(void)
WARN_ON(boot_cpu_has(X86_FEATURE_PCID) && WARN_ON(boot_cpu_has(X86_FEATURE_PCID) &&
!(cr4_read_shadow() & X86_CR4_PCIDE)); !(cr4_read_shadow() & X86_CR4_PCIDE));
/* Force ASID 0 and force a TLB flush. */ /* Disable LAM, force ASID 0 and force a TLB flush. */
write_cr3(build_cr3(mm->pgd, 0)); write_cr3(build_cr3(mm->pgd, 0, 0));
/* Reinitialize tlbstate. */ /* Reinitialize tlbstate. */
this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT); this_cpu_write(cpu_tlbstate.last_user_mm_spec, LAST_USER_MM_INIT);
...@@ -708,6 +726,7 @@ void initialize_tlbstate_and_flush(void) ...@@ -708,6 +726,7 @@ void initialize_tlbstate_and_flush(void)
this_cpu_write(cpu_tlbstate.next_asid, 1); this_cpu_write(cpu_tlbstate.next_asid, 1);
this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id);
this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen); this_cpu_write(cpu_tlbstate.ctxs[0].tlb_gen, tlb_gen);
set_tlbstate_lam_mode(mm);
for (i = 1; i < TLB_NR_DYN_ASIDS; i++) for (i = 1; i < TLB_NR_DYN_ASIDS; i++)
this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0); this_cpu_write(cpu_tlbstate.ctxs[i].ctx_id, 0);
...@@ -1071,8 +1090,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) ...@@ -1071,8 +1090,10 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
*/ */
unsigned long __get_current_cr3_fast(void) unsigned long __get_current_cr3_fast(void)
{ {
unsigned long cr3 = build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd, unsigned long cr3 =
this_cpu_read(cpu_tlbstate.loaded_mm_asid)); build_cr3(this_cpu_read(cpu_tlbstate.loaded_mm)->pgd,
this_cpu_read(cpu_tlbstate.loaded_mm_asid),
tlbstate_lam_cr3_mask());
/* For now, be very restrictive about when this can be called. */ /* For now, be very restrictive about when this can be called. */
VM_WARN_ON(in_nmi() || preemptible()); VM_WARN_ON(in_nmi() || preemptible());
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
/* /*
* Helpers for IOMMU drivers implementing SVA * Helpers for IOMMU drivers implementing SVA
*/ */
#include <linux/mmu_context.h>
#include <linux/mutex.h> #include <linux/mutex.h>
#include <linux/sched/mm.h> #include <linux/sched/mm.h>
#include <linux/iommu.h> #include <linux/iommu.h>
...@@ -32,16 +33,19 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max) ...@@ -32,16 +33,19 @@ int iommu_sva_alloc_pasid(struct mm_struct *mm, ioasid_t min, ioasid_t max)
min == 0 || max < min) min == 0 || max < min)
return -EINVAL; return -EINVAL;
if (!arch_pgtable_dma_compat(mm))
return -EBUSY;
mutex_lock(&iommu_sva_lock); mutex_lock(&iommu_sva_lock);
/* Is a PASID already associated with this mm? */ /* Is a PASID already associated with this mm? */
if (pasid_valid(mm->pasid)) { if (mm_valid_pasid(mm)) {
if (mm->pasid < min || mm->pasid >= max) if (mm->pasid < min || mm->pasid >= max)
ret = -EOVERFLOW; ret = -EOVERFLOW;
goto out; goto out;
} }
pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm); pasid = ioasid_alloc(&iommu_sva_pasid, min, max, mm);
if (!pasid_valid(pasid)) if (pasid == INVALID_IOASID)
ret = -ENOMEM; ret = -ENOMEM;
else else
mm_pasid_set(mm, pasid); mm_pasid_set(mm, pasid);
......
...@@ -580,7 +580,7 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, ...@@ -580,7 +580,7 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
goto done; goto done;
} }
vaddr = untagged_addr(vaddr); vaddr = untagged_addr_remote(mm, vaddr);
retry: retry:
vma = vma_lookup(mm, vaddr); vma = vma_lookup(mm, vaddr);
......
...@@ -91,6 +91,7 @@ ...@@ -91,6 +91,7 @@
#include <linux/user_namespace.h> #include <linux/user_namespace.h>
#include <linux/fs_struct.h> #include <linux/fs_struct.h>
#include <linux/kthread.h> #include <linux/kthread.h>
#include <linux/mmu_context.h>
#include <asm/processor.h> #include <asm/processor.h>
#include "internal.h" #include "internal.h"
...@@ -425,6 +426,11 @@ static inline void task_thp_status(struct seq_file *m, struct mm_struct *mm) ...@@ -425,6 +426,11 @@ static inline void task_thp_status(struct seq_file *m, struct mm_struct *mm)
seq_printf(m, "THP_enabled:\t%d\n", thp_enabled); seq_printf(m, "THP_enabled:\t%d\n", thp_enabled);
} }
static inline void task_untag_mask(struct seq_file *m, struct mm_struct *mm)
{
seq_printf(m, "untag_mask:\t%#lx\n", mm_untag_mask(mm));
}
int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task) struct pid *pid, struct task_struct *task)
{ {
...@@ -440,6 +446,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, ...@@ -440,6 +446,7 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
task_mem(m, mm); task_mem(m, mm);
task_core_dumping(m, task); task_core_dumping(m, task);
task_thp_status(m, mm); task_thp_status(m, mm);
task_untag_mask(m, mm);
mmput(mm); mmput(mm);
} }
task_sig(m, task); task_sig(m, task);
......
...@@ -1688,8 +1688,13 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, ...@@ -1688,8 +1688,13 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
/* watch out for wraparound */ /* watch out for wraparound */
start_vaddr = end_vaddr; start_vaddr = end_vaddr;
if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) if (svpfn <= (ULONG_MAX >> PAGE_SHIFT)) {
start_vaddr = untagged_addr(svpfn << PAGE_SHIFT); ret = mmap_read_lock_killable(mm);
if (ret)
goto out_free;
start_vaddr = untagged_addr_remote(mm, svpfn << PAGE_SHIFT);
mmap_read_unlock(mm);
}
/* Ensure the address is inside the task */ /* Ensure the address is inside the task */
if (start_vaddr > mm->task_size) if (start_vaddr > mm->task_size)
......
...@@ -40,10 +40,6 @@ void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid, ...@@ -40,10 +40,6 @@ void *ioasid_find(struct ioasid_set *set, ioasid_t ioasid,
int ioasid_register_allocator(struct ioasid_allocator_ops *allocator); int ioasid_register_allocator(struct ioasid_allocator_ops *allocator);
void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator); void ioasid_unregister_allocator(struct ioasid_allocator_ops *allocator);
int ioasid_set_data(ioasid_t ioasid, void *data); int ioasid_set_data(ioasid_t ioasid, void *data);
static inline bool pasid_valid(ioasid_t ioasid)
{
return ioasid != INVALID_IOASID;
}
#else /* !CONFIG_IOASID */ #else /* !CONFIG_IOASID */
static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min, static inline ioasid_t ioasid_alloc(struct ioasid_set *set, ioasid_t min,
...@@ -74,10 +70,5 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data) ...@@ -74,10 +70,5 @@ static inline int ioasid_set_data(ioasid_t ioasid, void *data)
return -ENOTSUPP; return -ENOTSUPP;
} }
static inline bool pasid_valid(ioasid_t ioasid)
{
return false;
}
#endif /* CONFIG_IOASID */ #endif /* CONFIG_IOASID */
#endif /* __LINUX_IOASID_H */ #endif /* __LINUX_IOASID_H */
...@@ -98,17 +98,6 @@ extern int mmap_rnd_compat_bits __read_mostly; ...@@ -98,17 +98,6 @@ extern int mmap_rnd_compat_bits __read_mostly;
#include <asm/page.h> #include <asm/page.h>
#include <asm/processor.h> #include <asm/processor.h>
/*
* Architectures that support memory tagging (assigning tags to memory regions,
* embedding these tags into addresses that point to these memory regions, and
* checking that the memory and the pointer tags match on memory accesses)
* redefine this macro to strip tags from pointers.
* It's defined as noop for architectures that don't support memory tagging.
*/
#ifndef untagged_addr
#define untagged_addr(addr) (addr)
#endif
#ifndef __pa_symbol #ifndef __pa_symbol
#define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0)) #define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0))
#endif #endif
......
...@@ -28,4 +28,18 @@ static inline void leave_mm(int cpu) { } ...@@ -28,4 +28,18 @@ static inline void leave_mm(int cpu) { }
# define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p)) # define task_cpu_possible(cpu, p) cpumask_test_cpu((cpu), task_cpu_possible_mask(p))
#endif #endif
#ifndef mm_untag_mask
static inline unsigned long mm_untag_mask(struct mm_struct *mm)
{
return -1UL;
}
#endif
#ifndef arch_pgtable_dma_compat
static inline bool arch_pgtable_dma_compat(struct mm_struct *mm)
{
return true;
}
#endif
#endif #endif
...@@ -485,6 +485,11 @@ static inline void mm_pasid_init(struct mm_struct *mm) ...@@ -485,6 +485,11 @@ static inline void mm_pasid_init(struct mm_struct *mm)
mm->pasid = INVALID_IOASID; mm->pasid = INVALID_IOASID;
} }
static inline bool mm_valid_pasid(struct mm_struct *mm)
{
return mm->pasid != INVALID_IOASID;
}
/* Associate a PASID with an mm_struct: */ /* Associate a PASID with an mm_struct: */
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
{ {
...@@ -493,13 +498,14 @@ static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) ...@@ -493,13 +498,14 @@ static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid)
static inline void mm_pasid_drop(struct mm_struct *mm) static inline void mm_pasid_drop(struct mm_struct *mm)
{ {
if (pasid_valid(mm->pasid)) { if (mm_valid_pasid(mm)) {
ioasid_free(mm->pasid); ioasid_free(mm->pasid);
mm->pasid = INVALID_IOASID; mm->pasid = INVALID_IOASID;
} }
} }
#else #else
static inline void mm_pasid_init(struct mm_struct *mm) {} static inline void mm_pasid_init(struct mm_struct *mm) {}
static inline bool mm_valid_pasid(struct mm_struct *mm) { return false; }
static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {} static inline void mm_pasid_set(struct mm_struct *mm, u32 pasid) {}
static inline void mm_pasid_drop(struct mm_struct *mm) {} static inline void mm_pasid_drop(struct mm_struct *mm) {}
#endif #endif
......
...@@ -10,6 +10,28 @@ ...@@ -10,6 +10,28 @@
#include <asm/uaccess.h> #include <asm/uaccess.h>
/*
* Architectures that support memory tagging (assigning tags to memory regions,
* embedding these tags into addresses that point to these memory regions, and
* checking that the memory and the pointer tags match on memory accesses)
* redefine this macro to strip tags from pointers.
*
* Passing down mm_struct allows to define untagging rules on per-process
* basis.
*
* It's defined as noop for architectures that don't support memory tagging.
*/
#ifndef untagged_addr
#define untagged_addr(addr) (addr)
#endif
#ifndef untagged_addr_remote
#define untagged_addr_remote(mm, addr) ({ \
mmap_assert_locked(mm); \
untagged_addr(addr); \
})
#endif
/* /*
* Architectures should provide two primitives (raw_copy_{to,from}_user()) * Architectures should provide two primitives (raw_copy_{to,from}_user())
* and get rid of their private instances of copy_{to,from}_user() and * and get rid of their private instances of copy_{to,from}_user() and
......
...@@ -1085,7 +1085,7 @@ static long __get_user_pages(struct mm_struct *mm, ...@@ -1085,7 +1085,7 @@ static long __get_user_pages(struct mm_struct *mm,
if (!nr_pages) if (!nr_pages)
return 0; return 0;
start = untagged_addr(start); start = untagged_addr_remote(mm, start);
VM_BUG_ON(!!pages != !!(gup_flags & (FOLL_GET | FOLL_PIN))); VM_BUG_ON(!!pages != !!(gup_flags & (FOLL_GET | FOLL_PIN)));
...@@ -1259,7 +1259,7 @@ int fixup_user_fault(struct mm_struct *mm, ...@@ -1259,7 +1259,7 @@ int fixup_user_fault(struct mm_struct *mm,
struct vm_area_struct *vma; struct vm_area_struct *vma;
vm_fault_t ret; vm_fault_t ret;
address = untagged_addr(address); address = untagged_addr_remote(mm, address);
if (unlocked) if (unlocked)
fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
......
...@@ -1390,8 +1390,6 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh ...@@ -1390,8 +1390,6 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh
size_t len; size_t len;
struct blk_plug plug; struct blk_plug plug;
start = untagged_addr(start);
if (!madvise_behavior_valid(behavior)) if (!madvise_behavior_valid(behavior))
return -EINVAL; return -EINVAL;
...@@ -1423,6 +1421,9 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh ...@@ -1423,6 +1421,9 @@ int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int beh
mmap_read_lock(mm); mmap_read_lock(mm);
} }
start = untagged_addr_remote(mm, start);
end = start + len;
blk_start_plug(&plug); blk_start_plug(&plug);
error = madvise_walk_vmas(mm, start, end, behavior, error = madvise_walk_vmas(mm, start, end, behavior,
madvise_vma_behavior); madvise_vma_behavior);
......
...@@ -2099,15 +2099,18 @@ static int do_move_pages_to_node(struct mm_struct *mm, ...@@ -2099,15 +2099,18 @@ static int do_move_pages_to_node(struct mm_struct *mm,
* target node * target node
* 1 - when it has been queued * 1 - when it has been queued
*/ */
static int add_page_for_migration(struct mm_struct *mm, unsigned long addr, static int add_page_for_migration(struct mm_struct *mm, const void __user *p,
int node, struct list_head *pagelist, bool migrate_all) int node, struct list_head *pagelist, bool migrate_all)
{ {
struct vm_area_struct *vma; struct vm_area_struct *vma;
unsigned long addr;
struct page *page; struct page *page;
int err; int err;
bool isolated; bool isolated;
mmap_read_lock(mm); mmap_read_lock(mm);
addr = (unsigned long)untagged_addr_remote(mm, p);
err = -EFAULT; err = -EFAULT;
vma = vma_lookup(mm, addr); vma = vma_lookup(mm, addr);
if (!vma || !vma_migratable(vma)) if (!vma || !vma_migratable(vma))
...@@ -2213,7 +2216,6 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, ...@@ -2213,7 +2216,6 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
for (i = start = 0; i < nr_pages; i++) { for (i = start = 0; i < nr_pages; i++) {
const void __user *p; const void __user *p;
unsigned long addr;
int node; int node;
err = -EFAULT; err = -EFAULT;
...@@ -2221,7 +2223,6 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, ...@@ -2221,7 +2223,6 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
goto out_flush; goto out_flush;
if (get_user(node, nodes + i)) if (get_user(node, nodes + i))
goto out_flush; goto out_flush;
addr = (unsigned long)untagged_addr(p);
err = -ENODEV; err = -ENODEV;
if (node < 0 || node >= MAX_NUMNODES) if (node < 0 || node >= MAX_NUMNODES)
...@@ -2249,8 +2250,8 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes, ...@@ -2249,8 +2250,8 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
* Errors in the page lookup or isolation are not fatal and we simply * Errors in the page lookup or isolation are not fatal and we simply
* report them via status * report them via status
*/ */
err = add_page_for_migration(mm, addr, current_node, err = add_page_for_migration(mm, p, current_node, &pagelist,
&pagelist, flags & MPOL_MF_MOVE_ALL); flags & MPOL_MF_MOVE_ALL);
if (err > 0) { if (err > 0) {
/* The page is successfully queued for migration */ /* The page is successfully queued for migration */
......
...@@ -18,7 +18,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ ...@@ -18,7 +18,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \ test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer vdso_restorer
TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \ TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
corrupt_xstate_header amx corrupt_xstate_header amx lam
# Some selftests require 32bit support enabled also on 64bit systems # Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment