Commit 07f8ba74 authored by Palmer Dabbelt's avatar Palmer Dabbelt

RISC-V: User-Visible Changes

This merge contains the user-visible, ABI-breaking changes that we want
to make sure we have in Linux before our first release.   Highlights
include:

* VDSO entries for clock_get/gettimeofday/getcpu have been added.  These
  are simple syscalls now, but we want to let glibc use them from the
  start so we can make them faster later.
* A VDSO entry for instruction cache flushing has been added so
  userspace can flush the instruction cache.
* The VDSO symbol versions for __vdso_cmpxchg{32,64} have been removed,
  as those VDSO entries don't actually exist.

Conflicts:
        arch/riscv/include/asm/tlbflush.h
parents f8182f61 0e710ac6
...@@ -18,22 +18,44 @@ ...@@ -18,22 +18,44 @@
#undef flush_icache_range #undef flush_icache_range
#undef flush_icache_user_range #undef flush_icache_user_range
#undef flush_dcache_page
static inline void local_flush_icache_all(void) static inline void local_flush_icache_all(void)
{ {
asm volatile ("fence.i" ::: "memory"); asm volatile ("fence.i" ::: "memory");
} }
#define PG_dcache_clean PG_arch_1
static inline void flush_dcache_page(struct page *page)
{
if (test_bit(PG_dcache_clean, &page->flags))
clear_bit(PG_dcache_clean, &page->flags);
}
/*
* RISC-V doesn't have an instruction to flush parts of the instruction cache,
* so instead we just flush the whole thing.
*/
#define flush_icache_range(start, end) flush_icache_all()
#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all()
#ifndef CONFIG_SMP #ifndef CONFIG_SMP
#define flush_icache_range(start, end) local_flush_icache_all() #define flush_icache_all() local_flush_icache_all()
#define flush_icache_user_range(vma, pg, addr, len) local_flush_icache_all() #define flush_icache_mm(mm, local) flush_icache_all()
#else /* CONFIG_SMP */ #else /* CONFIG_SMP */
#define flush_icache_range(start, end) sbi_remote_fence_i(0) #define flush_icache_all() sbi_remote_fence_i(0)
#define flush_icache_user_range(vma, pg, addr, len) sbi_remote_fence_i(0) void flush_icache_mm(struct mm_struct *mm, bool local);
#endif /* CONFIG_SMP */ #endif /* CONFIG_SMP */
/*
* Bits in sys_riscv_flush_icache()'s flags argument.
*/
#define SYS_RISCV_FLUSH_ICACHE_LOCAL 1UL
#define SYS_RISCV_FLUSH_ICACHE_ALL (SYS_RISCV_FLUSH_ICACHE_LOCAL)
#endif /* _ASM_RISCV_CACHEFLUSH_H */ #endif /* _ASM_RISCV_CACHEFLUSH_H */
...@@ -19,6 +19,10 @@ ...@@ -19,6 +19,10 @@
typedef struct { typedef struct {
void *vdso; void *vdso;
#ifdef CONFIG_SMP
/* A local icache flush is needed before user execution can resume. */
cpumask_t icache_stale_mask;
#endif
} mm_context_t; } mm_context_t;
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
......
/* /*
* Copyright (C) 2012 Regents of the University of California * Copyright (C) 2012 Regents of the University of California
* Copyright (C) 2017 SiFive
* *
* This program is free software; you can redistribute it and/or * This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License * modify it under the terms of the GNU General Public License
...@@ -19,6 +20,7 @@ ...@@ -19,6 +20,7 @@
#include <linux/mm.h> #include <linux/mm.h>
#include <linux/sched.h> #include <linux/sched.h>
#include <asm/tlbflush.h> #include <asm/tlbflush.h>
#include <asm/cacheflush.h>
static inline void enter_lazy_tlb(struct mm_struct *mm, static inline void enter_lazy_tlb(struct mm_struct *mm,
struct task_struct *task) struct task_struct *task)
...@@ -46,12 +48,54 @@ static inline void set_pgdir(pgd_t *pgd) ...@@ -46,12 +48,54 @@ static inline void set_pgdir(pgd_t *pgd)
csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE); csr_write(sptbr, virt_to_pfn(pgd) | SPTBR_MODE);
} }
/*
* When necessary, performs a deferred icache flush for the given MM context,
* on the local CPU. RISC-V has no direct mechanism for instruction cache
* shoot downs, so instead we send an IPI that informs the remote harts they
* need to flush their local instruction caches. To avoid pathologically slow
* behavior in a common case (a bunch of single-hart processes on a many-hart
* machine, ie 'make -j') we avoid the IPIs for harts that are not currently
* executing a MM context and instead schedule a deferred local instruction
* cache flush to be performed before execution resumes on each hart. This
* actually performs that local instruction cache flush, which implicitly only
* refers to the current hart.
*/
static inline void flush_icache_deferred(struct mm_struct *mm)
{
#ifdef CONFIG_SMP
unsigned int cpu = smp_processor_id();
cpumask_t *mask = &mm->context.icache_stale_mask;
if (cpumask_test_cpu(cpu, mask)) {
cpumask_clear_cpu(cpu, mask);
/*
* Ensure the remote hart's writes are visible to this hart.
* This pairs with a barrier in flush_icache_mm.
*/
smp_mb();
local_flush_icache_all();
}
#endif
}
static inline void switch_mm(struct mm_struct *prev, static inline void switch_mm(struct mm_struct *prev,
struct mm_struct *next, struct task_struct *task) struct mm_struct *next, struct task_struct *task)
{ {
if (likely(prev != next)) { if (likely(prev != next)) {
/*
* Mark the current MM context as inactive, and the next as
* active. This is at least used by the icache flushing
* routines in order to determine who should
*/
unsigned int cpu = smp_processor_id();
cpumask_clear_cpu(cpu, mm_cpumask(prev));
cpumask_set_cpu(cpu, mm_cpumask(next));
set_pgdir(next->pgd); set_pgdir(next->pgd);
local_flush_tlb_all(); local_flush_tlb_all();
flush_icache_deferred(next);
} }
} }
......
...@@ -178,28 +178,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr) ...@@ -178,28 +178,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long addr)
#define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map(dir, addr) pte_offset_kernel((dir), (addr))
#define pte_unmap(pte) ((void)(pte)) #define pte_unmap(pte) ((void)(pte))
/*
* Certain architectures need to do special things when PTEs within
* a page table are directly modified. Thus, the following hook is
* made available.
*/
static inline void set_pte(pte_t *ptep, pte_t pteval)
{
*ptep = pteval;
}
static inline void set_pte_at(struct mm_struct *mm,
unsigned long addr, pte_t *ptep, pte_t pteval)
{
set_pte(ptep, pteval);
}
static inline void pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
set_pte_at(mm, addr, ptep, __pte(0));
}
static inline int pte_present(pte_t pte) static inline int pte_present(pte_t pte)
{ {
return (pte_val(pte) & _PAGE_PRESENT); return (pte_val(pte) & _PAGE_PRESENT);
...@@ -210,21 +188,22 @@ static inline int pte_none(pte_t pte) ...@@ -210,21 +188,22 @@ static inline int pte_none(pte_t pte)
return (pte_val(pte) == 0); return (pte_val(pte) == 0);
} }
/* static inline int pte_read(pte_t pte) */
static inline int pte_write(pte_t pte) static inline int pte_write(pte_t pte)
{ {
return pte_val(pte) & _PAGE_WRITE; return pte_val(pte) & _PAGE_WRITE;
} }
static inline int pte_exec(pte_t pte)
{
return pte_val(pte) & _PAGE_EXEC;
}
static inline int pte_huge(pte_t pte) static inline int pte_huge(pte_t pte)
{ {
return pte_present(pte) return pte_present(pte)
&& (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); && (pte_val(pte) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC));
} }
/* static inline int pte_exec(pte_t pte) */
static inline int pte_dirty(pte_t pte) static inline int pte_dirty(pte_t pte)
{ {
return pte_val(pte) & _PAGE_DIRTY; return pte_val(pte) & _PAGE_DIRTY;
...@@ -311,6 +290,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) ...@@ -311,6 +290,33 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b)
return pte_val(pte_a) == pte_val(pte_b); return pte_val(pte_a) == pte_val(pte_b);
} }
/*
* Certain architectures need to do special things when PTEs within
* a page table are directly modified. Thus, the following hook is
* made available.
*/
static inline void set_pte(pte_t *ptep, pte_t pteval)
{
*ptep = pteval;
}
void flush_icache_pte(pte_t pte);
static inline void set_pte_at(struct mm_struct *mm,
unsigned long addr, pte_t *ptep, pte_t pteval)
{
if (pte_present(pteval) && pte_exec(pteval))
flush_icache_pte(pteval);
set_pte(ptep, pteval);
}
static inline void pte_clear(struct mm_struct *mm,
unsigned long addr, pte_t *ptep)
{
set_pte_at(mm, addr, ptep, __pte(0));
}
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
static inline int ptep_set_access_flags(struct vm_area_struct *vma, static inline int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep, unsigned long address, pte_t *ptep,
......
...@@ -17,6 +17,8 @@ ...@@ -17,6 +17,8 @@
#ifdef CONFIG_MMU #ifdef CONFIG_MMU
#include <linux/mm_types.h>
/* /*
* Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction * Flush entire local TLB. 'sfence.vma' implicitly fences with the instruction
* cache as well, so a 'fence.i' is not necessary. * cache as well, so a 'fence.i' is not necessary.
......
/*
* Copyright (C) 2017 SiFive
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef _ASM_RISCV_VDSO_SYSCALLS_H
#define _ASM_RISCV_VDSO_SYSCALLS_H
#ifdef CONFIG_SMP
/* These syscalls are only used by the vDSO and are not in the uapi. */
#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
#endif
#endif /* _ASM_RISCV_VDSO_H */
...@@ -38,4 +38,8 @@ struct vdso_data { ...@@ -38,4 +38,8 @@ struct vdso_data {
(void __user *)((unsigned long)(base) + __vdso_##name); \ (void __user *)((unsigned long)(base) + __vdso_##name); \
}) })
#ifdef CONFIG_SMP
asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
#endif
#endif /* _ASM_RISCV_VDSO_H */ #endif /* _ASM_RISCV_VDSO_H */
...@@ -108,3 +108,51 @@ void smp_send_reschedule(int cpu) ...@@ -108,3 +108,51 @@ void smp_send_reschedule(int cpu)
{ {
send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE);
} }
/*
* Performs an icache flush for the given MM context. RISC-V has no direct
* mechanism for instruction cache shoot downs, so instead we send an IPI that
* informs the remote harts they need to flush their local instruction caches.
* To avoid pathologically slow behavior in a common case (a bunch of
* single-hart processes on a many-hart machine, ie 'make -j') we avoid the
* IPIs for harts that are not currently executing a MM context and instead
* schedule a deferred local instruction cache flush to be performed before
* execution resumes on each hart.
*/
void flush_icache_mm(struct mm_struct *mm, bool local)
{
unsigned int cpu;
cpumask_t others, *mask;
preempt_disable();
/* Mark every hart's icache as needing a flush for this MM. */
mask = &mm->context.icache_stale_mask;
cpumask_setall(mask);
/* Flush this hart's I$ now, and mark it as flushed. */
cpu = smp_processor_id();
cpumask_clear_cpu(cpu, mask);
local_flush_icache_all();
/*
* Flush the I$ of other harts concurrently executing, and mark them as
* flushed.
*/
cpumask_andnot(&others, mm_cpumask(mm), cpumask_of(cpu));
local |= cpumask_empty(&others);
if (mm != current->active_mm || !local)
sbi_remote_fence_i(others.bits);
else {
/*
* It's assumed that at least one strongly ordered operation is
* performed on this hart between setting a hart's cpumask bit
* and scheduling this MM context on that hart. Sending an SBI
* remote message will do this, but in the case where no
* messages are sent we still need to order this hart's writes
* with flush_icache_deferred().
*/
smp_mb();
}
preempt_enable();
}
...@@ -14,8 +14,8 @@ ...@@ -14,8 +14,8 @@
*/ */
#include <linux/syscalls.h> #include <linux/syscalls.h>
#include <asm/cmpxchg.h>
#include <asm/unistd.h> #include <asm/unistd.h>
#include <asm/cacheflush.h>
static long riscv_sys_mmap(unsigned long addr, unsigned long len, static long riscv_sys_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags, unsigned long prot, unsigned long flags,
...@@ -47,3 +47,34 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, ...@@ -47,3 +47,34 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len,
return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12); return riscv_sys_mmap(addr, len, prot, flags, fd, offset, 12);
} }
#endif /* !CONFIG_64BIT */ #endif /* !CONFIG_64BIT */
#ifdef CONFIG_SMP
/*
* Allows the instruction cache to be flushed from userspace. Despite RISC-V
* having a direct 'fence.i' instruction available to userspace (which we
* can't trap!), that's not actually viable when running on Linux because the
* kernel might schedule a process on another hart. There is no way for
* userspace to handle this without invoking the kernel (as it doesn't know the
* thread->hart mappings), so we've defined a RISC-V specific system call to
* flush the instruction cache.
*
* sys_riscv_flush_icache() is defined to flush the instruction cache over an
* address range, with the flush applying to either all threads or just the
* caller. We don't currently do anything with the address range, that's just
* in there for forwards compatibility.
*/
SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
uintptr_t, flags)
{
struct mm_struct *mm = current->mm;
bool local = (flags & SYS_RISCV_FLUSH_ICACHE_LOCAL) != 0;
/* Check the reserved flags. */
if (unlikely(flags & !SYS_RISCV_FLUSH_ICACHE_ALL))
return -EINVAL;
flush_icache_mm(mm, local);
return 0;
}
#endif
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include <linux/linkage.h> #include <linux/linkage.h>
#include <linux/syscalls.h> #include <linux/syscalls.h>
#include <asm-generic/syscalls.h> #include <asm-generic/syscalls.h>
#include <asm/vdso.h>
#undef __SYSCALL #undef __SYSCALL
#define __SYSCALL(nr, call) [nr] = (call), #define __SYSCALL(nr, call) [nr] = (call),
...@@ -22,4 +23,5 @@ ...@@ -22,4 +23,5 @@
void *sys_call_table[__NR_syscalls] = { void *sys_call_table[__NR_syscalls] = {
[0 ... __NR_syscalls - 1] = sys_ni_syscall, [0 ... __NR_syscalls - 1] = sys_ni_syscall,
#include <asm/unistd.h> #include <asm/unistd.h>
#include <asm/vdso-syscalls.h>
}; };
...@@ -2,6 +2,11 @@ ...@@ -2,6 +2,11 @@
# Symbols present in the vdso # Symbols present in the vdso
vdso-syms = rt_sigreturn vdso-syms = rt_sigreturn
vdso-syms += gettimeofday
vdso-syms += clock_gettime
vdso-syms += clock_getres
vdso-syms += getcpu
vdso-syms += flush_icache
# Files to link into the vdso # Files to link into the vdso
obj-vdso = $(patsubst %, %.o, $(vdso-syms)) obj-vdso = $(patsubst %, %.o, $(vdso-syms))
......
/*
* Copyright (C) 2017 SiFive
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/linkage.h>
#include <asm/unistd.h>
.text
/* int __vdso_clock_getres(clockid_t clock_id, struct timespec *res); */
ENTRY(__vdso_clock_getres)
.cfi_startproc
/* For now, just do the syscall. */
li a7, __NR_clock_getres
ecall
ret
.cfi_endproc
ENDPROC(__vdso_clock_getres)
/*
* Copyright (C) 2017 SiFive
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/linkage.h>
#include <asm/unistd.h>
.text
/* int __vdso_clock_gettime(clockid_t clock_id, struct timespec *tp); */
ENTRY(__vdso_clock_gettime)
.cfi_startproc
/* For now, just do the syscall. */
li a7, __NR_clock_gettime
ecall
ret
.cfi_endproc
ENDPROC(__vdso_clock_gettime)
/*
* Copyright (C) 2017 SiFive
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/linkage.h>
#include <asm/unistd.h>
#include <asm/vdso-syscalls.h>
.text
/* int __vdso_flush_icache(void *start, void *end, unsigned long flags); */
ENTRY(__vdso_flush_icache)
.cfi_startproc
#ifdef CONFIG_SMP
li a7, __NR_riscv_flush_icache
ecall
#else
fence.i
li a0, 0
#endif
ret
.cfi_endproc
ENDPROC(__vdso_flush_icache)
/*
* Copyright (C) 2017 SiFive
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/linkage.h>
#include <asm/unistd.h>
.text
/* int __vdso_getcpu(unsigned *cpu, unsigned *node, void *unused); */
ENTRY(__vdso_getcpu)
.cfi_startproc
/* For now, just do the syscall. */
li a7, __NR_getcpu
ecall
ret
.cfi_endproc
ENDPROC(__vdso_getcpu)
/*
* Copyright (C) 2017 SiFive
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <linux/linkage.h>
#include <asm/unistd.h>
.text
/* int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); */
ENTRY(__vdso_gettimeofday)
.cfi_startproc
/* For now, just do the syscall. */
li a7, __NR_gettimeofday
ecall
ret
.cfi_endproc
ENDPROC(__vdso_gettimeofday)
...@@ -70,8 +70,11 @@ VERSION ...@@ -70,8 +70,11 @@ VERSION
LINUX_4.15 { LINUX_4.15 {
global: global:
__vdso_rt_sigreturn; __vdso_rt_sigreturn;
__vdso_cmpxchg32; __vdso_gettimeofday;
__vdso_cmpxchg64; __vdso_clock_gettime;
__vdso_clock_getres;
__vdso_getcpu;
__vdso_flush_icache;
local: *; local: *;
}; };
} }
...@@ -2,3 +2,4 @@ obj-y += init.o ...@@ -2,3 +2,4 @@ obj-y += init.o
obj-y += fault.o obj-y += fault.o
obj-y += extable.o obj-y += extable.o
obj-y += ioremap.o obj-y += ioremap.o
obj-y += cacheflush.o
/*
* Copyright (C) 2017 SiFive
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation, version 2.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*/
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
void flush_icache_pte(pte_t pte)
{
struct page *page = pte_page(pte);
if (!test_and_set_bit(PG_dcache_clean, &page->flags))
flush_icache_all();
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment