Commit bdbf0a4c authored by Russell King

Merge branch 'for-rmk/prefetch' of git://git.kernel.org/pub/scm/linux/kernel/git/will/linux into devel-stable
parents 901e7e34 d779c07d

arch/arm/include/asm/atomic.h
@@ -12,6 +12,7 @@
 #define __ASM_ARM_ATOMIC_H
 
 #include <linux/compiler.h>
+#include <linux/prefetch.h>
 #include <linux/types.h>
 #include <linux/irqflags.h>
 #include <asm/barrier.h>
@@ -41,6 +42,7 @@ static inline void atomic_add(int i, atomic_t *v)
 	unsigned long tmp;
 	int result;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic_add\n"
 "1:	ldrex	%0, [%3]\n"
 "	add	%0, %0, %4\n"
@@ -79,6 +81,7 @@ static inline void atomic_sub(int i, atomic_t *v)
 	unsigned long tmp;
 	int result;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic_sub\n"
 "1:	ldrex	%0, [%3]\n"
 "	sub	%0, %0, %4\n"
@@ -138,6 +141,7 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 {
 	unsigned long tmp, tmp2;
 
+	prefetchw(addr);
 	__asm__ __volatile__("@ atomic_clear_mask\n"
 "1:	ldrex	%0, [%3]\n"
 "	bic	%0, %0, %4\n"
@@ -283,6 +287,7 @@ static inline void atomic64_set(atomic64_t *v, u64 i)
 {
 	u64 tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_set\n"
 "1:	ldrexd	%0, %H0, [%2]\n"
 "	strexd	%0, %3, %H3, [%2]\n"
@@ -299,6 +304,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v)
 	u64 result;
 	unsigned long tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_add\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
 "	adds	%0, %0, %4\n"
@@ -339,6 +345,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v)
 	u64 result;
 	unsigned long tmp;
 
+	prefetchw(&v->counter);
 	__asm__ __volatile__("@ atomic64_sub\n"
 "1:	ldrexd	%0, %H0, [%3]\n"
 "	subs	%0, %0, %4\n"
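
Every hunk above follows the same pattern: issue prefetchw() on the word that the following ldrex/strex loop is about to modify, so the cache line is already held for write when the first strex executes, rather than being fetched shared by the ldrex and upgraded afterwards. A rough user-space sketch of that idea, not the kernel code (my_atomic_add and the __builtin_prefetch write hint are illustrative; whether the compiler turns the hint into pldw depends on the target CPU):

#include <stdatomic.h>
#include <stdio.h>

static void my_atomic_add(atomic_int *v, int i)
{
	/* Analogue of prefetchw(&v->counter): request the cache line in a
	 * writable state before the read-modify-write loop starts.  The
	 * second argument (1) means "prefetch for write". */
	__builtin_prefetch(v, 1);

	int old = atomic_load_explicit(v, memory_order_relaxed);

	/* CAS retry loop standing in for the kernel's ldrex/strex loop. */
	while (!atomic_compare_exchange_weak_explicit(v, &old, old + i,
						      memory_order_relaxed,
						      memory_order_relaxed))
		;
}

int main(void)
{
	atomic_int counter = 40;

	my_atomic_add(&counter, 2);
	printf("%d\n", atomic_load(&counter));	/* prints 42 */
	return 0;
}
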
arch/arm/include/asm/processor.h
@@ -22,6 +22,7 @@
 #include <asm/hw_breakpoint.h>
 #include <asm/ptrace.h>
 #include <asm/types.h>
+#include <asm/unified.h>
 
 #ifdef __KERNEL__
 #define STACK_TOP	((current->personality & ADDR_LIMIT_32BIT) ? \
@@ -87,6 +88,17 @@ unsigned long get_wchan(struct task_struct *p);
 #define KSTK_EIP(tsk)	task_pt_regs(tsk)->ARM_pc
 #define KSTK_ESP(tsk)	task_pt_regs(tsk)->ARM_sp
 
+#ifdef CONFIG_SMP
+#define __ALT_SMP_ASM(smp, up)						\
+	"9998:	" smp "\n"						\
+	"	.pushsection \".alt.smp.init\", \"a\"\n"		\
+	"	.long	9998b\n"					\
+	"	" up "\n"						\
+	"	.popsection\n"
+#else
+#define __ALT_SMP_ASM(smp, up)	up
+#endif
+
 /*
  * Prefetching support - only ARMv5.
  */
@@ -97,17 +109,22 @@ static inline void prefetch(const void *ptr)
 {
 	__asm__ __volatile__(
 		"pld\t%a0"
-		:
-		: "p" (ptr)
-		: "cc");
+		:: "p" (ptr));
 }
 
+#if __LINUX_ARM_ARCH__ >= 7 && defined(CONFIG_SMP)
 #define ARCH_HAS_PREFETCHW
-#define prefetchw(ptr)	prefetch(ptr)
-
-#define ARCH_HAS_SPINLOCK_PREFETCH
-#define spin_lock_prefetch(x) do { } while (0)
-
+static inline void prefetchw(const void *ptr)
+{
+	__asm__ __volatile__(
+		".arch_extension	mp\n"
+		__ALT_SMP_ASM(
+			WASM(pldw)	"\t%a0",
+			WASM(pld)	"\t%a0"
+		)
+		:: "p" (ptr));
+}
+#endif
 #endif
 
 #define HAVE_ARCH_PICK_MMAP_LAYOUT
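
The new prefetchw() builds its asm template purely by preprocessor string pasting (WASM() plus __ALT_SMP_ASM()), so the expansion can be reproduced and printed on any host. The sketch below mirrors the macro definitions for an ARM-mode SMP configuration, for inspection only; it is not kernel code:

#include <stdio.h>

#define WASM(instr)	#instr			/* ARM mode; Thumb-2 would append ".w" */

#define __ALT_SMP_ASM(smp, up)					\
	"9998:	" smp "\n"					\
	"	.pushsection \".alt.smp.init\", \"a\"\n"	\
	"	.long	9998b\n"				\
	"	" up "\n"					\
	"	.popsection\n"

int main(void)
{
	/* The same string the compiler would hand to the assembler for
	 * prefetchw(); %a0 is the operand placeholder the compiler replaces
	 * with the prefetched address. */
	const char *tmpl =
		".arch_extension	mp\n"
		__ALT_SMP_ASM(WASM(pldw) "\t%a0", WASM(pld) "\t%a0");

	fputs(tmpl, stdout);
	return 0;
}

On an SMP kernel the pldw is emitted at the 9998 label and its address is recorded in the .alt.smp.init section alongside the pld replacement, so the boot code can patch the site when it finds itself running on a uniprocessor; with CONFIG_SMP disabled, __ALT_SMP_ASM(smp, up) collapses to just up and a plain pld is emitted.
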
arch/arm/include/asm/spinlock.h
@@ -5,21 +5,13 @@
 #error SMP not supported on pre-ARMv6 CPUs
 #endif
 
-#include <asm/processor.h>
+#include <linux/prefetch.h>
 
 /*
  * sev and wfe are ARMv6K extensions.  Uniprocessor ARMv6 may not have the K
  * extensions, so when running on UP, we have to patch these instructions away.
  */
-#define ALT_SMP(smp, up)					\
-	"9998:	" smp "\n"					\
-	"	.pushsection \".alt.smp.init\", \"a\"\n"	\
-	"	.long	9998b\n"				\
-	"	" up "\n"					\
-	"	.popsection\n"
-
 #ifdef CONFIG_THUMB2_KERNEL
-#define SEV		ALT_SMP("sev.w", "nop.w")
 /*
  * For Thumb-2, special care is needed to ensure that the conditional WFE
  * instruction really does assemble to exactly 4 bytes (as required by
@@ -31,17 +23,18 @@
  * the assembler won't change IT instructions which are explicitly present
  * in the input.
  */
-#define WFE(cond)	ALT_SMP(		\
+#define WFE(cond)	__ALT_SMP_ASM(		\
	"it " cond "\n\t"			\
	"wfe" cond ".n",			\
						\
	"nop.w"					\
 )
 #else
-#define SEV		ALT_SMP("sev", "nop")
-#define WFE(cond)	ALT_SMP("wfe" cond, "nop")
+#define WFE(cond)	__ALT_SMP_ASM("wfe" cond, "nop")
 #endif
 
+#define SEV		__ALT_SMP_ASM(WASM(sev), WASM(nop))
+
 static inline void dsb_sev(void)
 {
 #if __LINUX_ARM_ARCH__ >= 7
@@ -77,6 +70,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
 	u32 newval;
 	arch_spinlock_t lockval;
 
+	prefetchw(&lock->slock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%3]\n"
 "	add	%1, %0, %4\n"
@@ -100,6 +94,7 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
 	unsigned long contended, res;
 	u32 slock;
 
+	prefetchw(&lock->slock);
 	do {
 		__asm__ __volatile__(
 		"	ldrex	%0, [%3]\n"
@@ -152,6 +147,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp;
 
+	prefetchw(&rw->lock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%1]\n"
 "	teq	%0, #0\n"
@@ -170,6 +166,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw)
 {
 	unsigned long contended, res;
 
+	prefetchw(&rw->lock);
 	do {
 		__asm__ __volatile__(
 		"	ldrex	%0, [%2]\n"
@@ -203,7 +200,7 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 }
 
 /* write_can_lock - would write_trylock() succeed? */
-#define arch_write_can_lock(x)		((x)->lock == 0)
+#define arch_write_can_lock(x)		(ACCESS_ONCE((x)->lock) == 0)
 
 /*
  * Read locks are a bit more hairy:
@@ -221,6 +218,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw)
 {
 	unsigned long tmp, tmp2;
 
+	prefetchw(&rw->lock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%2]\n"
 "	adds	%0, %0, #1\n"
@@ -241,6 +239,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw)
 
 	smp_mb();
 
+	prefetchw(&rw->lock);
 	__asm__ __volatile__(
 "1:	ldrex	%0, [%2]\n"
 "	sub	%0, %0, #1\n"
@@ -259,6 +258,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 {
 	unsigned long contended, res;
 
+	prefetchw(&rw->lock);
 	do {
 		__asm__ __volatile__(
 		"	ldrex	%0, [%2]\n"
@@ -280,7 +280,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw)
 }
 
 /* read_can_lock - would read_trylock() succeed? */
-#define arch_read_can_lock(x)		((x)->lock < 0x80000000)
+#define arch_read_can_lock(x)		(ACCESS_ONCE((x)->lock) < 0x80000000)
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
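
The two *_can_lock() helpers now read the lock word through ACCESS_ONCE(): the rwlock type in the next file drops its volatile qualifier (presumably so &rw->lock can be handed to prefetchw() without a qualifier warning), and the volatile access is instead supplied at the one place that polls the word without holding the lock. A minimal sketch of what that buys; the ACCESS_ONCE() body matches the kernel's definition (with __typeof__ spelled out), while my_rwlock and wait_until_write_unlocked() are illustrative names, not kernel API:

#include <stdint.h>

#define ACCESS_ONCE(x)	(*(volatile __typeof__(x) *)&(x))

struct my_rwlock {
	uint32_t lock;		/* 0: free, 0x80000000: write-locked, else reader count */
};

/* Poll until no writer holds the lock.  Without ACCESS_ONCE() the compiler
 * may load rw->lock once and spin on the cached value forever; with it,
 * every iteration performs a real load, which is what the now non-volatile
 * lock word relies on. */
static void wait_until_write_unlocked(struct my_rwlock *rw)
{
	while (ACCESS_ONCE(rw->lock) != 0)
		;	/* the kernel would cpu_relax()/wfe here */
}

int main(void)
{
	struct my_rwlock rw = { .lock = 0 };

	wait_until_write_unlocked(&rw);	/* returns immediately: word is 0 */
	return 0;
}
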
arch/arm/include/asm/spinlock_types.h
@@ -25,7 +25,7 @@ typedef struct {
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
 
 typedef struct {
-	volatile unsigned int lock;
+	u32 lock;
 } arch_rwlock_t;
 
 #define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
arch/arm/include/asm/unified.h
@@ -38,6 +38,8 @@
 #ifdef __ASSEMBLY__
 #define W(instr)	instr.w
 #define BSYM(sym)	sym + 1
+#else
+#define WASM(instr)	#instr ".w"
 #endif
 
 #else	/* !CONFIG_THUMB2_KERNEL */
@@ -50,6 +52,8 @@
 #ifdef __ASSEMBLY__
 #define W(instr)	instr
 #define BSYM(sym)	sym
+#else
+#define WASM(instr)	#instr
 #endif
 
 #endif	/* CONFIG_THUMB2_KERNEL */
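
WASM() is the C-side counterpart of the assembler-side W() macro: it stringizes an instruction mnemonic and, on a Thumb-2 kernel, appends ".w" so the assembler always picks the 32-bit encoding inside the patched SMP/UP alternatives. A trivial host-side sketch showing the strings that end up in the asm templates (both variants are renamed here, since a real build only ever defines one of them):

#include <stdio.h>

#define WASM_THUMB2(instr)	#instr ".w"	/* CONFIG_THUMB2_KERNEL side */
#define WASM_ARM(instr)		#instr		/* classic ARM side */

int main(void)
{
	printf("Thumb-2 kernel: SEV -> \"%s\", prefetchw -> \"%s\"\n",
	       WASM_THUMB2(sev), WASM_THUMB2(pldw));
	printf("ARM kernel:     SEV -> \"%s\", prefetchw -> \"%s\"\n",
	       WASM_ARM(sev), WASM_ARM(pldw));
	return 0;
}
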
arch/arm/lib/bitops.h
@@ -10,6 +10,11 @@ UNWIND(	.fnstart	)
 	and	r3, r0, #31		@ Get bit offset
 	mov	r0, r0, lsr #5
 	add	r1, r1, r0, lsl #2	@ Get word offset
+#if __LINUX_ARM_ARCH__ >= 7
+	.arch_extension	mp
+	ALT_SMP(W(pldw)	[r1])
+	ALT_UP(W(nop))
+#endif
 	mov	r3, r2, lsl r3
 1:	ldrex	r2, [r1]
 	\instr	r2, r2, r3
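
The bitops change applies the same prefetch-for-write idea to the shared bit-operation macro: compute the word address, hint the cache with pldw on SMP ARMv7, then enter the ldrex/\instr/strex loop. A user-space analogue using GCC builtins (my_set_bit is an illustrative name; __atomic_fetch_or stands in for the exclusive-access loop):

#include <stdint.h>
#include <stdio.h>

static void my_set_bit(unsigned int nr, uint32_t *words)
{
	uint32_t *p = words + (nr / 32);		/* word offset, as in the asm */
	uint32_t mask = UINT32_C(1) << (nr % 32);	/* the r3 bit mask */

	__builtin_prefetch(p, 1);			/* stands in for ALT_SMP(W(pldw) [r1]) */
	__atomic_fetch_or(p, mask, __ATOMIC_RELAXED);	/* the ldrex/orr/strex loop */
}

int main(void)
{
	uint32_t bitmap[4] = { 0 };

	my_set_bit(37, bitmap);				/* bit 5 of word 1 */
	printf("word1 = 0x%08x\n", (unsigned int)bitmap[1]);	/* prints 0x00000020 */
	return 0;
}
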