Commit dd2384a7 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'arc-v4.2-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc

Pull ARC fixes from Vineet Gupta:
 "Here's a late pull request for accumulated ARC fixes which came out of
  extended testing of the new ARCv2 port with LTP etc.  llock/scond
  livelock workaround has been reviewed by PeterZ.  The changes look a
  lot but I've crafted them into finer grained patches for better
  tracking later.

  I have some more fixes (ARC Futex backend) ready to go but those will
  have to wait for tglx to return from vacation.

  Summary:
   - Enable a reduced config of HS38 (w/o div-rem, ll64...)
   - Add software workaround for LLOCK/SCOND livelock
   - Fallout of a recent pt_regs update"

* tag 'arc-v4.2-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc:
  ARCv2: spinlock/rwlock/atomics: reduce 1 instruction in exponential backoff
  ARC: Make pt_regs regs unsigned
  ARCv2: spinlock/rwlock: Reset retry delay when starting a new spin-wait cycle
  ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff
  ARC: LLOCK/SCOND based rwlock
  ARC: LLOCK/SCOND based spin_lock
  ARC: refactor atomic inline asm operands with symbolic names
  Revert "ARCv2: STAR 9000837815 workaround hardware exclusive transactions livelock"
  ARCv2: [axs103_smp] Reduce clk for Quad FPGA configs
  ARCv2: Fix the peripheral address space detection
  ARCv2: allow selection of page size for MMUv4
  ARCv2: lib: memset: Don't assume 64-bit load/stores
  ARCv2: lib: memcpy: Missing PREFETCHW
  ARCv2: add knob for DIV_REV in Kconfig
  ARC/time: Migrate to new 'set-state' interface
parents b3b98a55 10971638
...@@ -313,11 +313,11 @@ config ARC_PAGE_SIZE_8K ...@@ -313,11 +313,11 @@ config ARC_PAGE_SIZE_8K
config ARC_PAGE_SIZE_16K config ARC_PAGE_SIZE_16K
bool "16KB" bool "16KB"
depends on ARC_MMU_V3 depends on ARC_MMU_V3 || ARC_MMU_V4
config ARC_PAGE_SIZE_4K config ARC_PAGE_SIZE_4K
bool "4KB" bool "4KB"
depends on ARC_MMU_V3 depends on ARC_MMU_V3 || ARC_MMU_V4
endchoice endchoice
...@@ -365,6 +365,11 @@ config ARC_HAS_LLSC ...@@ -365,6 +365,11 @@ config ARC_HAS_LLSC
default y default y
depends on !ARC_CANT_LLSC depends on !ARC_CANT_LLSC
config ARC_STAR_9000923308
bool "Workaround for llock/scond livelock"
default y
depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC
config ARC_HAS_SWAPE config ARC_HAS_SWAPE
bool "Insn: SWAPE (endian-swap)" bool "Insn: SWAPE (endian-swap)"
default y default y
...@@ -379,6 +384,10 @@ config ARC_HAS_LL64 ...@@ -379,6 +384,10 @@ config ARC_HAS_LL64
dest operands with 2 possible source operands. dest operands with 2 possible source operands.
default y default y
config ARC_HAS_DIV_REM
bool "Insn: div, divu, rem, remu"
default y
config ARC_HAS_RTC config ARC_HAS_RTC
bool "Local 64-bit r/o cycle counter" bool "Local 64-bit r/o cycle counter"
default n default n
......
...@@ -36,8 +36,16 @@ cflags-$(atleast_gcc44) += -fsection-anchors ...@@ -36,8 +36,16 @@ cflags-$(atleast_gcc44) += -fsection-anchors
cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock
cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape
ifdef CONFIG_ISA_ARCV2
ifndef CONFIG_ARC_HAS_LL64 ifndef CONFIG_ARC_HAS_LL64
cflags-$(CONFIG_ISA_ARCV2) += -mno-ll64 cflags-y += -mno-ll64
endif
ifndef CONFIG_ARC_HAS_DIV_REM
cflags-y += -mno-div-rem
endif
endif endif
cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables
......
...@@ -89,11 +89,10 @@ ...@@ -89,11 +89,10 @@
#define ECR_C_BIT_DTLB_LD_MISS 8 #define ECR_C_BIT_DTLB_LD_MISS 8
#define ECR_C_BIT_DTLB_ST_MISS 9 #define ECR_C_BIT_DTLB_ST_MISS 9
/* Auxiliary registers */ /* Auxiliary registers */
#define AUX_IDENTITY 4 #define AUX_IDENTITY 4
#define AUX_INTR_VEC_BASE 0x25 #define AUX_INTR_VEC_BASE 0x25
#define AUX_NON_VOL 0x5e
/* /*
* Floating Pt Registers * Floating Pt Registers
...@@ -240,9 +239,9 @@ struct bcr_extn_xymem { ...@@ -240,9 +239,9 @@ struct bcr_extn_xymem {
struct bcr_perip { struct bcr_perip {
#ifdef CONFIG_CPU_BIG_ENDIAN #ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int start:8, pad2:8, sz:8, pad:8; unsigned int start:8, pad2:8, sz:8, ver:8;
#else #else
unsigned int pad:8, sz:8, pad2:8, start:8; unsigned int ver:8, sz:8, pad2:8, start:8;
#endif #endif
}; };
......
...@@ -23,33 +23,60 @@ ...@@ -23,33 +23,60 @@
#define atomic_set(v, i) (((v)->counter) = (i)) #define atomic_set(v, i) (((v)->counter) = (i))
#ifdef CONFIG_ISA_ARCV2 #ifdef CONFIG_ARC_STAR_9000923308
#define PREFETCHW " prefetchw [%1] \n"
#else #define SCOND_FAIL_RETRY_VAR_DEF \
#define PREFETCHW unsigned int delay = 1, tmp; \
#define SCOND_FAIL_RETRY_ASM \
" bz 4f \n" \
" ; --- scond fail delay --- \n" \
" mov %[tmp], %[delay] \n" /* tmp = delay */ \
"2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \
" sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \
" rol %[delay], %[delay] \n" /* delay *= 2 */ \
" b 1b \n" /* start over */ \
"4: ; --- success --- \n" \
#define SCOND_FAIL_RETRY_VARS \
,[delay] "+&r" (delay),[tmp] "=&r" (tmp) \
#else /* !CONFIG_ARC_STAR_9000923308 */
#define SCOND_FAIL_RETRY_VAR_DEF
#define SCOND_FAIL_RETRY_ASM \
" bnz 1b \n" \
#define SCOND_FAIL_RETRY_VARS
#endif #endif
#define ATOMIC_OP(op, c_op, asm_op) \ #define ATOMIC_OP(op, c_op, asm_op) \
static inline void atomic_##op(int i, atomic_t *v) \ static inline void atomic_##op(int i, atomic_t *v) \
{ \ { \
unsigned int temp; \ unsigned int val; \
SCOND_FAIL_RETRY_VAR_DEF \
\ \
__asm__ __volatile__( \ __asm__ __volatile__( \
"1: \n" \ "1: llock %[val], [%[ctr]] \n" \
PREFETCHW \ " " #asm_op " %[val], %[val], %[i] \n" \
" llock %0, [%1] \n" \ " scond %[val], [%[ctr]] \n" \
" " #asm_op " %0, %0, %2 \n" \ " \n" \
" scond %0, [%1] \n" \ SCOND_FAIL_RETRY_ASM \
" bnz 1b \n" \ \
: "=&r"(temp) /* Early clobber, to prevent reg reuse */ \ : [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
: "r"(&v->counter), "ir"(i) \ SCOND_FAIL_RETRY_VARS \
: [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
[i] "ir" (i) \
: "cc"); \ : "cc"); \
} \ } \
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \ #define ATOMIC_OP_RETURN(op, c_op, asm_op) \
static inline int atomic_##op##_return(int i, atomic_t *v) \ static inline int atomic_##op##_return(int i, atomic_t *v) \
{ \ { \
unsigned int temp; \ unsigned int val; \
SCOND_FAIL_RETRY_VAR_DEF \
\ \
/* \ /* \
* Explicit full memory barrier needed before/after as \ * Explicit full memory barrier needed before/after as \
...@@ -58,19 +85,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \ ...@@ -58,19 +85,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
smp_mb(); \ smp_mb(); \
\ \
__asm__ __volatile__( \ __asm__ __volatile__( \
"1: \n" \ "1: llock %[val], [%[ctr]] \n" \
PREFETCHW \ " " #asm_op " %[val], %[val], %[i] \n" \
" llock %0, [%1] \n" \ " scond %[val], [%[ctr]] \n" \
" " #asm_op " %0, %0, %2 \n" \ " \n" \
" scond %0, [%1] \n" \ SCOND_FAIL_RETRY_ASM \
" bnz 1b \n" \ \
: "=&r"(temp) \ : [val] "=&r" (val) \
: "r"(&v->counter), "ir"(i) \ SCOND_FAIL_RETRY_VARS \
: [ctr] "r" (&v->counter), \
[i] "ir" (i) \
: "cc"); \ : "cc"); \
\ \
smp_mb(); \ smp_mb(); \
\ \
return temp; \ return val; \
} }
#else /* !CONFIG_ARC_HAS_LLSC */ #else /* !CONFIG_ARC_HAS_LLSC */
...@@ -150,6 +179,9 @@ ATOMIC_OP(and, &=, and) ...@@ -150,6 +179,9 @@ ATOMIC_OP(and, &=, and)
#undef ATOMIC_OPS #undef ATOMIC_OPS
#undef ATOMIC_OP_RETURN #undef ATOMIC_OP_RETURN
#undef ATOMIC_OP #undef ATOMIC_OP
#undef SCOND_FAIL_RETRY_VAR_DEF
#undef SCOND_FAIL_RETRY_ASM
#undef SCOND_FAIL_RETRY_VARS
/** /**
* __atomic_add_unless - add unless the number is a given value * __atomic_add_unless - add unless the number is a given value
......
...@@ -20,20 +20,20 @@ ...@@ -20,20 +20,20 @@
struct pt_regs { struct pt_regs {
/* Real registers */ /* Real registers */
long bta; /* bta_l1, bta_l2, erbta */ unsigned long bta; /* bta_l1, bta_l2, erbta */
long lp_start, lp_end, lp_count; unsigned long lp_start, lp_end, lp_count;
long status32; /* status32_l1, status32_l2, erstatus */ unsigned long status32; /* status32_l1, status32_l2, erstatus */
long ret; /* ilink1, ilink2 or eret */ unsigned long ret; /* ilink1, ilink2 or eret */
long blink; unsigned long blink;
long fp; unsigned long fp;
long r26; /* gp */ unsigned long r26; /* gp */
long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0; unsigned long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
long sp; /* user/kernel sp depending on where we came from */ unsigned long sp; /* User/Kernel depending on where we came from */
long orig_r0; unsigned long orig_r0;
/* /*
* To distinguish bet excp, syscall, irq * To distinguish bet excp, syscall, irq
...@@ -55,13 +55,13 @@ struct pt_regs { ...@@ -55,13 +55,13 @@ struct pt_regs {
unsigned long event; unsigned long event;
}; };
long user_r25; unsigned long user_r25;
}; };
#else #else
struct pt_regs { struct pt_regs {
long orig_r0; unsigned long orig_r0;
union { union {
struct { struct {
...@@ -76,26 +76,26 @@ struct pt_regs { ...@@ -76,26 +76,26 @@ struct pt_regs {
unsigned long event; unsigned long event;
}; };
long bta; /* bta_l1, bta_l2, erbta */ unsigned long bta; /* bta_l1, bta_l2, erbta */
long user_r25; unsigned long user_r25;
long r26; /* gp */ unsigned long r26; /* gp */
long fp; unsigned long fp;
long sp; /* user/kernel sp depending on where we came from */ unsigned long sp; /* user/kernel sp depending on where we came from */
long r12; unsigned long r12;
/*------- Below list auto saved by h/w -----------*/ /*------- Below list auto saved by h/w -----------*/
long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11; unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
long blink; unsigned long blink;
long lp_end, lp_start, lp_count; unsigned long lp_end, lp_start, lp_count;
long ei, ldi, jli; unsigned long ei, ldi, jli;
long ret; unsigned long ret;
long status32; unsigned long status32;
}; };
#endif #endif
...@@ -103,10 +103,10 @@ struct pt_regs { ...@@ -103,10 +103,10 @@ struct pt_regs {
/* Callee saved registers - need to be saved only when you are scheduled out */ /* Callee saved registers - need to be saved only when you are scheduled out */
struct callee_regs { struct callee_regs {
long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13; unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
}; };
#define instruction_pointer(regs) (unsigned long)((regs)->ret) #define instruction_pointer(regs) ((regs)->ret)
#define profile_pc(regs) instruction_pointer(regs) #define profile_pc(regs) instruction_pointer(regs)
/* return 1 if user mode or 0 if kernel mode */ /* return 1 if user mode or 0 if kernel mode */
...@@ -142,7 +142,7 @@ struct callee_regs { ...@@ -142,7 +142,7 @@ struct callee_regs {
static inline long regs_return_value(struct pt_regs *regs) static inline long regs_return_value(struct pt_regs *regs)
{ {
return regs->r0; return (long)regs->r0;
} }
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
......
...@@ -18,9 +18,518 @@ ...@@ -18,9 +18,518 @@
#define arch_spin_unlock_wait(x) \ #define arch_spin_unlock_wait(x) \
do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0) do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
#ifdef CONFIG_ARC_HAS_LLSC
/*
* A normal LLOCK/SCOND based system, w/o need for livelock workaround
*/
#ifndef CONFIG_ARC_STAR_9000923308
static inline void arch_spin_lock(arch_spinlock_t *lock) static inline void arch_spin_lock(arch_spinlock_t *lock)
{ {
unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; unsigned int val;
smp_mb();
__asm__ __volatile__(
"1: llock %[val], [%[slock]] \n"
" breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */
" scond %[LOCKED], [%[slock]] \n" /* acquire */
" bnz 1b \n"
" \n"
: [val] "=&r" (val)
: [slock] "r" (&(lock->slock)),
[LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
: "memory", "cc");
smp_mb();
}
/*
 * Try to take the spinlock without spinning on a held lock.
 *
 * Returns 1 - lock taken successfully, 0 - lock was already LOCKED.
 *
 * The only loop in here is the retry of a failed scond; if the lock value
 * read by llock is already LOCKED the routine bails out with got_it still 0.
 * smp_mb() is issued both before and after the asm sequence.
 */
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned int val, got_it = 0;
smp_mb();
__asm__ __volatile__(
"1: llock %[val], [%[slock]] \n"
" breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */
" scond %[LOCKED], [%[slock]] \n" /* acquire */
" bnz 1b \n" /* scond failed: start over from llock */
" mov %[got_it], 1 \n" /* reached only after a successful scond */
"4: \n"
" \n"
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
: [slock] "r" (&(lock->slock)),
[LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
: "memory", "cc");
smp_mb();
return got_it;
}
/*
 * Release the spinlock: a plain store of __ARCH_SPIN_LOCK_UNLOCKED__ to
 * lock->slock, with smp_mb() issued before and after the store.
 */
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
smp_mb();
lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
smp_mb();
}
/*
* Read-write spinlocks, allowing multiple readers but only one writer.
* Unfair locking as Writers could be starved indefinitely by Reader(s)
*/
/*
 * Take the rwlock for reading, spinning until that succeeds.
 *
 * The loop restarts from llock both while the counter compares as write
 * locked (WR_LOCKED, i.e. 0) and when the scond of the decremented counter
 * fails. This routine returns nothing: the "ret = 1" in the pseudo-code
 * below simply corresponds to leaving the loop.
 */
static inline void arch_read_lock(arch_rwlock_t *rw)
{
unsigned int val;
smp_mb();
/*
* zero means writer holds the lock exclusively, deny Reader.
* Otherwise grant lock to first/subseq reader
*
* if (rw->counter > 0) {
* rw->counter--;
* ret = 1;
* }
*/
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */
" sub %[val], %[val], 1 \n" /* reader lock */
" scond %[val], [%[rwlock]] \n"
" bnz 1b \n" /* scond failed: start over from llock */
" \n"
: [val] "=&r" (val)
: [rwlock] "r" (&(rw->counter)),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
}
/*
 * Try to take the rwlock for reading without waiting for a writer.
 *
 * Returns 1 - lock taken successfully, 0 - counter compared as write locked
 * (WR_LOCKED, i.e. 0) and nothing was changed.
 *
 * On the success path the counter is decremented by one via llock/scond;
 * a failed scond is retried from llock.
 */
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
unsigned int val, got_it = 0;
smp_mb();
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */
" sub %[val], %[val], 1 \n" /* counter-- */
" scond %[val], [%[rwlock]] \n"
" bnz 1b \n" /* retry if collided with someone */
" mov %[got_it], 1 \n" /* reached only after a successful scond */
" \n"
"4: ; --- done --- \n"
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
: [rwlock] "r" (&(rw->counter)),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
return got_it;
}
/*
 * Take the rwlock for writing, spinning until that succeeds.
 *
 * The loop restarts from llock both while the counter differs from
 * __ARCH_RW_LOCK_UNLOCKED__ and when the scond fails. On success the counter
 * holds WR_LOCKED (0). This routine returns nothing: the "ret = 1" in the
 * pseudo-code below simply corresponds to leaving the loop.
 */
static inline void arch_write_lock(arch_rwlock_t *rw)
{
unsigned int val;
smp_mb();
/*
* If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
* deny writer. Otherwise if unlocked grant to writer
* Hence the claim that Linux rwlocks are unfair to writers.
* (can be starved for an indefinite time by readers).
*
* if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
* rw->counter = 0;
* ret = 1;
* }
*/
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */
" mov %[val], %[WR_LOCKED] \n" /* value to store: write locked */
" scond %[val], [%[rwlock]] \n"
" bnz 1b \n" /* scond failed: start over from llock */
" \n"
: [val] "=&r" (val)
: [rwlock] "r" (&(rw->counter)),
[UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
}
/*
 * Try to take the rwlock for writing without waiting.
 *
 * Returns 1 - lock taken successfully, 0 - counter was not
 * __ARCH_RW_LOCK_UNLOCKED__ and nothing was changed.
 *
 * On the success path WR_LOCKED (0) is stored into the counter via
 * llock/scond; a failed scond is retried from llock.
 */
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
unsigned int val, got_it = 0;
smp_mb();
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */
" mov %[val], %[WR_LOCKED] \n" /* value to store: write locked */
" scond %[val], [%[rwlock]] \n"
" bnz 1b \n" /* retry if collided with someone */
" mov %[got_it], 1 \n" /* reached only after a successful scond */
" \n"
"4: ; --- done --- \n"
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
: [rwlock] "r" (&(rw->counter)),
[UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
return got_it;
}
/*
 * Drop a read hold on the rwlock: add one to rw->counter using an
 * llock/add/scond sequence that is retried until the scond succeeds.
 * smp_mb() is issued before and after the sequence.
 */
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
unsigned int val;
smp_mb();
/*
* rw->counter++;
*/
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" add %[val], %[val], 1 \n"
" scond %[val], [%[rwlock]] \n"
" bnz 1b \n" /* scond failed: start over from llock */
" \n"
: [val] "=&r" (val)
: [rwlock] "r" (&(rw->counter))
: "memory", "cc");
smp_mb();
}
/*
 * Drop the write hold on the rwlock: a plain store of
 * __ARCH_RW_LOCK_UNLOCKED__ to rw->counter, with smp_mb() issued before and
 * after the store.
 */
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
smp_mb();
rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
smp_mb();
}
#else /* CONFIG_ARC_STAR_9000923308 */
/*
* HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
* coherency transactions in the SCU. The exclusive line state keeps rotating
* among contending cores leading to a never ending cycle. So break the cycle
* by deferring the retry of failed exclusive access (SCOND). The actual delay
* needed is a function of the number of contending cores as well as the unrelated
* coherency traffic from other cores. To keep the code simple, start off with
* a small delay of 1, which would suffice in most cases, and in case of contention
* double the delay. Eventually the delay is sufficient such that the coherency
* pipeline is drained, thus a subsequent exclusive access would succeed.
*/
/*
 * Building blocks for the delayed scond retry, used by the lock routines
 * that follow:
 *
 *  SCOND_FAIL_RETRY_VAR_DEF - declares the locals 'delay' and 'tmp'.
 *  SCOND_FAIL_RETRY_ASM     - asm tail reached after a failed scond: copies
 *                             delay into tmp, counts tmp down to zero,
 *                             doubles delay, then branches back to label 1
 *                             of the enclosing asm. It also defines label 4,
 *                             which the enclosing asm uses as its exit.
 *  SCOND_FAIL_RETRY_VARS    - appends delay and tmp to the enclosing asm's
 *                             output operand list (note the leading comma).
 *
 * The enclosing asm is expected to provide label 1 and to initialise delay
 * itself (delay is an output-only operand here).
 */
#define SCOND_FAIL_RETRY_VAR_DEF \
unsigned int delay, tmp; \
#define SCOND_FAIL_RETRY_ASM \
" ; --- scond fail delay --- \n" \
" mov %[tmp], %[delay] \n" /* tmp = delay */ \
"2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \
" sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \
" rol %[delay], %[delay] \n" /* delay *= 2 */ \
" b 1b \n" /* start over */ \
" \n" \
"4: ; --- done --- \n" \
#define SCOND_FAIL_RETRY_VARS \
,[delay] "=&r" (delay), [tmp] "=&r" (tmp) \
/*
 * Take the spinlock, spinning until that succeeds (delayed scond retry
 * variant).
 *
 * Label 0 sets the retry delay to 1 and is re-entered every time the lock is
 * observed LOCKED, so each new spin-wait cycle starts with the smallest
 * delay. A failed scond falls through into SCOND_FAIL_RETRY_ASM, which
 * waits, doubles the delay and resumes at label 1 (delay is kept). Label 4,
 * the exit, is defined inside SCOND_FAIL_RETRY_ASM.
 */
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned int val;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
__asm__ __volatile__(
"0: mov %[delay], 1 \n" /* (re)start with the minimum delay */
"1: llock %[val], [%[slock]] \n"
" breq %[val], %[LOCKED], 0b \n" /* spin while LOCKED */
" scond %[LOCKED], [%[slock]] \n" /* acquire */
" bz 4f \n" /* done */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val)
SCOND_FAIL_RETRY_VARS
: [slock] "r" (&(lock->slock)),
[LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
: "memory", "cc");
smp_mb();
}
/*
 * Try to take the spinlock without spinning on a held lock (delayed scond
 * retry variant).
 *
 * Returns 1 - lock taken successfully, 0 - lock was already LOCKED.
 *
 * A failed scond falls through into SCOND_FAIL_RETRY_ASM, which waits,
 * doubles the delay and resumes at label 1. Nothing in this routine branches
 * back to label 0, so the delay is set to 1 once per call. Label 4, the
 * exit, is defined inside SCOND_FAIL_RETRY_ASM.
 */
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned int val, got_it = 0;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
__asm__ __volatile__(
"0: mov %[delay], 1 \n" /* start with the minimum delay */
"1: llock %[val], [%[slock]] \n"
" breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */
" scond %[LOCKED], [%[slock]] \n" /* acquire */
" bz.d 4f \n" /* scond succeeded: leave */
" mov.z %[got_it], 1 \n" /* got it */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
SCOND_FAIL_RETRY_VARS
: [slock] "r" (&(lock->slock)),
[LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
: "memory", "cc");
smp_mb();
return got_it;
}
/*
 * Release the spinlock: a plain store of __ARCH_SPIN_LOCK_UNLOCKED__ to
 * lock->slock, with smp_mb() issued before and after the store.
 */
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
smp_mb();
lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
smp_mb();
}
/*
* Read-write spinlocks, allowing multiple readers but only one writer.
* Unfair locking as Writers could be starved indefinitely by Reader(s)
*/
/*
 * Take the rwlock for reading, spinning until that succeeds (delayed scond
 * retry variant).
 *
 * Label 0 sets the retry delay to 1 and is re-entered every time the counter
 * compares as write locked (WR_LOCKED, i.e. 0). A failed scond falls through
 * into SCOND_FAIL_RETRY_ASM, which waits, doubles the delay and resumes at
 * label 1. Label 4, the exit, is defined inside SCOND_FAIL_RETRY_ASM.
 * This routine returns nothing: the "ret = 1" in the pseudo-code below
 * simply corresponds to leaving the loop.
 */
static inline void arch_read_lock(arch_rwlock_t *rw)
{
unsigned int val;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
/*
* zero means writer holds the lock exclusively, deny Reader.
* Otherwise grant lock to first/subseq reader
*
* if (rw->counter > 0) {
* rw->counter--;
* ret = 1;
* }
*/
__asm__ __volatile__(
"0: mov %[delay], 1 \n" /* (re)start with the minimum delay */
"1: llock %[val], [%[rwlock]] \n"
" brls %[val], %[WR_LOCKED], 0b\n" /* <= 0: spin while write locked */
" sub %[val], %[val], 1 \n" /* reader lock */
" scond %[val], [%[rwlock]] \n"
" bz 4f \n" /* done */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val)
SCOND_FAIL_RETRY_VARS
: [rwlock] "r" (&(rw->counter)),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
}
/*
 * Try to take the rwlock for reading without waiting for a writer (delayed
 * scond retry variant).
 *
 * Returns 1 - lock taken successfully, 0 - counter compared as write locked
 * (WR_LOCKED, i.e. 0) and nothing was changed.
 *
 * A failed scond falls through into SCOND_FAIL_RETRY_ASM, which waits,
 * doubles the delay and resumes at label 1. Nothing in this routine branches
 * back to label 0, so the delay is set to 1 once per call. Label 4, the
 * exit, is defined inside SCOND_FAIL_RETRY_ASM.
 */
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
unsigned int val, got_it = 0;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
__asm__ __volatile__(
"0: mov %[delay], 1 \n" /* start with the minimum delay */
"1: llock %[val], [%[rwlock]] \n"
" brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */
" sub %[val], %[val], 1 \n" /* counter-- */
" scond %[val], [%[rwlock]] \n"
" bz.d 4f \n" /* scond succeeded: leave */
" mov.z %[got_it], 1 \n" /* got it */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
SCOND_FAIL_RETRY_VARS
: [rwlock] "r" (&(rw->counter)),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
return got_it;
}
/*
 * Take the rwlock for writing, spinning until that succeeds (delayed scond
 * retry variant).
 *
 * Label 0 sets the retry delay to 1 and is re-entered every time the counter
 * differs from __ARCH_RW_LOCK_UNLOCKED__. A failed scond falls through into
 * SCOND_FAIL_RETRY_ASM, which waits, doubles the delay and resumes at
 * label 1. Label 4, the exit, is defined inside SCOND_FAIL_RETRY_ASM.
 * This routine returns nothing: the "ret = 1" in the pseudo-code below
 * simply corresponds to leaving the loop.
 */
static inline void arch_write_lock(arch_rwlock_t *rw)
{
unsigned int val;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
/*
* If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
* deny writer. Otherwise if unlocked grant to writer
* Hence the claim that Linux rwlocks are unfair to writers.
* (can be starved for an indefinite time by readers).
*
* if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
* rw->counter = 0;
* ret = 1;
* }
*/
__asm__ __volatile__(
"0: mov %[delay], 1 \n" /* (re)start with the minimum delay */
"1: llock %[val], [%[rwlock]] \n"
" brne %[val], %[UNLOCKED], 0b \n" /* while !UNLOCKED spin */
" mov %[val], %[WR_LOCKED] \n" /* value to store: write locked */
" scond %[val], [%[rwlock]] \n"
" bz 4f \n" /* scond succeeded: done */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val)
SCOND_FAIL_RETRY_VARS
: [rwlock] "r" (&(rw->counter)),
[UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
}
/*
 * Try to take the rwlock for writing without waiting (delayed scond retry
 * variant).
 *
 * Returns 1 - lock taken successfully, 0 - counter was not
 * __ARCH_RW_LOCK_UNLOCKED__ and nothing was changed.
 *
 * A failed scond falls through into SCOND_FAIL_RETRY_ASM, which waits,
 * doubles the delay and resumes at label 1. Nothing in this routine branches
 * back to label 0, so the delay is set to 1 once per call. Label 4, the
 * exit, is defined inside SCOND_FAIL_RETRY_ASM.
 */
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
unsigned int val, got_it = 0;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
__asm__ __volatile__(
"0: mov %[delay], 1 \n" /* start with the minimum delay */
"1: llock %[val], [%[rwlock]] \n"
" brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */
" mov %[val], %[WR_LOCKED] \n" /* value to store: write locked */
" scond %[val], [%[rwlock]] \n"
" bz.d 4f \n" /* scond succeeded: leave */
" mov.z %[got_it], 1 \n" /* got it */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
SCOND_FAIL_RETRY_VARS
: [rwlock] "r" (&(rw->counter)),
[UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
return got_it;
}
/*
 * Drop a read hold on the rwlock: add one to rw->counter using an
 * llock/add/scond sequence that is retried until the scond succeeds.
 * smp_mb() is issued before and after the sequence.
 *
 * Unlike the lock routines around it, this one retries a failed scond
 * immediately (bnz 1b) and does not use the SCOND_FAIL_RETRY_* delay.
 */
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
unsigned int val;
smp_mb();
/*
* rw->counter++;
*/
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" add %[val], %[val], 1 \n"
" scond %[val], [%[rwlock]] \n"
" bnz 1b \n" /* scond failed: start over from llock */
" \n"
: [val] "=&r" (val)
: [rwlock] "r" (&(rw->counter))
: "memory", "cc");
smp_mb();
}
/*
 * Drop the write hold on the rwlock by storing __ARCH_RW_LOCK_UNLOCKED__
 * into rw->counter.
 *
 * The store is done with an llock/scond pair retried until the scond
 * succeeds, not with a plain assignment; the value llock loads into val is
 * not used for anything else. smp_mb() is issued before and after.
 *
 * NOTE(review): the reason for using scond instead of a plain store is not
 * stated here - presumably tied to the livelock workaround; confirm.
 */
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
unsigned int val;
smp_mb();
/*
* rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
*/
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" scond %[UNLOCKED], [%[rwlock]]\n"
" bnz 1b \n" /* scond failed: start over from llock */
" \n"
: [val] "=&r" (val)
: [rwlock] "r" (&(rw->counter)),
[UNLOCKED] "r" (__ARCH_RW_LOCK_UNLOCKED__)
: "memory", "cc");
smp_mb();
}
#undef SCOND_FAIL_RETRY_VAR_DEF
#undef SCOND_FAIL_RETRY_ASM
#undef SCOND_FAIL_RETRY_VARS
#endif /* CONFIG_ARC_STAR_9000923308 */
#else /* !CONFIG_ARC_HAS_LLSC */
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned int val = __ARCH_SPIN_LOCK_LOCKED__;
/* /*
* This smp_mb() is technically superfluous, we only need the one * This smp_mb() is technically superfluous, we only need the one
...@@ -33,7 +542,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) ...@@ -33,7 +542,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
__asm__ __volatile__( __asm__ __volatile__(
"1: ex %0, [%1] \n" "1: ex %0, [%1] \n"
" breq %0, %2, 1b \n" " breq %0, %2, 1b \n"
: "+&r" (tmp) : "+&r" (val)
: "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__) : "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
: "memory"); : "memory");
...@@ -48,26 +557,27 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) ...@@ -48,26 +557,27 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
smp_mb(); smp_mb();
} }
/* 1 - lock taken successfully */
static inline int arch_spin_trylock(arch_spinlock_t *lock) static inline int arch_spin_trylock(arch_spinlock_t *lock)
{ {
unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__; unsigned int val = __ARCH_SPIN_LOCK_LOCKED__;
smp_mb(); smp_mb();
__asm__ __volatile__( __asm__ __volatile__(
"1: ex %0, [%1] \n" "1: ex %0, [%1] \n"
: "+r" (tmp) : "+r" (val)
: "r"(&(lock->slock)) : "r"(&(lock->slock))
: "memory"); : "memory");
smp_mb(); smp_mb();
return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__); return (val == __ARCH_SPIN_LOCK_UNLOCKED__);
} }
static inline void arch_spin_unlock(arch_spinlock_t *lock) static inline void arch_spin_unlock(arch_spinlock_t *lock)
{ {
unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__; unsigned int val = __ARCH_SPIN_LOCK_UNLOCKED__;
/* /*
* RELEASE barrier: given the instructions avail on ARCv2, full barrier * RELEASE barrier: given the instructions avail on ARCv2, full barrier
...@@ -77,7 +587,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) ...@@ -77,7 +587,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
__asm__ __volatile__( __asm__ __volatile__(
" ex %0, [%1] \n" " ex %0, [%1] \n"
: "+r" (tmp) : "+r" (val)
: "r"(&(lock->slock)) : "r"(&(lock->slock))
: "memory"); : "memory");
...@@ -90,19 +600,12 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) ...@@ -90,19 +600,12 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
/* /*
* Read-write spinlocks, allowing multiple readers but only one writer. * Read-write spinlocks, allowing multiple readers but only one writer.
* Unfair locking as Writers could be starved indefinitely by Reader(s)
* *
* The spinlock itself is contained in @counter and access to it is * The spinlock itself is contained in @counter and access to it is
* serialized with @lock_mutex. * serialized with @lock_mutex.
*
* Unfair locking as Writers could be starved indefinitely by Reader(s)
*/ */
/* Would read_trylock() succeed? */
#define arch_read_can_lock(x) ((x)->counter > 0)
/* Would write_trylock() succeed? */
#define arch_write_can_lock(x) ((x)->counter == __ARCH_RW_LOCK_UNLOCKED__)
/* 1 - lock taken successfully */ /* 1 - lock taken successfully */
static inline int arch_read_trylock(arch_rwlock_t *rw) static inline int arch_read_trylock(arch_rwlock_t *rw)
{ {
...@@ -173,6 +676,11 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) ...@@ -173,6 +676,11 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
arch_spin_unlock(&(rw->lock_mutex)); arch_spin_unlock(&(rw->lock_mutex));
} }
#endif
#define arch_read_can_lock(x) ((x)->counter > 0)
#define arch_write_can_lock(x) ((x)->counter == __ARCH_RW_LOCK_UNLOCKED__)
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
......
...@@ -26,7 +26,9 @@ typedef struct { ...@@ -26,7 +26,9 @@ typedef struct {
*/ */
typedef struct { typedef struct {
volatile unsigned int counter; volatile unsigned int counter;
#ifndef CONFIG_ARC_HAS_LLSC
arch_spinlock_t lock_mutex; arch_spinlock_t lock_mutex;
#endif
} arch_rwlock_t; } arch_rwlock_t;
#define __ARCH_RW_LOCK_UNLOCKED__ 0x01000000 #define __ARCH_RW_LOCK_UNLOCKED__ 0x01000000
......
...@@ -32,20 +32,20 @@ ...@@ -32,20 +32,20 @@
*/ */
struct user_regs_struct { struct user_regs_struct {
long pad; unsigned long pad;
struct { struct {
long bta, lp_start, lp_end, lp_count; unsigned long bta, lp_start, lp_end, lp_count;
long status32, ret, blink, fp, gp; unsigned long status32, ret, blink, fp, gp;
long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0; unsigned long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
long sp; unsigned long sp;
} scratch; } scratch;
long pad2; unsigned long pad2;
struct { struct {
long r25, r24, r23, r22, r21, r20; unsigned long r25, r24, r23, r22, r21, r20;
long r19, r18, r17, r16, r15, r14, r13; unsigned long r19, r18, r17, r16, r15, r14, r13;
} callee; } callee;
long efa; /* break pt addr, for break points in delay slots */ unsigned long efa; /* break pt addr, for break points in delay slots */
long stop_pc; /* give dbg stop_pc after ensuring brkpt trap */ unsigned long stop_pc; /* give dbg stop_pc after ensuring brkpt trap */
}; };
#endif /* !__ASSEMBLY__ */ #endif /* !__ASSEMBLY__ */
......
...@@ -47,6 +47,7 @@ static void read_arc_build_cfg_regs(void) ...@@ -47,6 +47,7 @@ static void read_arc_build_cfg_regs(void)
struct bcr_perip uncached_space; struct bcr_perip uncached_space;
struct bcr_generic bcr; struct bcr_generic bcr;
struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
unsigned long perip_space;
FIX_PTR(cpu); FIX_PTR(cpu);
READ_BCR(AUX_IDENTITY, cpu->core); READ_BCR(AUX_IDENTITY, cpu->core);
...@@ -56,7 +57,12 @@ static void read_arc_build_cfg_regs(void) ...@@ -56,7 +57,12 @@ static void read_arc_build_cfg_regs(void)
cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE); cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space); READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
BUG_ON((uncached_space.start << 24) != ARC_UNCACHED_ADDR_SPACE); if (uncached_space.ver < 3)
perip_space = uncached_space.start << 24;
else
perip_space = read_aux_reg(AUX_NON_VOL) & 0xF0000000;
BUG_ON(perip_space != ARC_UNCACHED_ADDR_SPACE);
READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy); READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
...@@ -330,6 +336,10 @@ static void arc_chk_core_config(void) ...@@ -330,6 +336,10 @@ static void arc_chk_core_config(void)
pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n"); pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n");
else if (!cpu->extn.fpu_dp && fpu_enabled) else if (!cpu->extn.fpu_dp && fpu_enabled)
panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n"); panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic &&
!IS_ENABLED(CONFIG_ARC_STAR_9000923308))
panic("llock/scond livelock workaround missing\n");
} }
/* /*
......
...@@ -203,34 +203,24 @@ static int arc_clkevent_set_next_event(unsigned long delta, ...@@ -203,34 +203,24 @@ static int arc_clkevent_set_next_event(unsigned long delta,
return 0; return 0;
} }
static void arc_clkevent_set_mode(enum clock_event_mode mode, static int arc_clkevent_set_periodic(struct clock_event_device *dev)
struct clock_event_device *dev)
{ {
switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
/* /*
* At X Hz, 1 sec = 1000ms -> X cycles; * At X Hz, 1 sec = 1000ms -> X cycles;
* 10ms -> X / 100 cycles * 10ms -> X / 100 cycles
*/ */
arc_timer_event_setup(arc_get_core_freq() / HZ); arc_timer_event_setup(arc_get_core_freq() / HZ);
break; return 0;
case CLOCK_EVT_MODE_ONESHOT:
break;
default:
break;
}
return;
} }
static DEFINE_PER_CPU(struct clock_event_device, arc_clockevent_device) = { static DEFINE_PER_CPU(struct clock_event_device, arc_clockevent_device) = {
.name = "ARC Timer0", .name = "ARC Timer0",
.features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC, .features = CLOCK_EVT_FEAT_ONESHOT |
.mode = CLOCK_EVT_MODE_UNUSED, CLOCK_EVT_FEAT_PERIODIC,
.rating = 300, .rating = 300,
.irq = TIMER0_IRQ, /* hardwired, no need for resources */ .irq = TIMER0_IRQ, /* hardwired, no need for resources */
.set_next_event = arc_clkevent_set_next_event, .set_next_event = arc_clkevent_set_next_event,
.set_mode = arc_clkevent_set_mode, .set_state_periodic = arc_clkevent_set_periodic,
}; };
static irqreturn_t timer_irq_handler(int irq, void *dev_id) static irqreturn_t timer_irq_handler(int irq, void *dev_id)
...@@ -240,7 +230,7 @@ static irqreturn_t timer_irq_handler(int irq, void *dev_id) ...@@ -240,7 +230,7 @@ static irqreturn_t timer_irq_handler(int irq, void *dev_id)
* irq_set_chip_and_handler() asked for handle_percpu_devid_irq() * irq_set_chip_and_handler() asked for handle_percpu_devid_irq()
*/ */
struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device); struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device);
int irq_reenable = evt->mode == CLOCK_EVT_MODE_PERIODIC; int irq_reenable = clockevent_state_periodic(evt);
/* /*
* Any write to CTRL reg ACks the interrupt, we rewrite the * Any write to CTRL reg ACks the interrupt, we rewrite the
......
...@@ -206,7 +206,7 @@ unalignedOffby3: ...@@ -206,7 +206,7 @@ unalignedOffby3:
ld.ab r6, [r1, 4] ld.ab r6, [r1, 4]
prefetch [r1, 28] ;Prefetch the next read location prefetch [r1, 28] ;Prefetch the next read location
ld.ab r8, [r1,4] ld.ab r8, [r1,4]
prefetch [r3, 32] ;Prefetch the next write location prefetchw [r3, 32] ;Prefetch the next write location
SHIFT_1 (r7, r6, 8) SHIFT_1 (r7, r6, 8)
or r7, r7, r5 or r7, r7, r5
......
...@@ -10,12 +10,6 @@ ...@@ -10,12 +10,6 @@
#undef PREALLOC_NOT_AVAIL #undef PREALLOC_NOT_AVAIL
#ifdef PREALLOC_NOT_AVAIL
#define PREWRITE(A,B) prefetchw [(A),(B)]
#else
#define PREWRITE(A,B) prealloc [(A),(B)]
#endif
ENTRY(memset) ENTRY(memset)
prefetchw [r0] ; Prefetch the write location prefetchw [r0] ; Prefetch the write location
mov.f 0, r2 mov.f 0, r2
...@@ -51,9 +45,15 @@ ENTRY(memset) ...@@ -51,9 +45,15 @@ ENTRY(memset)
;;; Convert len to Dwords, unfold x8 ;;; Convert len to Dwords, unfold x8
lsr.f lp_count, lp_count, 6 lsr.f lp_count, lp_count, 6
lpnz @.Lset64bytes lpnz @.Lset64bytes
;; LOOP START ;; LOOP START
PREWRITE(r3, 64) ;Prefetch the next write location #ifdef PREALLOC_NOT_AVAIL
prefetchw [r3, 64] ;Prefetch the next write location
#else
prealloc [r3, 64]
#endif
#ifdef CONFIG_ARC_HAS_LL64
std.ab r4, [r3, 8] std.ab r4, [r3, 8]
std.ab r4, [r3, 8] std.ab r4, [r3, 8]
std.ab r4, [r3, 8] std.ab r4, [r3, 8]
...@@ -62,16 +62,45 @@ ENTRY(memset) ...@@ -62,16 +62,45 @@ ENTRY(memset)
std.ab r4, [r3, 8] std.ab r4, [r3, 8]
std.ab r4, [r3, 8] std.ab r4, [r3, 8]
std.ab r4, [r3, 8] std.ab r4, [r3, 8]
#else
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
#endif
.Lset64bytes: .Lset64bytes:
lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes
lpnz .Lset32bytes lpnz .Lset32bytes
;; LOOP START ;; LOOP START
prefetchw [r3, 32] ;Prefetch the next write location prefetchw [r3, 32] ;Prefetch the next write location
#ifdef CONFIG_ARC_HAS_LL64
std.ab r4, [r3, 8] std.ab r4, [r3, 8]
std.ab r4, [r3, 8] std.ab r4, [r3, 8]
std.ab r4, [r3, 8] std.ab r4, [r3, 8]
std.ab r4, [r3, 8] std.ab r4, [r3, 8]
#else
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
#endif
.Lset32bytes: .Lset32bytes:
and.f lp_count, r2, 0x1F ;Last remaining 31 bytes and.f lp_count, r2, 0x1F ;Last remaining 31 bytes
......
...@@ -389,6 +389,21 @@ axs103_set_freq(unsigned int id, unsigned int fd, unsigned int od) ...@@ -389,6 +389,21 @@ axs103_set_freq(unsigned int id, unsigned int fd, unsigned int od)
static void __init axs103_early_init(void) static void __init axs103_early_init(void)
{ {
/*
* AXS103 configurations for SMP/QUAD configurations share device tree
* which defaults to 90 MHz. However recent failures of Quad config
* revealed P&R timing violations so clamp it down to safe 50 MHz
* Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack
*
* This hack is really hacky as of now. Fix it properly by getting the
* number of cores as return value of platform's early SMP callback
*/
#ifdef CONFIG_ARC_MCIP
unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F;
if (num_cores > 2)
arc_set_core_freq(50 * 1000000);
#endif
switch (arc_get_core_freq()/1000000) { switch (arc_get_core_freq()/1000000) {
case 33: case 33:
axs103_set_freq(1, 1, 1); axs103_set_freq(1, 1, 1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment