Commit 9e314890 authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'openrisc-for-linus' of git://github.com/openrisc/linux

Pull OpenRISC updates from Stafford Horne:
 "Highlights include:

   - optimized memset and memcpy routines, ~20% boot time saving

   - support for cpu idling

   - adding support for l.swa and l.lwa atomic operations (in spec from
     2014)

   - use atomics to implement: bitops, cmpxchg, futex

   - the atomics are in preparation for SMP support"

* tag 'openrisc-for-linus' of git://github.com/openrisc/linux: (25 commits)
  openrisc: head: Init r0 to 0 on start
  openrisc: Export ioremap symbols used by modules
  arch/openrisc/lib/memcpy.c: use correct OR1200 option
  openrisc: head: Remove unused strings
  openrisc: head: Move init strings to rodata section
  openrisc: entry: Fix delay slot detection
  openrisc: entry: Whitespace and comment cleanups
  scripts/checkstack.pl: Add openrisc support
  MAINTAINERS: Add the openrisc official repository
  openrisc: Add .gitignore
  openrisc: Add optimized memcpy routine
  openrisc: Add optimized memset
  openrisc: Initial support for the idle state
  openrisc: Fix the bitmask for the unit present register
  openrisc: remove unnecessary stddef.h include
  openrisc: add futex_atomic_* implementations
  openrisc: add optimized atomic operations
  openrisc: add cmpxchg and xchg implementations
  openrisc: add atomic bitops
  openrisc: add l.lwa/l.swa emulation
  ...
parents f8e6859e a4d44266
......@@ -9315,6 +9315,7 @@ OPENRISC ARCHITECTURE
M: Jonas Bonn <jonas@southpole.se>
M: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
M: Stafford Horne <shorne@gmail.com>
T: git git://github.com/openrisc/linux.git
L: openrisc@lists.librecores.org
W: http://openrisc.io
S: Maintained
......
......@@ -12,6 +12,7 @@ config OPENRISC
select HAVE_MEMBLOCK
select GPIOLIB
select HAVE_ARCH_TRACEHOOK
select SPARSE_IRQ
select GENERIC_IRQ_CHIP
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
......
......@@ -10,4 +10,3 @@ that are due for investigation shortly, i.e. our TODO list:
or1k and this change is slowly trickling through the stack. For the time
being, or32 is equivalent to or1k.
-- Implement optimized version of memcpy and memset
header-y += ucontext.h
generic-y += atomic.h
generic-y += auxvec.h
generic-y += barrier.h
generic-y += bitsperlong.h
......@@ -10,8 +9,6 @@ generic-y += bugs.h
generic-y += cacheflush.h
generic-y += checksum.h
generic-y += clkdev.h
generic-y += cmpxchg-local.h
generic-y += cmpxchg.h
generic-y += current.h
generic-y += device.h
generic-y += div64.h
......@@ -22,12 +19,12 @@ generic-y += exec.h
generic-y += fb.h
generic-y += fcntl.h
generic-y += ftrace.h
generic-y += futex.h
generic-y += hardirq.h
generic-y += hw_irq.h
generic-y += ioctl.h
generic-y += ioctls.h
generic-y += ipcbuf.h
generic-y += irq.h
generic-y += irq_regs.h
generic-y += irq_work.h
generic-y += kdebug.h
......
/*
* Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
*
* This file is licensed under the terms of the GNU General Public License
* version 2. This program is licensed "as is" without any warranty of any
* kind, whether express or implied.
*/
#ifndef __ASM_OPENRISC_ATOMIC_H
#define __ASM_OPENRISC_ATOMIC_H
#include <linux/types.h>
/* Atomically perform op with v->counter and i */
#define ATOMIC_OP(op) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
int tmp; \
\
__asm__ __volatile__( \
"1: l.lwa %0,0(%1) \n" \
" l." #op " %0,%0,%2 \n" \
" l.swa 0(%1),%0 \n" \
" l.bnf 1b \n" \
" l.nop \n" \
: "=&r"(tmp) \
: "r"(&v->counter), "r"(i) \
: "cc", "memory"); \
}
/* Atomically perform op with v->counter and i, return the result */
#define ATOMIC_OP_RETURN(op) \
static inline int atomic_##op##_return(int i, atomic_t *v) \
{ \
int tmp; \
\
__asm__ __volatile__( \
"1: l.lwa %0,0(%1) \n" \
" l." #op " %0,%0,%2 \n" \
" l.swa 0(%1),%0 \n" \
" l.bnf 1b \n" \
" l.nop \n" \
: "=&r"(tmp) \
: "r"(&v->counter), "r"(i) \
: "cc", "memory"); \
\
return tmp; \
}
/* Atomically perform op with v->counter and i, return orig v->counter */
#define ATOMIC_FETCH_OP(op) \
static inline int atomic_fetch_##op(int i, atomic_t *v) \
{ \
int tmp, old; \
\
__asm__ __volatile__( \
"1: l.lwa %0,0(%2) \n" \
" l." #op " %1,%0,%3 \n" \
" l.swa 0(%2),%1 \n" \
" l.bnf 1b \n" \
" l.nop \n" \
: "=&r"(old), "=&r"(tmp) \
: "r"(&v->counter), "r"(i) \
: "cc", "memory"); \
\
return old; \
}
ATOMIC_OP_RETURN(add)
ATOMIC_OP_RETURN(sub)
ATOMIC_FETCH_OP(add)
ATOMIC_FETCH_OP(sub)
ATOMIC_FETCH_OP(and)
ATOMIC_FETCH_OP(or)
ATOMIC_FETCH_OP(xor)
ATOMIC_OP(and)
ATOMIC_OP(or)
ATOMIC_OP(xor)
#undef ATOMIC_FETCH_OP
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
#define atomic_add_return atomic_add_return
#define atomic_sub_return atomic_sub_return
#define atomic_fetch_add atomic_fetch_add
#define atomic_fetch_sub atomic_fetch_sub
#define atomic_fetch_and atomic_fetch_and
#define atomic_fetch_or atomic_fetch_or
#define atomic_fetch_xor atomic_fetch_xor
#define atomic_and atomic_and
#define atomic_or atomic_or
#define atomic_xor atomic_xor
/*
* Atomically add a to v->counter as long as v is not already u.
* Returns the original value at v->counter.
*
* This is often used through atomic_inc_not_zero()
*/
static inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
int old, tmp;
__asm__ __volatile__(
"1: l.lwa %0, 0(%2) \n"
" l.sfeq %0, %4 \n"
" l.bf 2f \n"
" l.add %1, %0, %3 \n"
" l.swa 0(%2), %1 \n"
" l.bnf 1b \n"
" l.nop \n"
"2: \n"
: "=&r"(old), "=&r" (tmp)
: "r"(&v->counter), "r"(a), "r"(u)
: "cc", "memory");
return old;
}
#define __atomic_add_unless __atomic_add_unless
#include <asm-generic/atomic.h>
#endif /* __ASM_OPENRISC_ATOMIC_H */
......@@ -45,7 +45,7 @@
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/atomic.h>
#include <asm/bitops/atomic.h>
#include <asm-generic/bitops/non-atomic.h>
#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic.h>
......
/*
* Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
*
* This file is licensed under the terms of the GNU General Public License
* version 2. This program is licensed "as is" without any warranty of any
* kind, whether express or implied.
*/
#ifndef __ASM_OPENRISC_BITOPS_ATOMIC_H
#define __ASM_OPENRISC_BITOPS_ATOMIC_H
static inline void set_bit(int nr, volatile unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
unsigned long tmp;
__asm__ __volatile__(
"1: l.lwa %0,0(%1) \n"
" l.or %0,%0,%2 \n"
" l.swa 0(%1),%0 \n"
" l.bnf 1b \n"
" l.nop \n"
: "=&r"(tmp)
: "r"(p), "r"(mask)
: "cc", "memory");
}
static inline void clear_bit(int nr, volatile unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
unsigned long tmp;
__asm__ __volatile__(
"1: l.lwa %0,0(%1) \n"
" l.and %0,%0,%2 \n"
" l.swa 0(%1),%0 \n"
" l.bnf 1b \n"
" l.nop \n"
: "=&r"(tmp)
: "r"(p), "r"(~mask)
: "cc", "memory");
}
static inline void change_bit(int nr, volatile unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
unsigned long tmp;
__asm__ __volatile__(
"1: l.lwa %0,0(%1) \n"
" l.xor %0,%0,%2 \n"
" l.swa 0(%1),%0 \n"
" l.bnf 1b \n"
" l.nop \n"
: "=&r"(tmp)
: "r"(p), "r"(mask)
: "cc", "memory");
}
static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
unsigned long old;
unsigned long tmp;
__asm__ __volatile__(
"1: l.lwa %0,0(%2) \n"
" l.or %1,%0,%3 \n"
" l.swa 0(%2),%1 \n"
" l.bnf 1b \n"
" l.nop \n"
: "=&r"(old), "=&r"(tmp)
: "r"(p), "r"(mask)
: "cc", "memory");
return (old & mask) != 0;
}
static inline int test_and_clear_bit(int nr, volatile unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
unsigned long old;
unsigned long tmp;
__asm__ __volatile__(
"1: l.lwa %0,0(%2) \n"
" l.and %1,%0,%3 \n"
" l.swa 0(%2),%1 \n"
" l.bnf 1b \n"
" l.nop \n"
: "=&r"(old), "=&r"(tmp)
: "r"(p), "r"(~mask)
: "cc", "memory");
return (old & mask) != 0;
}
static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
unsigned long old;
unsigned long tmp;
__asm__ __volatile__(
"1: l.lwa %0,0(%2) \n"
" l.xor %1,%0,%3 \n"
" l.swa 0(%2),%1 \n"
" l.bnf 1b \n"
" l.nop \n"
: "=&r"(old), "=&r"(tmp)
: "r"(p), "r"(mask)
: "cc", "memory");
return (old & mask) != 0;
}
#endif /* __ASM_OPENRISC_BITOPS_ATOMIC_H */
/*
* Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
*
* This file is licensed under the terms of the GNU General Public License
* version 2. This program is licensed "as is" without any warranty of any
* kind, whether express or implied.
*/
#ifndef __ASM_OPENRISC_CMPXCHG_H
#define __ASM_OPENRISC_CMPXCHG_H
#include <linux/types.h>
/*
* This function doesn't exist, so you'll get a linker error
* if something tries to do an invalid cmpxchg().
*/
extern void __cmpxchg_called_with_bad_pointer(void);
#define __HAVE_ARCH_CMPXCHG 1
static inline unsigned long
__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
{
if (size != 4) {
__cmpxchg_called_with_bad_pointer();
return old;
}
__asm__ __volatile__(
"1: l.lwa %0, 0(%1) \n"
" l.sfeq %0, %2 \n"
" l.bnf 2f \n"
" l.nop \n"
" l.swa 0(%1), %3 \n"
" l.bnf 1b \n"
" l.nop \n"
"2: \n"
: "=&r"(old)
: "r"(ptr), "r"(old), "r"(new)
: "cc", "memory");
return old;
}
#define cmpxchg(ptr, o, n) \
({ \
(__typeof__(*(ptr))) __cmpxchg((ptr), \
(unsigned long)(o), \
(unsigned long)(n), \
sizeof(*(ptr))); \
})
/*
* This function doesn't exist, so you'll get a linker error if
* something tries to do an invalidly-sized xchg().
*/
extern void __xchg_called_with_bad_pointer(void);
static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
int size)
{
if (size != 4) {
__xchg_called_with_bad_pointer();
return val;
}
__asm__ __volatile__(
"1: l.lwa %0, 0(%1) \n"
" l.swa 0(%1), %2 \n"
" l.bnf 1b \n"
" l.nop \n"
: "=&r"(val)
: "r"(ptr), "r"(val)
: "cc", "memory");
return val;
}
#define xchg(ptr, with) \
((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr))))
#endif /* __ASM_OPENRISC_CMPXCHG_H */
......@@ -24,9 +24,11 @@ struct cpuinfo {
u32 icache_size;
u32 icache_block_size;
u32 icache_ways;
u32 dcache_size;
u32 dcache_block_size;
u32 dcache_ways;
};
extern struct cpuinfo cpuinfo;
......
#ifndef __ASM_OPENRISC_FUTEX_H
#define __ASM_OPENRISC_FUTEX_H
#ifdef __KERNEL__
#include <linux/futex.h>
#include <linux/uaccess.h>
#include <asm/errno.h>
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
({ \
__asm__ __volatile__ ( \
"1: l.lwa %0, %2 \n" \
insn "\n" \
"2: l.swa %2, %1 \n" \
" l.bnf 1b \n" \
" l.ori %1, r0, 0 \n" \
"3: \n" \
".section .fixup,\"ax\" \n" \
"4: l.j 3b \n" \
" l.addi %1, r0, %3 \n" \
".previous \n" \
".section __ex_table,\"a\" \n" \
".word 1b,4b,2b,4b \n" \
".previous \n" \
: "=&r" (oldval), "=&r" (ret), "+m" (*uaddr) \
: "i" (-EFAULT), "r" (oparg) \
: "cc", "memory" \
); \
})
static inline int
futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op << 8) >> 20;
int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret;
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
pagefault_disable();
switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op("l.or %1,%4,%4", ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_ADD:
__futex_atomic_op("l.add %1,%0,%4", ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_OR:
__futex_atomic_op("l.or %1,%0,%4", ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_ANDN:
__futex_atomic_op("l.and %1,%0,%4", ret, oldval, uaddr, ~oparg);
break;
case FUTEX_OP_XOR:
__futex_atomic_op("l.xor %1,%0,%4", ret, oldval, uaddr, oparg);
break;
default:
ret = -ENOSYS;
}
pagefault_enable();
if (!ret) {
switch (cmp) {
case FUTEX_OP_CMP_EQ:
ret = (oldval == cmparg);
break;
case FUTEX_OP_CMP_NE:
ret = (oldval != cmparg);
break;
case FUTEX_OP_CMP_LT:
ret = (oldval < cmparg);
break;
case FUTEX_OP_CMP_GE:
ret = (oldval >= cmparg);
break;
case FUTEX_OP_CMP_LE:
ret = (oldval <= cmparg);
break;
case FUTEX_OP_CMP_GT:
ret = (oldval > cmparg);
break;
default:
ret = -ENOSYS;
}
}
return ret;
}
static inline int
futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval)
{
int ret = 0;
u32 prev;
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
__asm__ __volatile__ ( \
"1: l.lwa %1, %2 \n" \
" l.sfeq %1, %3 \n" \
" l.bnf 3f \n" \
" l.nop \n" \
"2: l.swa %2, %4 \n" \
" l.bnf 1b \n" \
" l.nop \n" \
"3: \n" \
".section .fixup,\"ax\" \n" \
"4: l.j 3b \n" \
" l.addi %0, r0, %5 \n" \
".previous \n" \
".section __ex_table,\"a\" \n" \
".word 1b,4b,2b,4b \n" \
".previous \n" \
: "+r" (ret), "=&r" (prev), "+m" (*uaddr) \
: "r" (oldval), "r" (newval), "i" (-EFAULT) \
: "cc", "memory" \
);
*uval = prev;
return ret;
}
#endif /* __KERNEL__ */
#endif /* __ASM_OPENRISC_FUTEX_H */
......@@ -152,8 +152,8 @@
#define SPR_UPR_MP 0x00000020 /* MAC present */
#define SPR_UPR_DUP 0x00000040 /* Debug unit present */
#define SPR_UPR_PCUP 0x00000080 /* Performance counters unit present */
#define SPR_UPR_PMP 0x00000100 /* Power management present */
#define SPR_UPR_PICP 0x00000200 /* PIC present */
#define SPR_UPR_PICP 0x00000100 /* PIC present */
#define SPR_UPR_PMP 0x00000200 /* Power management present */
#define SPR_UPR_TTP 0x00000400 /* Tick timer present */
#define SPR_UPR_RES 0x00fe0000 /* Reserved */
#define SPR_UPR_CUP 0xff000000 /* Context units present */
......
#ifndef __ASM_OPENRISC_STRING_H
#define __ASM_OPENRISC_STRING_H
#define __HAVE_ARCH_MEMSET
extern void *memset(void *s, int c, __kernel_size_t n);
#define __HAVE_ARCH_MEMCPY
extern void *memcpy(void *dest, __const void *src, __kernel_size_t n);
#endif /* __ASM_OPENRISC_STRING_H */
......@@ -173,6 +173,11 @@ handler: ;\
l.j _ret_from_exception ;\
l.nop
/* clobbers 'reg' */
#define CLEAR_LWA_FLAG(reg) \
l.movhi reg,hi(lwa_flag) ;\
l.ori reg,reg,lo(lwa_flag) ;\
l.sw 0(reg),r0
/*
* NOTE: one should never assume that SPR_EPC, SPR_ESR, SPR_EEAR
* contain the same values as when exception we're handling
......@@ -193,6 +198,7 @@ EXCEPTION_ENTRY(_tng_kernel_start)
/* ---[ 0x200: BUS exception ]------------------------------------------- */
EXCEPTION_ENTRY(_bus_fault_handler)
CLEAR_LWA_FLAG(r3)
/* r4: EA of fault (set by EXCEPTION_HANDLE) */
l.jal do_bus_fault
l.addi r3,r1,0 /* pt_regs */
......@@ -202,11 +208,13 @@ EXCEPTION_ENTRY(_bus_fault_handler)
/* ---[ 0x300: Data Page Fault exception ]------------------------------- */
EXCEPTION_ENTRY(_dtlb_miss_page_fault_handler)
CLEAR_LWA_FLAG(r3)
l.and r5,r5,r0
l.j 1f
l.nop
EXCEPTION_ENTRY(_data_page_fault_handler)
CLEAR_LWA_FLAG(r3)
/* set up parameters for do_page_fault */
l.ori r5,r0,0x300 // exception vector
1:
......@@ -220,7 +228,7 @@ EXCEPTION_ENTRY(_data_page_fault_handler)
* DTLB miss handler in the CONFIG_GUARD_PROTECTED_CORE part
*/
#ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX
l.lwz r6,PT_PC(r3) // address of an offending insn
l.lwz r6,PT_PC(r3) // address of an offending insn
l.lwz r6,0(r6) // instruction that caused pf
l.srli r6,r6,26 // check opcode for jump insn
......@@ -236,57 +244,57 @@ EXCEPTION_ENTRY(_data_page_fault_handler)
l.bf 8f
l.sfeqi r6,0x12 // l.jalr
l.bf 8f
l.nop
l.nop
l.j 9f
l.nop
8:
l.nop
l.lwz r6,PT_PC(r3) // address of an offending insn
8: // offending insn is in delay slot
l.lwz r6,PT_PC(r3) // address of an offending insn
l.addi r6,r6,4
l.lwz r6,0(r6) // instruction that caused pf
l.srli r6,r6,26 // get opcode
9:
9: // offending instruction opcode loaded in r6
#else
l.mfspr r6,r0,SPR_SR // SR
// l.lwz r6,PT_SR(r3) // ESR
l.andi r6,r6,SPR_SR_DSX // check for delay slot exception
l.sfeqi r6,0x1 // exception happened in delay slot
l.bnf 7f
l.lwz r6,PT_PC(r3) // address of an offending insn
l.lwz r6,PT_SR(r3) // SR
l.andi r6,r6,SPR_SR_DSX // check for delay slot exception
l.sfne r6,r0 // exception happened in delay slot
l.bnf 7f
l.lwz r6,PT_PC(r3) // address of an offending insn
l.addi r6,r6,4 // offending insn is in delay slot
l.addi r6,r6,4 // offending insn is in delay slot
7:
l.lwz r6,0(r6) // instruction that caused pf
l.srli r6,r6,26 // check opcode for write access
#endif
l.sfgeui r6,0x33 // check opcode for write access
l.sfgeui r6,0x33 // check opcode for write access
l.bnf 1f
l.sfleui r6,0x37
l.bnf 1f
l.ori r6,r0,0x1 // write access
l.j 2f
l.nop
l.nop
1: l.ori r6,r0,0x0 // !write access
2:
/* call fault.c handler in or32/mm/fault.c */
l.jal do_page_fault
l.nop
l.nop
l.j _ret_from_exception
l.nop
l.nop
/* ---[ 0x400: Insn Page Fault exception ]------------------------------- */
EXCEPTION_ENTRY(_itlb_miss_page_fault_handler)
CLEAR_LWA_FLAG(r3)
l.and r5,r5,r0
l.j 1f
l.nop
EXCEPTION_ENTRY(_insn_page_fault_handler)
CLEAR_LWA_FLAG(r3)
/* set up parameters for do_page_fault */
l.ori r5,r0,0x400 // exception vector
1:
......@@ -296,14 +304,15 @@ EXCEPTION_ENTRY(_insn_page_fault_handler)
/* call fault.c handler in or32/mm/fault.c */
l.jal do_page_fault
l.nop
l.nop
l.j _ret_from_exception
l.nop
l.nop
/* ---[ 0x500: Timer exception ]----------------------------------------- */
EXCEPTION_ENTRY(_timer_handler)
CLEAR_LWA_FLAG(r3)
l.jal timer_interrupt
l.addi r3,r1,0 /* pt_regs */
......@@ -313,6 +322,7 @@ EXCEPTION_ENTRY(_timer_handler)
/* ---[ 0x600: Aligment exception ]-------------------------------------- */
EXCEPTION_ENTRY(_alignment_handler)
CLEAR_LWA_FLAG(r3)
/* r4: EA of fault (set by EXCEPTION_HANDLE) */
l.jal do_unaligned_access
l.addi r3,r1,0 /* pt_regs */
......@@ -509,6 +519,7 @@ EXCEPTION_ENTRY(_external_irq_handler)
// l.sw PT_SR(r1),r4
1:
#endif
CLEAR_LWA_FLAG(r3)
l.addi r3,r1,0
l.movhi r8,hi(do_IRQ)
l.ori r8,r8,lo(do_IRQ)
......@@ -556,8 +567,12 @@ ENTRY(_sys_call_handler)
* they should be clobbered, otherwise
*/
l.sw PT_GPR3(r1),r3
/* r4 already saved */
/* r4 holds the EEAR address of the fault, load the original r4 */
/*
* r4 already saved
* r4 holds the EEAR address of the fault, use it as screatch reg and
* then load the original r4
*/
CLEAR_LWA_FLAG(r4)
l.lwz r4,PT_GPR4(r1)
l.sw PT_GPR5(r1),r5
l.sw PT_GPR6(r1),r6
......@@ -776,6 +791,7 @@ UNHANDLED_EXCEPTION(_vector_0xd00,0xd00)
/* ---[ 0xe00: Trap exception ]------------------------------------------ */
EXCEPTION_ENTRY(_trap_handler)
CLEAR_LWA_FLAG(r3)
/* r4: EA of fault (set by EXCEPTION_HANDLE) */
l.jal do_trap
l.addi r3,r1,0 /* pt_regs */
......
This diff is collapsed.
......@@ -44,3 +44,4 @@ DECLARE_EXPORT(__ashldi3);
DECLARE_EXPORT(__lshrdi3);
EXPORT_SYMBOL(__copy_tofrom_user);
EXPORT_SYMBOL(memset);
......@@ -75,6 +75,17 @@ void machine_power_off(void)
__asm__("l.nop 1");
}
/*
* Send the doze signal to the cpu if available.
* Make sure, that all interrupts are enabled
*/
void arch_cpu_idle(void)
{
local_irq_enable();
if (mfspr(SPR_UPR) & SPR_UPR_PMP)
mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
}
void (*pm_power_off) (void) = machine_power_off;
/*
......@@ -226,6 +237,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu)
extern struct thread_info *_switch(struct thread_info *old_ti,
struct thread_info *new_ti);
extern int lwa_flag;
struct task_struct *__switch_to(struct task_struct *old,
struct task_struct *new)
......@@ -243,6 +255,8 @@ struct task_struct *__switch_to(struct task_struct *old,
new_ti = new->stack;
old_ti = old->stack;
lwa_flag = 0;
current_thread_info_set[smp_processor_id()] = new_ti;
last = (_switch(old_ti, new_ti))->task;
......
......@@ -16,7 +16,6 @@
* 2 of the License, or (at your option) any later version.
*/
#include <stddef.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
......
......@@ -117,13 +117,15 @@ static void print_cpuinfo(void)
if (upr & SPR_UPR_DCP)
printk(KERN_INFO
"-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n",
cpuinfo.dcache_size, cpuinfo.dcache_block_size, 1);
cpuinfo.dcache_size, cpuinfo.dcache_block_size,
cpuinfo.dcache_ways);
else
printk(KERN_INFO "-- dcache disabled\n");
if (upr & SPR_UPR_ICP)
printk(KERN_INFO
"-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n",
cpuinfo.icache_size, cpuinfo.icache_block_size, 1);
cpuinfo.icache_size, cpuinfo.icache_block_size,
cpuinfo.icache_ways);
else
printk(KERN_INFO "-- icache disabled\n");
......@@ -155,25 +157,25 @@ void __init setup_cpuinfo(void)
{
struct device_node *cpu;
unsigned long iccfgr, dccfgr;
unsigned long cache_set_size, cache_ways;
unsigned long cache_set_size;
cpu = of_find_compatible_node(NULL, NULL, "opencores,or1200-rtlsvn481");
if (!cpu)
panic("No compatible CPU found in device tree...\n");
iccfgr = mfspr(SPR_ICCFGR);
cache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
cpuinfo.icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
cpuinfo.icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
cpuinfo.icache_size =
cache_set_size * cache_ways * cpuinfo.icache_block_size;
cache_set_size * cpuinfo.icache_ways * cpuinfo.icache_block_size;
dccfgr = mfspr(SPR_DCCFGR);
cache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
cpuinfo.dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
cpuinfo.dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
cpuinfo.dcache_size =
cache_set_size * cache_ways * cpuinfo.dcache_block_size;
cache_set_size * cpuinfo.dcache_ways * cpuinfo.dcache_block_size;
if (of_property_read_u32(cpu, "clock-frequency",
&cpuinfo.clock_frequency)) {
......@@ -308,30 +310,33 @@ static int show_cpuinfo(struct seq_file *m, void *v)
revision = vr & SPR_VR_REV;
seq_printf(m,
"cpu\t\t: OpenRISC-%x\n"
"revision\t: %d\n"
"frequency\t: %ld\n"
"dcache size\t: %d bytes\n"
"dcache block size\t: %d bytes\n"
"icache size\t: %d bytes\n"
"icache block size\t: %d bytes\n"
"immu\t\t: %d entries, %lu ways\n"
"dmmu\t\t: %d entries, %lu ways\n"
"bogomips\t: %lu.%02lu\n",
version,
revision,
loops_per_jiffy * HZ,
cpuinfo.dcache_size,
cpuinfo.dcache_block_size,
cpuinfo.icache_size,
cpuinfo.icache_block_size,
1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW),
1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2),
1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW),
(loops_per_jiffy * HZ) / 500000,
((loops_per_jiffy * HZ) / 5000) % 100);
"cpu\t\t: OpenRISC-%x\n"
"revision\t: %d\n"
"frequency\t: %ld\n"
"dcache size\t: %d bytes\n"
"dcache block size\t: %d bytes\n"
"dcache ways\t: %d\n"
"icache size\t: %d bytes\n"
"icache block size\t: %d bytes\n"
"icache ways\t: %d\n"
"immu\t\t: %d entries, %lu ways\n"
"dmmu\t\t: %d entries, %lu ways\n"
"bogomips\t: %lu.%02lu\n",
version,
revision,
loops_per_jiffy * HZ,
cpuinfo.dcache_size,
cpuinfo.dcache_block_size,
cpuinfo.dcache_ways,
cpuinfo.icache_size,
cpuinfo.icache_block_size,
cpuinfo.icache_ways,
1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW),
1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2),
1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW),
(loops_per_jiffy * HZ) / 500000,
((loops_per_jiffy * HZ) / 5000) % 100);
return 0;
}
......
......@@ -40,6 +40,8 @@
extern char _etext, _stext;
int kstack_depth_to_print = 0x180;
int lwa_flag;
unsigned long __user *lwa_addr;
static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
{
......@@ -334,10 +336,191 @@ asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address)
}
}
static inline int in_delay_slot(struct pt_regs *regs)
{
#ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX
/* No delay slot flag, do the old way */
unsigned int op, insn;
insn = *((unsigned int *)regs->pc);
op = insn >> 26;
switch (op) {
case 0x00: /* l.j */
case 0x01: /* l.jal */
case 0x03: /* l.bnf */
case 0x04: /* l.bf */
case 0x11: /* l.jr */
case 0x12: /* l.jalr */
return 1;
default:
return 0;
}
#else
return regs->sr & SPR_SR_DSX;
#endif
}
static inline void adjust_pc(struct pt_regs *regs, unsigned long address)
{
int displacement;
unsigned int rb, op, jmp;
if (unlikely(in_delay_slot(regs))) {
/* In delay slot, instruction at pc is a branch, simulate it */
jmp = *((unsigned int *)regs->pc);
displacement = sign_extend32(((jmp) & 0x3ffffff) << 2, 27);
rb = (jmp & 0x0000ffff) >> 11;
op = jmp >> 26;
switch (op) {
case 0x00: /* l.j */
regs->pc += displacement;
return;
case 0x01: /* l.jal */
regs->pc += displacement;
regs->gpr[9] = regs->pc + 8;
return;
case 0x03: /* l.bnf */
if (regs->sr & SPR_SR_F)
regs->pc += 8;
else
regs->pc += displacement;
return;
case 0x04: /* l.bf */
if (regs->sr & SPR_SR_F)
regs->pc += displacement;
else
regs->pc += 8;
return;
case 0x11: /* l.jr */
regs->pc = regs->gpr[rb];
return;
case 0x12: /* l.jalr */
regs->pc = regs->gpr[rb];
regs->gpr[9] = regs->pc + 8;
return;
default:
break;
}
} else {
regs->pc += 4;
}
}
static inline void simulate_lwa(struct pt_regs *regs, unsigned long address,
unsigned int insn)
{
unsigned int ra, rd;
unsigned long value;
unsigned long orig_pc;
long imm;
const struct exception_table_entry *entry;
orig_pc = regs->pc;
adjust_pc(regs, address);
ra = (insn >> 16) & 0x1f;
rd = (insn >> 21) & 0x1f;
imm = (short)insn;
lwa_addr = (unsigned long __user *)(regs->gpr[ra] + imm);
if ((unsigned long)lwa_addr & 0x3) {
do_unaligned_access(regs, address);
return;
}
if (get_user(value, lwa_addr)) {
if (user_mode(regs)) {
force_sig(SIGSEGV, current);
return;
}
if ((entry = search_exception_tables(orig_pc))) {
regs->pc = entry->fixup;
return;
}
/* kernel access in kernel space, load it directly */
value = *((unsigned long *)lwa_addr);
}
lwa_flag = 1;
regs->gpr[rd] = value;
}
static inline void simulate_swa(struct pt_regs *regs, unsigned long address,
unsigned int insn)
{
unsigned long __user *vaddr;
unsigned long orig_pc;
unsigned int ra, rb;
long imm;
const struct exception_table_entry *entry;
orig_pc = regs->pc;
adjust_pc(regs, address);
ra = (insn >> 16) & 0x1f;
rb = (insn >> 11) & 0x1f;
imm = (short)(((insn & 0x2200000) >> 10) | (insn & 0x7ff));
vaddr = (unsigned long __user *)(regs->gpr[ra] + imm);
if (!lwa_flag || vaddr != lwa_addr) {
regs->sr &= ~SPR_SR_F;
return;
}
if ((unsigned long)vaddr & 0x3) {
do_unaligned_access(regs, address);
return;
}
if (put_user(regs->gpr[rb], vaddr)) {
if (user_mode(regs)) {
force_sig(SIGSEGV, current);
return;
}
if ((entry = search_exception_tables(orig_pc))) {
regs->pc = entry->fixup;
return;
}
/* kernel access in kernel space, store it directly */
*((unsigned long *)vaddr) = regs->gpr[rb];
}
lwa_flag = 0;
regs->sr |= SPR_SR_F;
}
#define INSN_LWA 0x1b
#define INSN_SWA 0x33
asmlinkage void do_illegal_instruction(struct pt_regs *regs,
unsigned long address)
{
siginfo_t info;
unsigned int op;
unsigned int insn = *((unsigned int *)address);
op = insn >> 26;
switch (op) {
case INSN_LWA:
simulate_lwa(regs, address, insn);
return;
case INSN_SWA:
simulate_swa(regs, address, insn);
return;
default:
break;
}
if (user_mode(regs)) {
/* Send a SIGILL */
......
......@@ -2,4 +2,4 @@
# Makefile for or32 specific library files..
#
obj-y = string.o delay.o
obj-y := delay.o string.o memset.o memcpy.o
/*
* arch/openrisc/lib/memcpy.c
*
* Optimized memory copy routines for openrisc. These are mostly copied
* from ohter sources but slightly entended based on ideas discuassed in
* #openrisc.
*
* The word unroll implementation is an extension to the arm byte
* unrolled implementation, but using word copies (if things are
* properly aligned)
*
* The great arm loop unroll algorithm can be found at:
* arch/arm/boot/compressed/string.c
*/
#include <linux/export.h>
#include <linux/string.h>
#ifdef CONFIG_OR1K_1200
/*
* Do memcpy with word copies and loop unrolling. This gives the
* best performance on the OR1200 and MOR1KX archirectures
*/
void *memcpy(void *dest, __const void *src, __kernel_size_t n)
{
int i = 0;
unsigned char *d, *s;
uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src;
/* If both source and dest are word aligned copy words */
if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) {
/* Copy 32 bytes per loop */
for (i = n >> 5; i > 0; i--) {
*dest_w++ = *src_w++;
*dest_w++ = *src_w++;
*dest_w++ = *src_w++;
*dest_w++ = *src_w++;
*dest_w++ = *src_w++;
*dest_w++ = *src_w++;
*dest_w++ = *src_w++;
*dest_w++ = *src_w++;
}
if (n & 1 << 4) {
*dest_w++ = *src_w++;
*dest_w++ = *src_w++;
*dest_w++ = *src_w++;
*dest_w++ = *src_w++;
}
if (n & 1 << 3) {
*dest_w++ = *src_w++;
*dest_w++ = *src_w++;
}
if (n & 1 << 2)
*dest_w++ = *src_w++;
d = (unsigned char *)dest_w;
s = (unsigned char *)src_w;
} else {
d = (unsigned char *)dest_w;
s = (unsigned char *)src_w;
for (i = n >> 3; i > 0; i--) {
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
}
if (n & 1 << 2) {
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
*d++ = *s++;
}
}
if (n & 1 << 1) {
*d++ = *s++;
*d++ = *s++;
}
if (n & 1)
*d++ = *s++;
return dest;
}
#else
/*
* Use word copies but no loop unrolling as we cannot assume there
* will be benefits on the archirecture
*/
void *memcpy(void *dest, __const void *src, __kernel_size_t n)
{
unsigned char *d = (unsigned char *)dest, *s = (unsigned char *)src;
uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src;
/* If both source and dest are word aligned copy words */
if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) {
for (; n >= 4; n -= 4)
*dest_w++ = *src_w++;
}
d = (unsigned char *)dest_w;
s = (unsigned char *)src_w;
/* For remaining or if not aligned, copy bytes */
for (; n >= 1; n -= 1)
*d++ = *s++;
return dest;
}
#endif
EXPORT_SYMBOL(memcpy);
/*
* OpenRISC memset.S
*
* Hand-optimized assembler version of memset for OpenRISC.
* Algorithm inspired by several other arch-specific memset routines
* in the kernel tree
*
* Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
.global memset
.type memset, @function
memset:
/* arguments:
* r3 = *s
* r4 = c
* r5 = n
* r13, r15, r17, r19 used as temp regs
*/
/* Exit if n == 0 */
l.sfeqi r5, 0
l.bf 4f
/* Truncate c to char */
l.andi r13, r4, 0xff
/* Skip word extension if c is 0 */
l.sfeqi r13, 0
l.bf 1f
/* Check for at least two whole words (8 bytes) */
l.sfleui r5, 7
/* Extend char c to 32-bit word cccc in r13 */
l.slli r15, r13, 16 // r13 = 000c, r15 = 0c00
l.or r13, r13, r15 // r13 = 0c0c, r15 = 0c00
l.slli r15, r13, 8 // r13 = 0c0c, r15 = c0c0
l.or r13, r13, r15 // r13 = cccc, r15 = c0c0
1: l.addi r19, r3, 0 // Set r19 = src
/* Jump to byte copy loop if less than two words */
l.bf 3f
l.or r17, r5, r0 // Set r17 = n
/* Mask out two LSBs to check alignment */
l.andi r15, r3, 0x3
/* lsb == 00, jump to word copy loop */
l.sfeqi r15, 0
l.bf 2f
l.addi r19, r3, 0 // Set r19 = src
/* lsb == 01,10 or 11 */
l.sb 0(r3), r13 // *src = c
l.addi r17, r17, -1 // Decrease n
l.sfeqi r15, 3
l.bf 2f
l.addi r19, r3, 1 // src += 1
/* lsb == 01 or 10 */
l.sb 1(r3), r13 // *(src+1) = c
l.addi r17, r17, -1 // Decrease n
l.sfeqi r15, 2
l.bf 2f
l.addi r19, r3, 2 // src += 2
/* lsb == 01 */
l.sb 2(r3), r13 // *(src+2) = c
l.addi r17, r17, -1 // Decrease n
l.addi r19, r3, 3 // src += 3
/* Word copy loop */
2: l.sw 0(r19), r13 // *src = cccc
l.addi r17, r17, -4 // Decrease n
l.sfgeui r17, 4
l.bf 2b
l.addi r19, r19, 4 // Increase src
/* When n > 0, copy the remaining bytes, otherwise jump to exit */
l.sfeqi r17, 0
l.bf 4f
/* Byte copy loop */
3: l.addi r17, r17, -1 // Decrease n
l.sb 0(r19), r13 // *src = cccc
l.sfnei r17, 0
l.bf 3b
l.addi r19, r19, 1 // Increase src
4: l.jr r9
l.ori r11, r3, 0
......@@ -80,6 +80,7 @@ __ioremap(phys_addr_t addr, unsigned long size, pgprot_t prot)
return (void __iomem *)(offset + (char *)v);
}
EXPORT_SYMBOL(__ioremap);
void iounmap(void *addr)
{
......@@ -106,6 +107,7 @@ void iounmap(void *addr)
return vfree((void *)(PAGE_MASK & (unsigned long)addr));
}
EXPORT_SYMBOL(iounmap);
/**
* OK, this one's a bit tricky... ioremap can get called before memory is
......
......@@ -223,6 +223,7 @@ static inline void atomic_dec(atomic_t *v)
#define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v)))
#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new)))
#ifndef __atomic_add_unless
static inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
int c, old;
......@@ -231,5 +232,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u)
c = old;
return c;
}
#endif
#endif /* __ASM_GENERIC_ATOMIC_H */
......@@ -81,6 +81,9 @@ my (@stack, $re, $dre, $x, $xs, $funcre);
} elsif ($arch eq 'nios2') {
#25a8: defffb04 addi sp,sp,-20
$re = qr/.*addi.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o;
} elsif ($arch eq 'openrisc') {
# c000043c: 9c 21 fe f0 l.addi r1,r1,-272
$re = qr/.*l\.addi.*r1,r1,-(([0-9]{2}|[3-9])[0-9]{2})/o;
} elsif ($arch eq 'parisc' || $arch eq 'parisc64') {
$re = qr/.*ldo ($x{1,8})\(sp\),sp/o;
} elsif ($arch eq 'ppc') {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment