Commit 619b6e18 authored by Maciej W. Rozycki's avatar Maciej W. Rozycki Committed by Ralf Baechle

[MIPS] R4000/R4400 daddiu erratum workaround

 This complements the generic R4000/R4400 errata workaround code and adds 
bits for the daddiu problem.  In most places it just modifies handwritten 
assembly code so that the assembler is allowed to use a temporary register 
as daddiu may now be treated as a macro that expands to a sequence of li 
and daddu.  It is the AT register or, where AT is unavailable or used 
explicitly for another purpose, an explicitly-named register is selected, 
using the .set at=<reg> feature added recently to gas.  This feature is 
only used if CONFIG_CPU_DADDI_WORKAROUNDS has been set, so if the 
workaround remains disabled, the required version of binutils stays 
unchanged.

 Similarly, daddiu instructions put in branch delay slots in noreorder 
fragments are now taken out of them and the assembler is allowed to 
reorder them itself as possible (which it does making the whole idea of 
scheduling them into delay slots manually questionable).

 Also in the very few places where such a simple conversion was not 
possible, a handcoded longer sequence is implemented.

 Other than that there are changes to code responsible for building the 
TLB fault and page clear/copy handlers to avoid daddiu as appropriate.  
These are only effective if the erratum is verified to be present at the 
run time.

 Finally there is a trivial update to __delay(), because it uses daddiu in 
a branch delay slot.
Signed-off-by: default avatarMaciej W. Rozycki <macro@linux-mips.org>
Signed-off-by: default avatarRalf Baechle <ralf@linux-mips.org>
parent 20d60d99
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Copyright (C) 1994 - 2000, 2001, 2003 Ralf Baechle * Copyright (C) 1994 - 2000, 2001, 2003 Ralf Baechle
* Copyright (C) 1999, 2000 Silicon Graphics, Inc. * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
* Copyright (C) 2001 MIPS Technologies, Inc. * Copyright (C) 2001 MIPS Technologies, Inc.
* Copyright (C) 2002 Maciej W. Rozycki * Copyright (C) 2002, 2007 Maciej W. Rozycki
*/ */
#include <linux/init.h> #include <linux/init.h>
...@@ -471,7 +471,13 @@ NESTED(nmi_handler, PT_SIZE, sp) ...@@ -471,7 +471,13 @@ NESTED(nmi_handler, PT_SIZE, sp)
jr k0 jr k0
rfe rfe
#else #else
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
LONG_ADDIU k0, 4 /* stall on $k0 */ LONG_ADDIU k0, 4 /* stall on $k0 */
#else
.set at=v1
LONG_ADDIU k0, 4
.set noat
#endif
MTC0 k0, CP0_EPC MTC0 k0, CP0_EPC
/* I hope three instructions between MTC0 and ERET are enough... */ /* I hope three instructions between MTC0 and ERET are enough... */
ori k1, _THREAD_MASK ori k1, _THREAD_MASK
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
* *
* Copyright (C) 1998, 1999 Ralf Baechle * Copyright (C) 1998, 1999 Ralf Baechle
* Copyright (C) 1999 Silicon Graphics, Inc. * Copyright (C) 1999 Silicon Graphics, Inc.
* Copyright (C) 2007 Maciej W. Rozycki
*/ */
#include <linux/errno.h> #include <linux/errno.h>
#include <asm/asm.h> #include <asm/asm.h>
...@@ -52,9 +53,12 @@ ...@@ -52,9 +53,12 @@
#define UNIT(unit) ((unit)*NBYTES) #define UNIT(unit) ((unit)*NBYTES)
#define ADDC(sum,reg) \ #define ADDC(sum,reg) \
.set push; \
.set noat; \
ADD sum, reg; \ ADD sum, reg; \
sltu v1, sum, reg; \ sltu v1, sum, reg; \
ADD sum, v1 ADD sum, v1; \
.set pop
#define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \ #define CSUM_BIGCHUNK1(src, offset, sum, _t0, _t1, _t2, _t3) \
LOAD _t0, (offset + UNIT(0))(src); \ LOAD _t0, (offset + UNIT(0))(src); \
...@@ -178,8 +182,10 @@ move_128bytes: ...@@ -178,8 +182,10 @@ move_128bytes:
CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4) CSUM_BIGCHUNK(src, 0x40, sum, t0, t1, t3, t4)
CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4) CSUM_BIGCHUNK(src, 0x60, sum, t0, t1, t3, t4)
LONG_SUBU t8, t8, 0x01 LONG_SUBU t8, t8, 0x01
.set reorder /* DADDI_WAR */
PTR_ADDU src, src, 0x80
bnez t8, move_128bytes bnez t8, move_128bytes
PTR_ADDU src, src, 0x80 .set noreorder
1: 1:
beqz t2, 1f beqz t2, 1f
...@@ -208,8 +214,10 @@ end_words: ...@@ -208,8 +214,10 @@ end_words:
lw t0, (src) lw t0, (src)
LONG_SUBU t8, t8, 0x1 LONG_SUBU t8, t8, 0x1
ADDC(sum, t0) ADDC(sum, t0)
.set reorder /* DADDI_WAR */
PTR_ADDU src, src, 0x4
bnez t8, end_words bnez t8, end_words
PTR_ADDU src, src, 0x4 .set noreorder
/* unknown src alignment and < 8 bytes to go */ /* unknown src alignment and < 8 bytes to go */
small_csumcpy: small_csumcpy:
...@@ -246,6 +254,8 @@ small_csumcpy: ...@@ -246,6 +254,8 @@ small_csumcpy:
1: ADDC(sum, t1) 1: ADDC(sum, t1)
/* fold checksum */ /* fold checksum */
.set push
.set noat
#ifdef USE_DOUBLE #ifdef USE_DOUBLE
dsll32 v1, sum, 0 dsll32 v1, sum, 0
daddu sum, v1 daddu sum, v1
...@@ -266,6 +276,7 @@ small_csumcpy: ...@@ -266,6 +276,7 @@ small_csumcpy:
srl sum, sum, 8 srl sum, sum, 8
or sum, v1 or sum, v1
andi sum, 0xffff andi sum, 0xffff
.set pop
1: 1:
.set reorder .set reorder
/* Add the passed partial csum. */ /* Add the passed partial csum. */
...@@ -373,7 +384,11 @@ small_csumcpy: ...@@ -373,7 +384,11 @@ small_csumcpy:
#define ADDRMASK (NBYTES-1) #define ADDRMASK (NBYTES-1)
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
.set noat .set noat
#else
.set at=v1
#endif
LEAF(__csum_partial_copy_user) LEAF(__csum_partial_copy_user)
PTR_ADDU AT, src, len /* See (1) above. */ PTR_ADDU AT, src, len /* See (1) above. */
...@@ -441,8 +456,10 @@ EXC( STORE t6, UNIT(6)(dst), s_exc) ...@@ -441,8 +456,10 @@ EXC( STORE t6, UNIT(6)(dst), s_exc)
ADDC(sum, t6) ADDC(sum, t6)
EXC( STORE t7, UNIT(7)(dst), s_exc) EXC( STORE t7, UNIT(7)(dst), s_exc)
ADDC(sum, t7) ADDC(sum, t7)
.set reorder /* DADDI_WAR */
ADD dst, dst, 8*NBYTES
bgez len, 1b bgez len, 1b
ADD dst, dst, 8*NBYTES .set noreorder
ADD len, 8*NBYTES # revert len (see above) ADD len, 8*NBYTES # revert len (see above)
/* /*
...@@ -471,8 +488,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc) ...@@ -471,8 +488,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc)
ADDC(sum, t2) ADDC(sum, t2)
EXC( STORE t3, UNIT(3)(dst), s_exc) EXC( STORE t3, UNIT(3)(dst), s_exc)
ADDC(sum, t3) ADDC(sum, t3)
.set reorder /* DADDI_WAR */
ADD dst, dst, 4*NBYTES
beqz len, done beqz len, done
ADD dst, dst, 4*NBYTES .set noreorder
less_than_4units: less_than_4units:
/* /*
* rem = len % NBYTES * rem = len % NBYTES
...@@ -485,8 +504,10 @@ EXC( LOAD t0, 0(src), l_exc) ...@@ -485,8 +504,10 @@ EXC( LOAD t0, 0(src), l_exc)
SUB len, len, NBYTES SUB len, len, NBYTES
EXC( STORE t0, 0(dst), s_exc) EXC( STORE t0, 0(dst), s_exc)
ADDC(sum, t0) ADDC(sum, t0)
.set reorder /* DADDI_WAR */
ADD dst, dst, NBYTES
bne rem, len, 1b bne rem, len, 1b
ADD dst, dst, NBYTES .set noreorder
/* /*
* src and dst are aligned, need to copy rem bytes (rem < NBYTES) * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
...@@ -572,8 +593,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc) ...@@ -572,8 +593,10 @@ EXC( STORE t2, UNIT(2)(dst), s_exc)
ADDC(sum, t2) ADDC(sum, t2)
EXC( STORE t3, UNIT(3)(dst), s_exc) EXC( STORE t3, UNIT(3)(dst), s_exc)
ADDC(sum, t3) ADDC(sum, t3)
.set reorder /* DADDI_WAR */
ADD dst, dst, 4*NBYTES
bne len, rem, 1b bne len, rem, 1b
ADD dst, dst, 4*NBYTES .set noreorder
cleanup_src_unaligned: cleanup_src_unaligned:
beqz len, done beqz len, done
...@@ -587,8 +610,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy) ...@@ -587,8 +610,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
SUB len, len, NBYTES SUB len, len, NBYTES
EXC( STORE t0, 0(dst), s_exc) EXC( STORE t0, 0(dst), s_exc)
ADDC(sum, t0) ADDC(sum, t0)
.set reorder /* DADDI_WAR */
ADD dst, dst, NBYTES
bne len, rem, 1b bne len, rem, 1b
ADD dst, dst, NBYTES .set noreorder
copy_bytes_checklen: copy_bytes_checklen:
beqz len, done beqz len, done
...@@ -631,6 +656,8 @@ copy_bytes_done: ...@@ -631,6 +656,8 @@ copy_bytes_done:
ADDC(sum, t2) ADDC(sum, t2)
done: done:
/* fold checksum */ /* fold checksum */
.set push
.set noat
#ifdef USE_DOUBLE #ifdef USE_DOUBLE
dsll32 v1, sum, 0 dsll32 v1, sum, 0
daddu sum, v1 daddu sum, v1
...@@ -651,6 +678,7 @@ done: ...@@ -651,6 +678,7 @@ done:
srl sum, sum, 8 srl sum, sum, 8
or sum, v1 or sum, v1
andi sum, 0xffff andi sum, 0xffff
.set pop
1: 1:
.set reorder .set reorder
ADDC(sum, psum) ADDC(sum, psum)
...@@ -678,8 +706,10 @@ EXC( lbu t1, 0(src), l_exc) ...@@ -678,8 +706,10 @@ EXC( lbu t1, 0(src), l_exc)
SLLV t1, t1, t2 SLLV t1, t1, t2
addu t2, SHIFT_INC addu t2, SHIFT_INC
ADDC(sum, t1) ADDC(sum, t1)
.set reorder /* DADDI_WAR */
ADD dst, dst, 1
bne src, t0, 1b bne src, t0, 1b
ADD dst, dst, 1 .set noreorder
l_exc: l_exc:
LOAD t0, TI_TASK($28) LOAD t0, TI_TASK($28)
nop nop
...@@ -697,12 +727,22 @@ l_exc: ...@@ -697,12 +727,22 @@ l_exc:
* Clear len bytes starting at dst. Can't call __bzero because it * Clear len bytes starting at dst. Can't call __bzero because it
* might modify len. An inefficient loop for these rare times... * might modify len. An inefficient loop for these rare times...
*/ */
.set reorder /* DADDI_WAR */
SUB src, len, 1
beqz len, done beqz len, done
SUB src, len, 1 .set noreorder
1: sb zero, 0(dst) 1: sb zero, 0(dst)
ADD dst, dst, 1 ADD dst, dst, 1
.set push
.set noat
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
bnez src, 1b bnez src, 1b
SUB src, src, 1 SUB src, src, 1
#else
li v1, 1
bnez src, 1b
SUB src, src, v1
#endif
li v1, -EFAULT li v1, -EFAULT
b done b done
sw v1, (errptr) sw v1, (errptr)
...@@ -712,4 +752,5 @@ s_exc: ...@@ -712,4 +752,5 @@ s_exc:
li v1, -EFAULT li v1, -EFAULT
jr ra jr ra
sw v1, (errptr) sw v1, (errptr)
.set pop
END(__csum_partial_copy_user) END(__csum_partial_copy_user)
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
* Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc. * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
* Copyright (C) 2002 Broadcom, Inc. * Copyright (C) 2002 Broadcom, Inc.
* memcpy/copy_user author: Mark Vandevoorde * memcpy/copy_user author: Mark Vandevoorde
* Copyright (C) 2007 Maciej W. Rozycki
* *
* Mnemonic names for arguments to memcpy/__copy_user * Mnemonic names for arguments to memcpy/__copy_user
*/ */
...@@ -175,7 +176,11 @@ ...@@ -175,7 +176,11 @@
.text .text
.set noreorder .set noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
.set noat .set noat
#else
.set at=v1
#endif
/* /*
* A combined memcpy/__copy_user * A combined memcpy/__copy_user
...@@ -268,8 +273,10 @@ EXC( LOAD t3, UNIT(3)(src), l_exc_copy) ...@@ -268,8 +273,10 @@ EXC( LOAD t3, UNIT(3)(src), l_exc_copy)
STORE t1, UNIT(1)(dst) STORE t1, UNIT(1)(dst)
STORE t2, UNIT(2)(dst) STORE t2, UNIT(2)(dst)
STORE t3, UNIT(3)(dst) STORE t3, UNIT(3)(dst)
.set reorder /* DADDI_WAR */
ADD dst, dst, 4*NBYTES
beqz len, done beqz len, done
ADD dst, dst, 4*NBYTES .set noreorder
less_than_4units: less_than_4units:
/* /*
* rem = len % NBYTES * rem = len % NBYTES
...@@ -281,8 +288,10 @@ EXC( LOAD t0, 0(src), l_exc) ...@@ -281,8 +288,10 @@ EXC( LOAD t0, 0(src), l_exc)
ADD src, src, NBYTES ADD src, src, NBYTES
SUB len, len, NBYTES SUB len, len, NBYTES
STORE t0, 0(dst) STORE t0, 0(dst)
.set reorder /* DADDI_WAR */
ADD dst, dst, NBYTES
bne rem, len, 1b bne rem, len, 1b
ADD dst, dst, NBYTES .set noreorder
/* /*
* src and dst are aligned, need to copy rem bytes (rem < NBYTES) * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
...@@ -361,8 +370,10 @@ EXC( LDREST t3, REST(3)(src), l_exc_copy) ...@@ -361,8 +370,10 @@ EXC( LDREST t3, REST(3)(src), l_exc_copy)
STORE t2, UNIT(2)(dst) STORE t2, UNIT(2)(dst)
STORE t3, UNIT(3)(dst) STORE t3, UNIT(3)(dst)
PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed)
.set reorder /* DADDI_WAR */
ADD dst, dst, 4*NBYTES
bne len, rem, 1b bne len, rem, 1b
ADD dst, dst, 4*NBYTES .set noreorder
cleanup_src_unaligned: cleanup_src_unaligned:
beqz len, done beqz len, done
...@@ -375,8 +386,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy) ...@@ -375,8 +386,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
ADD src, src, NBYTES ADD src, src, NBYTES
SUB len, len, NBYTES SUB len, len, NBYTES
STORE t0, 0(dst) STORE t0, 0(dst)
.set reorder /* DADDI_WAR */
ADD dst, dst, NBYTES
bne len, rem, 1b bne len, rem, 1b
ADD dst, dst, NBYTES .set noreorder
copy_bytes_checklen: copy_bytes_checklen:
beqz len, done beqz len, done
...@@ -424,8 +437,10 @@ l_exc_copy: ...@@ -424,8 +437,10 @@ l_exc_copy:
EXC( lb t1, 0(src), l_exc) EXC( lb t1, 0(src), l_exc)
ADD src, src, 1 ADD src, src, 1
sb t1, 0(dst) # can't fault -- we're copy_from_user sb t1, 0(dst) # can't fault -- we're copy_from_user
.set reorder /* DADDI_WAR */
ADD dst, dst, 1
bne src, t0, 1b bne src, t0, 1b
ADD dst, dst, 1 .set noreorder
l_exc: l_exc:
LOAD t0, TI_TASK($28) LOAD t0, TI_TASK($28)
nop nop
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
* Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc. * Copyright (C) 1999, 2000, 01, 2002 Silicon Graphics, Inc.
* Copyright (C) 2002 Broadcom, Inc. * Copyright (C) 2002 Broadcom, Inc.
* memcpy/copy_user author: Mark Vandevoorde * memcpy/copy_user author: Mark Vandevoorde
* Copyright (C) 2007 Maciej W. Rozycki
* *
* Mnemonic names for arguments to memcpy/__copy_user * Mnemonic names for arguments to memcpy/__copy_user
*/ */
...@@ -175,7 +176,11 @@ ...@@ -175,7 +176,11 @@
.text .text
.set noreorder .set noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
.set noat .set noat
#else
.set at=v1
#endif
/* /*
* A combined memcpy/__copy_user * A combined memcpy/__copy_user
...@@ -271,8 +276,10 @@ EXC( STORE t0, UNIT(0)(dst), s_exc_p4u) ...@@ -271,8 +276,10 @@ EXC( STORE t0, UNIT(0)(dst), s_exc_p4u)
EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) EXC( STORE t1, UNIT(1)(dst), s_exc_p3u)
EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) EXC( STORE t2, UNIT(2)(dst), s_exc_p2u)
EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) EXC( STORE t3, UNIT(3)(dst), s_exc_p1u)
.set reorder /* DADDI_WAR */
ADD dst, dst, 4*NBYTES
beqz len, done beqz len, done
ADD dst, dst, 4*NBYTES .set noreorder
less_than_4units: less_than_4units:
/* /*
* rem = len % NBYTES * rem = len % NBYTES
...@@ -284,8 +291,10 @@ EXC( LOAD t0, 0(src), l_exc) ...@@ -284,8 +291,10 @@ EXC( LOAD t0, 0(src), l_exc)
ADD src, src, NBYTES ADD src, src, NBYTES
SUB len, len, NBYTES SUB len, len, NBYTES
EXC( STORE t0, 0(dst), s_exc_p1u) EXC( STORE t0, 0(dst), s_exc_p1u)
.set reorder /* DADDI_WAR */
ADD dst, dst, NBYTES
bne rem, len, 1b bne rem, len, 1b
ADD dst, dst, NBYTES .set noreorder
/* /*
* src and dst are aligned, need to copy rem bytes (rem < NBYTES) * src and dst are aligned, need to copy rem bytes (rem < NBYTES)
...@@ -364,8 +373,10 @@ EXC( STORE t1, UNIT(1)(dst), s_exc_p3u) ...@@ -364,8 +373,10 @@ EXC( STORE t1, UNIT(1)(dst), s_exc_p3u)
EXC( STORE t2, UNIT(2)(dst), s_exc_p2u) EXC( STORE t2, UNIT(2)(dst), s_exc_p2u)
EXC( STORE t3, UNIT(3)(dst), s_exc_p1u) EXC( STORE t3, UNIT(3)(dst), s_exc_p1u)
PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed) PREF( 1, 9*32(dst) ) # 1 is PREF_STORE (not streamed)
.set reorder /* DADDI_WAR */
ADD dst, dst, 4*NBYTES
bne len, rem, 1b bne len, rem, 1b
ADD dst, dst, 4*NBYTES .set noreorder
cleanup_src_unaligned: cleanup_src_unaligned:
beqz len, done beqz len, done
...@@ -378,8 +389,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy) ...@@ -378,8 +389,10 @@ EXC( LDREST t0, REST(0)(src), l_exc_copy)
ADD src, src, NBYTES ADD src, src, NBYTES
SUB len, len, NBYTES SUB len, len, NBYTES
EXC( STORE t0, 0(dst), s_exc_p1u) EXC( STORE t0, 0(dst), s_exc_p1u)
.set reorder /* DADDI_WAR */
ADD dst, dst, NBYTES
bne len, rem, 1b bne len, rem, 1b
ADD dst, dst, NBYTES .set noreorder
copy_bytes_checklen: copy_bytes_checklen:
beqz len, done beqz len, done
...@@ -427,8 +440,10 @@ l_exc_copy: ...@@ -427,8 +440,10 @@ l_exc_copy:
EXC( lb t1, 0(src), l_exc) EXC( lb t1, 0(src), l_exc)
ADD src, src, 1 ADD src, src, 1
sb t1, 0(dst) # can't fault -- we're copy_from_user sb t1, 0(dst) # can't fault -- we're copy_from_user
.set reorder /* DADDI_WAR */
ADD dst, dst, 1
bne src, t0, 1b bne src, t0, 1b
ADD dst, dst, 1 .set noreorder
l_exc: l_exc:
LOAD t0, TI_TASK($28) LOAD t0, TI_TASK($28)
nop nop
...@@ -446,20 +461,33 @@ l_exc: ...@@ -446,20 +461,33 @@ l_exc:
* Clear len bytes starting at dst. Can't call __bzero because it * Clear len bytes starting at dst. Can't call __bzero because it
* might modify len. An inefficient loop for these rare times... * might modify len. An inefficient loop for these rare times...
*/ */
.set reorder /* DADDI_WAR */
SUB src, len, 1
beqz len, done beqz len, done
SUB src, len, 1 .set noreorder
1: sb zero, 0(dst) 1: sb zero, 0(dst)
ADD dst, dst, 1 ADD dst, dst, 1
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
bnez src, 1b bnez src, 1b
SUB src, src, 1 SUB src, src, 1
#else
.set push
.set noat
li v1, 1
bnez src, 1b
SUB src, src, v1
.set pop
#endif
jr ra jr ra
nop nop
#define SEXC(n) \ #define SEXC(n) \
s_exc_p ## n ## u: \ .set reorder; /* DADDI_WAR */ \
jr ra; \ s_exc_p ## n ## u: \
ADD len, len, n*NBYTES ADD len, len, n*NBYTES; \
jr ra; \
.set noreorder
SEXC(8) SEXC(8)
SEXC(7) SEXC(7)
...@@ -471,8 +499,10 @@ SEXC(2) ...@@ -471,8 +499,10 @@ SEXC(2)
SEXC(1) SEXC(1)
s_exc_p1: s_exc_p1:
.set reorder /* DADDI_WAR */
ADD len, len, 1
jr ra jr ra
ADD len, len, 1 .set noreorder
s_exc: s_exc:
jr ra jr ra
nop nop
...@@ -502,8 +532,10 @@ r_end_bytes: ...@@ -502,8 +532,10 @@ r_end_bytes:
SUB a2, a2, 0x1 SUB a2, a2, 0x1
sb t0, -1(a0) sb t0, -1(a0)
SUB a1, a1, 0x1 SUB a1, a1, 0x1
.set reorder /* DADDI_WAR */
SUB a0, a0, 0x1
bnez a2, r_end_bytes bnez a2, r_end_bytes
SUB a0, a0, 0x1 .set noreorder
r_out: r_out:
jr ra jr ra
...@@ -514,8 +546,10 @@ r_end_bytes_up: ...@@ -514,8 +546,10 @@ r_end_bytes_up:
SUB a2, a2, 0x1 SUB a2, a2, 0x1
sb t0, (a0) sb t0, (a0)
ADD a1, a1, 0x1 ADD a1, a1, 0x1
.set reorder /* DADDI_WAR */
ADD a0, a0, 0x1
bnez a2, r_end_bytes_up bnez a2, r_end_bytes_up
ADD a0, a0, 0x1 .set noreorder
jr ra jr ra
move a2, zero move a2, zero
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
* *
* Copyright (C) 1998, 1999, 2000 by Ralf Baechle * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
* Copyright (C) 1999, 2000 Silicon Graphics, Inc. * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
* Copyright (C) 2007 Maciej W. Rozycki
*/ */
#include <asm/asm.h> #include <asm/asm.h>
#include <asm/asm-offsets.h> #include <asm/asm-offsets.h>
...@@ -74,8 +75,16 @@ FEXPORT(__bzero) ...@@ -74,8 +75,16 @@ FEXPORT(__bzero)
bnez t0, small_memset bnez t0, small_memset
andi t0, a0, LONGMASK /* aligned? */ andi t0, a0, LONGMASK /* aligned? */
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
beqz t0, 1f beqz t0, 1f
PTR_SUBU t0, LONGSIZE /* alignment in bytes */ PTR_SUBU t0, LONGSIZE /* alignment in bytes */
#else
.set noat
li AT, LONGSIZE
beqz t0, 1f
PTR_SUBU t0, AT /* alignment in bytes */
.set at
#endif
#ifdef __MIPSEB__ #ifdef __MIPSEB__
EX(LONG_S_L, a1, (a0), first_fixup) /* make word/dword aligned */ EX(LONG_S_L, a1, (a0), first_fixup) /* make word/dword aligned */
...@@ -106,7 +115,7 @@ memset_partial: ...@@ -106,7 +115,7 @@ memset_partial:
.set noat .set noat
LONG_SRL AT, t0, 1 LONG_SRL AT, t0, 1
PTR_SUBU t1, AT PTR_SUBU t1, AT
.set noat .set at
#endif #endif
jr t1 jr t1
PTR_ADDU a0, t0 /* dest ptr */ PTR_ADDU a0, t0 /* dest ptr */
......
...@@ -41,9 +41,9 @@ FEXPORT(__strncpy_from_user_nocheck_asm) ...@@ -41,9 +41,9 @@ FEXPORT(__strncpy_from_user_nocheck_asm)
beqz t0, 2f beqz t0, 2f
sb t0, (a0) sb t0, (a0)
PTR_ADDIU v0, 1 PTR_ADDIU v0, 1
bne v0, a2, 1b
PTR_ADDIU a0, 1
.set reorder .set reorder
PTR_ADDIU a0, 1
bne v0, a2, 1b
2: PTR_ADDU t0, a1, v0 2: PTR_ADDU t0, a1, v0
xor t0, a1 xor t0, a1
bltz t0, fault bltz t0, fault
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
* for more details. * for more details.
* *
* Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org) * Copyright (C) 2003, 04, 05 Ralf Baechle (ralf@linux-mips.org)
* Copyright (C) 2007 Maciej W. Rozycki
*/ */
#include <linux/init.h> #include <linux/init.h>
#include <linux/kernel.h> #include <linux/kernel.h>
...@@ -12,6 +13,7 @@ ...@@ -12,6 +13,7 @@
#include <linux/module.h> #include <linux/module.h>
#include <linux/proc_fs.h> #include <linux/proc_fs.h>
#include <asm/bugs.h>
#include <asm/cacheops.h> #include <asm/cacheops.h>
#include <asm/inst.h> #include <asm/inst.h>
#include <asm/io.h> #include <asm/io.h>
...@@ -255,64 +257,58 @@ static inline void build_store_reg(int reg) ...@@ -255,64 +257,58 @@ static inline void build_store_reg(int reg)
__build_store_reg(reg); __build_store_reg(reg);
} }
static inline void build_addiu_a2_a0(unsigned long offset) static inline void build_addiu_rt_rs(unsigned int rt, unsigned int rs,
unsigned long offset)
{ {
union mips_instruction mi; union mips_instruction mi;
BUG_ON(offset > 0x7fff); BUG_ON(offset > 0x7fff);
mi.i_format.opcode = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op; if (cpu_has_64bit_gp_regs && DADDI_WAR && r4k_daddiu_bug()) {
mi.i_format.rs = 4; /* $a0 */ mi.i_format.opcode = addiu_op;
mi.i_format.rt = 6; /* $a2 */ mi.i_format.rs = 0; /* $zero */
mi.i_format.simmediate = offset; mi.i_format.rt = 25; /* $t9 */
mi.i_format.simmediate = offset;
emit_instruction(mi);
mi.r_format.opcode = spec_op;
mi.r_format.rs = rs;
mi.r_format.rt = 25; /* $t9 */
mi.r_format.rd = rt;
mi.r_format.re = 0;
mi.r_format.func = daddu_op;
} else {
mi.i_format.opcode = cpu_has_64bit_gp_regs ?
daddiu_op : addiu_op;
mi.i_format.rs = rs;
mi.i_format.rt = rt;
mi.i_format.simmediate = offset;
}
emit_instruction(mi); emit_instruction(mi);
} }
static inline void build_addiu_a2(unsigned long offset) static inline void build_addiu_a2_a0(unsigned long offset)
{ {
union mips_instruction mi; build_addiu_rt_rs(6, 4, offset); /* $a2, $a0, offset */
}
BUG_ON(offset > 0x7fff);
mi.i_format.opcode = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op;
mi.i_format.rs = 6; /* $a2 */
mi.i_format.rt = 6; /* $a2 */
mi.i_format.simmediate = offset;
emit_instruction(mi); static inline void build_addiu_a2(unsigned long offset)
{
build_addiu_rt_rs(6, 6, offset); /* $a2, $a2, offset */
} }
static inline void build_addiu_a1(unsigned long offset) static inline void build_addiu_a1(unsigned long offset)
{ {
union mips_instruction mi; build_addiu_rt_rs(5, 5, offset); /* $a1, $a1, offset */
BUG_ON(offset > 0x7fff);
mi.i_format.opcode = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op;
mi.i_format.rs = 5; /* $a1 */
mi.i_format.rt = 5; /* $a1 */
mi.i_format.simmediate = offset;
load_offset -= offset; load_offset -= offset;
emit_instruction(mi);
} }
static inline void build_addiu_a0(unsigned long offset) static inline void build_addiu_a0(unsigned long offset)
{ {
union mips_instruction mi; build_addiu_rt_rs(4, 4, offset); /* $a0, $a0, offset */
BUG_ON(offset > 0x7fff);
mi.i_format.opcode = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op;
mi.i_format.rs = 4; /* $a0 */
mi.i_format.rt = 4; /* $a0 */
mi.i_format.simmediate = offset;
store_offset -= offset; store_offset -= offset;
emit_instruction(mi);
} }
static inline void build_bne(unsigned int *dest) static inline void build_bne(unsigned int *dest)
......
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
* Synthesize TLB refill handlers at runtime. * Synthesize TLB refill handlers at runtime.
* *
* Copyright (C) 2004,2005,2006 by Thiemo Seufer * Copyright (C) 2004,2005,2006 by Thiemo Seufer
* Copyright (C) 2005 Maciej W. Rozycki * Copyright (C) 2005, 2007 Maciej W. Rozycki
* Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org) * Copyright (C) 2006 Ralf Baechle (ralf@linux-mips.org)
* *
* ... and the days got worse and worse and now you see * ... and the days got worse and worse and now you see
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
#include <linux/string.h> #include <linux/string.h>
#include <linux/init.h> #include <linux/init.h>
#include <asm/bugs.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/cacheflush.h> #include <asm/cacheflush.h>
#include <asm/mmu_context.h> #include <asm/mmu_context.h>
...@@ -293,7 +294,7 @@ static void __init build_insn(u32 **buf, enum opcode opc, ...) ...@@ -293,7 +294,7 @@ static void __init build_insn(u32 **buf, enum opcode opc, ...)
break; break;
} }
if (!ip) if (!ip || (opc == insn_daddiu && r4k_daddiu_bug()))
panic("Unsupported TLB synthesizer instruction %d", opc); panic("Unsupported TLB synthesizer instruction %d", opc);
op = ip->match; op = ip->match;
...@@ -525,23 +526,33 @@ L_LA(_r3000_write_probe_fail) ...@@ -525,23 +526,33 @@ L_LA(_r3000_write_probe_fail)
#define i_ssnop(buf) i_sll(buf, 0, 0, 1) #define i_ssnop(buf) i_sll(buf, 0, 0, 1)
#define i_ehb(buf) i_sll(buf, 0, 0, 3) #define i_ehb(buf) i_sll(buf, 0, 0, 3)
#ifdef CONFIG_64BIT
static __init int __maybe_unused in_compat_space_p(long addr) static __init int __maybe_unused in_compat_space_p(long addr)
{ {
/* Is this address in 32bit compat space? */ /* Is this address in 32bit compat space? */
#ifdef CONFIG_64BIT
return (((addr) & 0xffffffff00000000L) == 0xffffffff00000000L); return (((addr) & 0xffffffff00000000L) == 0xffffffff00000000L);
#else
return 1;
#endif
} }
static __init int __maybe_unused rel_highest(long val) static __init int __maybe_unused rel_highest(long val)
{ {
#ifdef CONFIG_64BIT
return ((((val + 0x800080008000L) >> 48) & 0xffff) ^ 0x8000) - 0x8000; return ((((val + 0x800080008000L) >> 48) & 0xffff) ^ 0x8000) - 0x8000;
#else
return 0;
#endif
} }
static __init int __maybe_unused rel_higher(long val) static __init int __maybe_unused rel_higher(long val)
{ {
#ifdef CONFIG_64BIT
return ((((val + 0x80008000L) >> 32) & 0xffff) ^ 0x8000) - 0x8000; return ((((val + 0x80008000L) >> 32) & 0xffff) ^ 0x8000) - 0x8000;
} #else
return 0;
#endif #endif
}
static __init int rel_hi(long val) static __init int rel_hi(long val)
{ {
...@@ -555,7 +566,6 @@ static __init int rel_lo(long val) ...@@ -555,7 +566,6 @@ static __init int rel_lo(long val)
static __init void i_LA_mostly(u32 **buf, unsigned int rs, long addr) static __init void i_LA_mostly(u32 **buf, unsigned int rs, long addr)
{ {
#ifdef CONFIG_64BIT
if (!in_compat_space_p(addr)) { if (!in_compat_space_p(addr)) {
i_lui(buf, rs, rel_highest(addr)); i_lui(buf, rs, rel_highest(addr));
if (rel_higher(addr)) if (rel_higher(addr))
...@@ -567,16 +577,18 @@ static __init void i_LA_mostly(u32 **buf, unsigned int rs, long addr) ...@@ -567,16 +577,18 @@ static __init void i_LA_mostly(u32 **buf, unsigned int rs, long addr)
} else } else
i_dsll32(buf, rs, rs, 0); i_dsll32(buf, rs, rs, 0);
} else } else
#endif
i_lui(buf, rs, rel_hi(addr)); i_lui(buf, rs, rel_hi(addr));
} }
static __init void __maybe_unused i_LA(u32 **buf, unsigned int rs, static __init void __maybe_unused i_LA(u32 **buf, unsigned int rs, long addr)
long addr)
{ {
i_LA_mostly(buf, rs, addr); i_LA_mostly(buf, rs, addr);
if (rel_lo(addr)) if (rel_lo(addr)) {
i_ADDIU(buf, rs, rs, rel_lo(addr)); if (!in_compat_space_p(addr))
i_daddiu(buf, rs, rs, rel_lo(addr));
else
i_addiu(buf, rs, rs, rel_lo(addr));
}
} }
/* /*
...@@ -1085,7 +1097,10 @@ build_get_pgd_vmalloc64(u32 **p, struct label **l, struct reloc **r, ...@@ -1085,7 +1097,10 @@ build_get_pgd_vmalloc64(u32 **p, struct label **l, struct reloc **r,
} else { } else {
i_LA_mostly(p, ptr, modd); i_LA_mostly(p, ptr, modd);
il_b(p, r, label_vmalloc_done); il_b(p, r, label_vmalloc_done);
i_daddiu(p, ptr, ptr, rel_lo(modd)); if (in_compat_space_p(modd))
i_addiu(p, ptr, ptr, rel_lo(modd));
else
i_daddiu(p, ptr, ptr, rel_lo(modd));
} }
l_vmalloc(l, *p); l_vmalloc(l, *p);
...@@ -1106,7 +1121,10 @@ build_get_pgd_vmalloc64(u32 **p, struct label **l, struct reloc **r, ...@@ -1106,7 +1121,10 @@ build_get_pgd_vmalloc64(u32 **p, struct label **l, struct reloc **r,
} else { } else {
i_LA_mostly(p, ptr, swpd); i_LA_mostly(p, ptr, swpd);
il_b(p, r, label_vmalloc_done); il_b(p, r, label_vmalloc_done);
i_daddiu(p, ptr, ptr, rel_lo(swpd)); if (in_compat_space_p(swpd))
i_addiu(p, ptr, ptr, rel_lo(swpd));
else
i_daddiu(p, ptr, ptr, rel_lo(swpd));
} }
} }
......
...@@ -28,7 +28,7 @@ static inline void __delay(unsigned long loops) ...@@ -28,7 +28,7 @@ static inline void __delay(unsigned long loops)
" .set reorder \n" " .set reorder \n"
: "=r" (loops) : "=r" (loops)
: "0" (loops)); : "0" (loops));
else if (sizeof(long) == 8) else if (sizeof(long) == 8 && !DADDI_WAR)
__asm__ __volatile__ ( __asm__ __volatile__ (
" .set noreorder \n" " .set noreorder \n"
" .align 3 \n" " .align 3 \n"
...@@ -37,6 +37,15 @@ static inline void __delay(unsigned long loops) ...@@ -37,6 +37,15 @@ static inline void __delay(unsigned long loops)
" .set reorder \n" " .set reorder \n"
: "=r" (loops) : "=r" (loops)
: "0" (loops)); : "0" (loops));
else if (sizeof(long) == 8 && DADDI_WAR)
__asm__ __volatile__ (
" .set noreorder \n"
" .align 3 \n"
"1: bnez %0, 1b \n"
" dsubu %0, %2 \n"
" .set reorder \n"
: "=r" (loops)
: "0" (loops), "r" (1));
} }
......
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
* Copyright (C) 1994, 95, 96, 99, 2001 Ralf Baechle * Copyright (C) 1994, 95, 96, 99, 2001 Ralf Baechle
* Copyright (C) 1994, 1995, 1996 Paul M. Antoine. * Copyright (C) 1994, 1995, 1996 Paul M. Antoine.
* Copyright (C) 1999 Silicon Graphics, Inc. * Copyright (C) 1999 Silicon Graphics, Inc.
* Copyright (C) 2007 Maciej W. Rozycki
*/ */
#ifndef _ASM_STACKFRAME_H #ifndef _ASM_STACKFRAME_H
#define _ASM_STACKFRAME_H #define _ASM_STACKFRAME_H
...@@ -145,8 +146,16 @@ ...@@ -145,8 +146,16 @@
.set reorder .set reorder
/* Called from user mode, new stack. */ /* Called from user mode, new stack. */
get_saved_sp get_saved_sp
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
8: move k0, sp 8: move k0, sp
PTR_SUBU sp, k1, PT_SIZE PTR_SUBU sp, k1, PT_SIZE
#else
.set at=k0
8: PTR_SUBU k1, PT_SIZE
.set noat
move k0, sp
move sp, k1
#endif
LONG_S k0, PT_R29(sp) LONG_S k0, PT_R29(sp)
LONG_S $3, PT_R3(sp) LONG_S $3, PT_R3(sp)
/* /*
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
* *
* Copyright (C) 1996, 1997, 1998, 1999, 2000, 03, 04 by Ralf Baechle * Copyright (C) 1996, 1997, 1998, 1999, 2000, 03, 04 by Ralf Baechle
* Copyright (C) 1999, 2000 Silicon Graphics, Inc. * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
* Copyright (C) 2007 Maciej W. Rozycki
*/ */
#ifndef _ASM_UACCESS_H #ifndef _ASM_UACCESS_H
#define _ASM_UACCESS_H #define _ASM_UACCESS_H
...@@ -387,6 +388,12 @@ extern void __put_user_unknown(void); ...@@ -387,6 +388,12 @@ extern void __put_user_unknown(void);
"jal\t" #destination "\n\t" "jal\t" #destination "\n\t"
#endif #endif
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
#define DADDI_SCRATCH "$0"
#else
#define DADDI_SCRATCH "$3"
#endif
extern size_t __copy_user(void *__to, const void *__from, size_t __n); extern size_t __copy_user(void *__to, const void *__from, size_t __n);
#define __invoke_copy_to_user(to, from, n) \ #define __invoke_copy_to_user(to, from, n) \
...@@ -403,7 +410,7 @@ extern size_t __copy_user(void *__to, const void *__from, size_t __n); ...@@ -403,7 +410,7 @@ extern size_t __copy_user(void *__to, const void *__from, size_t __n);
: "+r" (__cu_to_r), "+r" (__cu_from_r), "+r" (__cu_len_r) \ : "+r" (__cu_to_r), "+r" (__cu_from_r), "+r" (__cu_len_r) \
: \ : \
: "$8", "$9", "$10", "$11", "$12", "$15", "$24", "$31", \ : "$8", "$9", "$10", "$11", "$12", "$15", "$24", "$31", \
"memory"); \ DADDI_SCRATCH, "memory"); \
__cu_len_r; \ __cu_len_r; \
}) })
...@@ -512,7 +519,7 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n); ...@@ -512,7 +519,7 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n);
: "+r" (__cu_to_r), "+r" (__cu_from_r), "+r" (__cu_len_r) \ : "+r" (__cu_to_r), "+r" (__cu_from_r), "+r" (__cu_len_r) \
: \ : \
: "$8", "$9", "$10", "$11", "$12", "$15", "$24", "$31", \ : "$8", "$9", "$10", "$11", "$12", "$15", "$24", "$31", \
"memory"); \ DADDI_SCRATCH, "memory"); \
__cu_len_r; \ __cu_len_r; \
}) })
...@@ -535,7 +542,7 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n); ...@@ -535,7 +542,7 @@ extern size_t __copy_user_inatomic(void *__to, const void *__from, size_t __n);
: "+r" (__cu_to_r), "+r" (__cu_from_r), "+r" (__cu_len_r) \ : "+r" (__cu_to_r), "+r" (__cu_from_r), "+r" (__cu_len_r) \
: \ : \
: "$8", "$9", "$10", "$11", "$12", "$15", "$24", "$31", \ : "$8", "$9", "$10", "$11", "$12", "$15", "$24", "$31", \
"memory"); \ DADDI_SCRATCH, "memory"); \
__cu_len_r; \ __cu_len_r; \
}) })
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment