Commit 1731d90d authored by David S. Miller's avatar David S. Miller Committed by Greg Kroah-Hartman

sparc64: Convert U1copy_{from,to}_user to accurate exception reporting.

[ Upstream commit cb736fdb ]

Report the exact number of bytes which have not been successfully
copied when an exception occurs, using the running remaining length.
Signed-off-by: default avatarDavid S. Miller <davem@davemloft.net>
Signed-off-by: default avatarGreg Kroah-Hartman <gregkh@linuxfoundation.org>
parent 8a444c77
...@@ -3,19 +3,19 @@ ...@@ -3,19 +3,19 @@
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/ */
#define EX_LD(x) \ #define EX_LD(x,y) \
98: x; \ 98: x; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __retl_mone; \ .word 98b, y; \
.text; \ .text; \
.align 4; .align 4;
#define EX_LD_FP(x) \ #define EX_LD_FP(x,y) \
98: x; \ 98: x; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __retl_mone_fp;\ .word 98b, y; \
.text; \ .text; \
.align 4; .align 4;
......
...@@ -3,19 +3,19 @@ ...@@ -3,19 +3,19 @@
* Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
*/ */
#define EX_ST(x) \ #define EX_ST(x,y) \
98: x; \ 98: x; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __retl_mone; \ .word 98b, y; \
.text; \ .text; \
.align 4; .align 4;
#define EX_ST_FP(x) \ #define EX_ST_FP(x,y) \
98: x; \ 98: x; \
.section __ex_table,"a";\ .section __ex_table,"a";\
.align 4; \ .align 4; \
.word 98b, __retl_mone_fp;\ .word 98b, y; \
.text; \ .text; \
.align 4; .align 4;
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
*/ */
#ifdef __KERNEL__ #ifdef __KERNEL__
#include <linux/linkage.h>
#include <asm/visasm.h> #include <asm/visasm.h>
#include <asm/asi.h> #include <asm/asi.h>
#define GLOBAL_SPARE g7 #define GLOBAL_SPARE g7
...@@ -23,21 +24,17 @@ ...@@ -23,21 +24,17 @@
#endif #endif
#ifndef EX_LD #ifndef EX_LD
#define EX_LD(x) x #define EX_LD(x,y) x
#endif #endif
#ifndef EX_LD_FP #ifndef EX_LD_FP
#define EX_LD_FP(x) x #define EX_LD_FP(x,y) x
#endif #endif
#ifndef EX_ST #ifndef EX_ST
#define EX_ST(x) x #define EX_ST(x,y) x
#endif #endif
#ifndef EX_ST_FP #ifndef EX_ST_FP
#define EX_ST_FP(x) x #define EX_ST_FP(x,y) x
#endif
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
#endif #endif
#ifndef LOAD #ifndef LOAD
...@@ -78,53 +75,169 @@ ...@@ -78,53 +75,169 @@
faligndata %f7, %f8, %f60; \ faligndata %f7, %f8, %f60; \
faligndata %f8, %f9, %f62; faligndata %f8, %f9, %f62;
#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ #define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \
EX_LD_FP(LOAD_BLK(%src, %fdest)); \ EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \
EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
add %src, 0x40, %src; \ add %src, 0x40, %src; \
subcc %len, 0x40, %len; \ subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \
be,pn %xcc, jmptgt; \ be,pn %xcc, jmptgt; \
add %dest, 0x40, %dest; \ add %dest, 0x40, %dest; \
#define LOOP_CHUNK1(src, dest, len, branch_dest) \ #define LOOP_CHUNK1(src, dest, branch_dest) \
MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest)
#define LOOP_CHUNK2(src, dest, len, branch_dest) \ #define LOOP_CHUNK2(src, dest, branch_dest) \
MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
#define LOOP_CHUNK3(src, dest, len, branch_dest) \ #define LOOP_CHUNK3(src, dest, branch_dest) \
MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
#define DO_SYNC membar #Sync; #define DO_SYNC membar #Sync;
#define STORE_SYNC(dest, fsrc) \ #define STORE_SYNC(dest, fsrc) \
EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
add %dest, 0x40, %dest; \ add %dest, 0x40, %dest; \
DO_SYNC DO_SYNC
#define STORE_JUMP(dest, fsrc, target) \ #define STORE_JUMP(dest, fsrc, target) \
EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \
add %dest, 0x40, %dest; \ add %dest, 0x40, %dest; \
ba,pt %xcc, target; \ ba,pt %xcc, target; \
nop; nop;
#define FINISH_VISCHUNK(dest, f0, f1, left) \ #define FINISH_VISCHUNK(dest, f0, f1) \
subcc %left, 8, %left;\ subcc %g3, 8, %g3; \
bl,pn %xcc, 95f; \ bl,pn %xcc, 95f; \
faligndata %f0, %f1, %f48; \ faligndata %f0, %f1, %f48; \
EX_ST_FP(STORE(std, %f48, %dest)); \ EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \
add %dest, 8, %dest; add %dest, 8, %dest;
#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ #define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
subcc %left, 8, %left; \ subcc %g3, 8, %g3; \
bl,pn %xcc, 95f; \ bl,pn %xcc, 95f; \
fsrc2 %f0, %f1; fsrc2 %f0, %f1;
#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ #define UNEVEN_VISCHUNK(dest, f0, f1) \
UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
ba,a,pt %xcc, 93f; ba,a,pt %xcc, 93f;
.register %g2,#scratch .register %g2,#scratch
.register %g3,#scratch .register %g3,#scratch
.text .text
#ifndef EX_RETVAL
#define EX_RETVAL(x) x
ENTRY(U1_g1_1_fp)
VISExitHalf
add %g1, 1, %g1
add %g1, %g2, %g1
retl
add %g1, %o2, %o0
ENDPROC(U1_g1_1_fp)
ENTRY(U1_g2_0_fp)
VISExitHalf
retl
add %g2, %o2, %o0
ENDPROC(U1_g2_0_fp)
ENTRY(U1_g2_8_fp)
VISExitHalf
add %g2, 8, %g2
retl
add %g2, %o2, %o0
ENDPROC(U1_g2_8_fp)
ENTRY(U1_gs_0_fp)
VISExitHalf
add %GLOBAL_SPARE, %g3, %o0
retl
add %o0, %o2, %o0
ENDPROC(U1_gs_0_fp)
ENTRY(U1_gs_80_fp)
VISExitHalf
add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
add %GLOBAL_SPARE, %g3, %o0
retl
add %o0, %o2, %o0
ENDPROC(U1_gs_80_fp)
ENTRY(U1_gs_40_fp)
VISExitHalf
add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
add %GLOBAL_SPARE, %g3, %o0
retl
add %o0, %o2, %o0
ENDPROC(U1_gs_40_fp)
ENTRY(U1_g3_0_fp)
VISExitHalf
retl
add %g3, %o2, %o0
ENDPROC(U1_g3_0_fp)
ENTRY(U1_g3_8_fp)
VISExitHalf
add %g3, 8, %g3
retl
add %g3, %o2, %o0
ENDPROC(U1_g3_8_fp)
ENTRY(U1_o2_0_fp)
VISExitHalf
retl
mov %o2, %o0
ENDPROC(U1_o2_0_fp)
ENTRY(U1_o2_1_fp)
VISExitHalf
retl
add %o2, 1, %o0
ENDPROC(U1_o2_1_fp)
ENTRY(U1_gs_0)
VISExitHalf
retl
add %GLOBAL_SPARE, %o2, %o0
ENDPROC(U1_gs_0)
ENTRY(U1_gs_8)
VISExitHalf
add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
retl
add %GLOBAL_SPARE, 0x8, %o0
ENDPROC(U1_gs_8)
ENTRY(U1_gs_10)
VISExitHalf
add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
retl
add %GLOBAL_SPARE, 0x10, %o0
ENDPROC(U1_gs_10)
ENTRY(U1_o2_0)
retl
mov %o2, %o0
ENDPROC(U1_o2_0)
ENTRY(U1_o2_8)
retl
add %o2, 8, %o0
ENDPROC(U1_o2_8)
ENTRY(U1_o2_4)
retl
add %o2, 4, %o0
ENDPROC(U1_o2_4)
ENTRY(U1_o2_1)
retl
add %o2, 1, %o0
ENDPROC(U1_o2_1)
ENTRY(U1_g1_0)
retl
add %g1, %o2, %o0
ENDPROC(U1_g1_0)
ENTRY(U1_g1_1)
add %g1, 1, %g1
retl
add %g1, %o2, %o0
ENDPROC(U1_g1_1)
ENTRY(U1_gs_0_o2_adj)
and %o2, 7, %o2
retl
add %GLOBAL_SPARE, %o2, %o0
ENDPROC(U1_gs_0_o2_adj)
ENTRY(U1_gs_8_o2_adj)
and %o2, 7, %o2
add %GLOBAL_SPARE, 8, %GLOBAL_SPARE
retl
add %GLOBAL_SPARE, %o2, %o0
ENDPROC(U1_gs_8_o2_adj)
#endif
.align 64 .align 64
.globl FUNC_NAME .globl FUNC_NAME
...@@ -166,8 +279,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -166,8 +279,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
and %g2, 0x38, %g2 and %g2, 0x38, %g2
1: subcc %g1, 0x1, %g1 1: subcc %g1, 0x1, %g1
EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE)) EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
bgu,pt %XCC, 1b bgu,pt %XCC, 1b
add %o1, 0x1, %o1 add %o1, 0x1, %o1
...@@ -178,20 +291,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -178,20 +291,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
be,pt %icc, 3f be,pt %icc, 3f
alignaddr %o1, %g0, %o1 alignaddr %o1, %g0, %o1
EX_LD_FP(LOAD(ldd, %o1, %f4)) EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) 1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
subcc %g2, 0x8, %g2 subcc %g2, 0x8, %g2
faligndata %f4, %f6, %f0 faligndata %f4, %f6, %f0
EX_ST_FP(STORE(std, %f0, %o0)) EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
be,pn %icc, 3f be,pn %icc, 3f
add %o0, 0x8, %o0 add %o0, 0x8, %o0
EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
subcc %g2, 0x8, %g2 subcc %g2, 0x8, %g2
faligndata %f6, %f4, %f0 faligndata %f6, %f4, %f0
EX_ST_FP(STORE(std, %f0, %o0)) EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
bne,pt %icc, 1b bne,pt %icc, 1b
add %o0, 0x8, %o0 add %o0, 0x8, %o0
...@@ -214,13 +327,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -214,13 +327,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
add %g1, %GLOBAL_SPARE, %g1 add %g1, %GLOBAL_SPARE, %g1
subcc %o2, %g3, %o2 subcc %o2, %g3, %o2
EX_LD_FP(LOAD_BLK(%o1, %f0)) EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
add %o1, 0x40, %o1 add %o1, 0x40, %o1
add %g1, %g3, %g1 add %g1, %g3, %g1
EX_LD_FP(LOAD_BLK(%o1, %f16)) EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
add %o1, 0x40, %o1 add %o1, 0x40, %o1
sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
EX_LD_FP(LOAD_BLK(%o1, %f32)) EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
add %o1, 0x40, %o1 add %o1, 0x40, %o1
/* There are 8 instances of the unrolled loop, /* There are 8 instances of the unrolled loop,
...@@ -240,11 +353,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -240,11 +353,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
.align 64 .align 64
1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f0, %f2, %f48 faligndata %f0, %f2, %f48
1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
...@@ -261,11 +374,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -261,11 +374,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 56f) STORE_JUMP(o0, f48, 56f)
1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f2, %f4, %f48 faligndata %f2, %f4, %f48
1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
...@@ -282,11 +395,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -282,11 +395,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 57f) STORE_JUMP(o0, f48, 57f)
1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f4, %f6, %f48 faligndata %f4, %f6, %f48
1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
...@@ -303,11 +416,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -303,11 +416,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 58f) STORE_JUMP(o0, f48, 58f)
1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f6, %f8, %f48 faligndata %f6, %f8, %f48
1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
...@@ -324,11 +437,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -324,11 +437,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 59f) STORE_JUMP(o0, f48, 59f)
1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f8, %f10, %f48 faligndata %f8, %f10, %f48
1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
...@@ -345,11 +458,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -345,11 +458,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 60f) STORE_JUMP(o0, f48, 60f)
1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f10, %f12, %f48 faligndata %f10, %f12, %f48
1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
...@@ -366,11 +479,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -366,11 +479,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 61f) STORE_JUMP(o0, f48, 61f)
1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f12, %f14, %f48 faligndata %f12, %f14, %f48
1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
...@@ -387,11 +500,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -387,11 +500,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
STORE_JUMP(o0, f48, 62f) STORE_JUMP(o0, f48, 62f)
1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) LOOP_CHUNK1(o1, o0, 1f)
FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) LOOP_CHUNK2(o1, o0, 2f)
FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) LOOP_CHUNK3(o1, o0, 3f)
ba,pt %xcc, 1b+4 ba,pt %xcc, 1b+4
faligndata %f14, %f16, %f48 faligndata %f14, %f16, %f48
1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
...@@ -407,53 +520,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -407,53 +520,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
STORE_JUMP(o0, f48, 63f) STORE_JUMP(o0, f48, 63f)
40: FINISH_VISCHUNK(o0, f0, f2, g3) 40: FINISH_VISCHUNK(o0, f0, f2)
41: FINISH_VISCHUNK(o0, f2, f4, g3) 41: FINISH_VISCHUNK(o0, f2, f4)
42: FINISH_VISCHUNK(o0, f4, f6, g3) 42: FINISH_VISCHUNK(o0, f4, f6)
43: FINISH_VISCHUNK(o0, f6, f8, g3) 43: FINISH_VISCHUNK(o0, f6, f8)
44: FINISH_VISCHUNK(o0, f8, f10, g3) 44: FINISH_VISCHUNK(o0, f8, f10)
45: FINISH_VISCHUNK(o0, f10, f12, g3) 45: FINISH_VISCHUNK(o0, f10, f12)
46: FINISH_VISCHUNK(o0, f12, f14, g3) 46: FINISH_VISCHUNK(o0, f12, f14)
47: UNEVEN_VISCHUNK(o0, f14, f0, g3) 47: UNEVEN_VISCHUNK(o0, f14, f0)
48: FINISH_VISCHUNK(o0, f16, f18, g3) 48: FINISH_VISCHUNK(o0, f16, f18)
49: FINISH_VISCHUNK(o0, f18, f20, g3) 49: FINISH_VISCHUNK(o0, f18, f20)
50: FINISH_VISCHUNK(o0, f20, f22, g3) 50: FINISH_VISCHUNK(o0, f20, f22)
51: FINISH_VISCHUNK(o0, f22, f24, g3) 51: FINISH_VISCHUNK(o0, f22, f24)
52: FINISH_VISCHUNK(o0, f24, f26, g3) 52: FINISH_VISCHUNK(o0, f24, f26)
53: FINISH_VISCHUNK(o0, f26, f28, g3) 53: FINISH_VISCHUNK(o0, f26, f28)
54: FINISH_VISCHUNK(o0, f28, f30, g3) 54: FINISH_VISCHUNK(o0, f28, f30)
55: UNEVEN_VISCHUNK(o0, f30, f0, g3) 55: UNEVEN_VISCHUNK(o0, f30, f0)
56: FINISH_VISCHUNK(o0, f32, f34, g3) 56: FINISH_VISCHUNK(o0, f32, f34)
57: FINISH_VISCHUNK(o0, f34, f36, g3) 57: FINISH_VISCHUNK(o0, f34, f36)
58: FINISH_VISCHUNK(o0, f36, f38, g3) 58: FINISH_VISCHUNK(o0, f36, f38)
59: FINISH_VISCHUNK(o0, f38, f40, g3) 59: FINISH_VISCHUNK(o0, f38, f40)
60: FINISH_VISCHUNK(o0, f40, f42, g3) 60: FINISH_VISCHUNK(o0, f40, f42)
61: FINISH_VISCHUNK(o0, f42, f44, g3) 61: FINISH_VISCHUNK(o0, f42, f44)
62: FINISH_VISCHUNK(o0, f44, f46, g3) 62: FINISH_VISCHUNK(o0, f44, f46)
63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) 63: UNEVEN_VISCHUNK_LAST(o0, f46, f0)
93: EX_LD_FP(LOAD(ldd, %o1, %f2)) 93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
add %o1, 8, %o1 add %o1, 8, %o1
subcc %g3, 8, %g3 subcc %g3, 8, %g3
faligndata %f0, %f2, %f8 faligndata %f0, %f2, %f8
EX_ST_FP(STORE(std, %f8, %o0)) EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
bl,pn %xcc, 95f bl,pn %xcc, 95f
add %o0, 8, %o0 add %o0, 8, %o0
EX_LD_FP(LOAD(ldd, %o1, %f0)) EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
add %o1, 8, %o1 add %o1, 8, %o1
subcc %g3, 8, %g3 subcc %g3, 8, %g3
faligndata %f2, %f0, %f8 faligndata %f2, %f0, %f8
EX_ST_FP(STORE(std, %f8, %o0)) EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
bge,pt %xcc, 93b bge,pt %xcc, 93b
add %o0, 8, %o0 add %o0, 8, %o0
95: brz,pt %o2, 2f 95: brz,pt %o2, 2f
mov %g1, %o1 mov %g1, %o1
1: EX_LD_FP(LOAD(ldub, %o1, %o3)) 1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
add %o1, 1, %o1 add %o1, 1, %o1
subcc %o2, 1, %o2 subcc %o2, 1, %o2
EX_ST_FP(STORE(stb, %o3, %o0)) EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
bne,pt %xcc, 1b bne,pt %xcc, 1b
add %o0, 1, %o0 add %o0, 1, %o0
...@@ -469,27 +582,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -469,27 +582,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
72: andn %o2, 0xf, %GLOBAL_SPARE 72: andn %o2, 0xf, %GLOBAL_SPARE
and %o2, 0xf, %o2 and %o2, 0xf, %o2
1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) 1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
EX_ST(STORE(stx, %o5, %o1 + %o3)) EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
EX_ST(STORE(stx, %g1, %o1 + %o3)) EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
bgu,pt %XCC, 1b bgu,pt %XCC, 1b
add %o1, 0x8, %o1 add %o1, 0x8, %o1
73: andcc %o2, 0x8, %g0 73: andcc %o2, 0x8, %g0
be,pt %XCC, 1f be,pt %XCC, 1f
nop nop
EX_LD(LOAD(ldx, %o1, %o5)) EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
sub %o2, 0x8, %o2 sub %o2, 0x8, %o2
EX_ST(STORE(stx, %o5, %o1 + %o3)) EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
add %o1, 0x8, %o1 add %o1, 0x8, %o1
1: andcc %o2, 0x4, %g0 1: andcc %o2, 0x4, %g0
be,pt %XCC, 1f be,pt %XCC, 1f
nop nop
EX_LD(LOAD(lduw, %o1, %o5)) EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
sub %o2, 0x4, %o2 sub %o2, 0x4, %o2
EX_ST(STORE(stw, %o5, %o1 + %o3)) EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
add %o1, 0x4, %o1 add %o1, 0x4, %o1
1: cmp %o2, 0 1: cmp %o2, 0
be,pt %XCC, 85f be,pt %XCC, 85f
...@@ -503,9 +616,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -503,9 +616,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
sub %g0, %g1, %g1 sub %g0, %g1, %g1
sub %o2, %g1, %o2 sub %o2, %g1, %o2
1: EX_LD(LOAD(ldub, %o1, %o5)) 1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
subcc %g1, 1, %g1 subcc %g1, 1, %g1
EX_ST(STORE(stb, %o5, %o1 + %o3)) EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
bgu,pt %icc, 1b bgu,pt %icc, 1b
add %o1, 1, %o1 add %o1, 1, %o1
...@@ -521,16 +634,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -521,16 +634,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
8: mov 64, %o3 8: mov 64, %o3
andn %o1, 0x7, %o1 andn %o1, 0x7, %o1
EX_LD(LOAD(ldx, %o1, %g2)) EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
sub %o3, %g1, %o3 sub %o3, %g1, %o3
andn %o2, 0x7, %GLOBAL_SPARE andn %o2, 0x7, %GLOBAL_SPARE
sllx %g2, %g1, %g2 sllx %g2, %g1, %g2
1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) 1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
add %o1, 0x8, %o1 add %o1, 0x8, %o1
srlx %g3, %o3, %o5 srlx %g3, %o3, %o5
or %o5, %g2, %o5 or %o5, %g2, %o5
EX_ST(STORE(stx, %o5, %o0)) EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
add %o0, 0x8, %o0 add %o0, 0x8, %o0
bgu,pt %icc, 1b bgu,pt %icc, 1b
sllx %g3, %g1, %g2 sllx %g3, %g1, %g2
...@@ -548,9 +661,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -548,9 +661,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
bne,pn %XCC, 90f bne,pn %XCC, 90f
sub %o0, %o1, %o3 sub %o0, %o1, %o3
1: EX_LD(LOAD(lduw, %o1, %g1)) 1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
subcc %o2, 4, %o2 subcc %o2, 4, %o2
EX_ST(STORE(stw, %g1, %o1 + %o3)) EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
bgu,pt %XCC, 1b bgu,pt %XCC, 1b
add %o1, 4, %o1 add %o1, 4, %o1
...@@ -558,9 +671,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ...@@ -558,9 +671,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
mov EX_RETVAL(%o4), %o0 mov EX_RETVAL(%o4), %o0
.align 32 .align 32
90: EX_LD(LOAD(ldub, %o1, %g1)) 90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
subcc %o2, 1, %o2 subcc %o2, 1, %o2
EX_ST(STORE(stb, %g1, %o1 + %o3)) EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
bgu,pt %XCC, 90b bgu,pt %XCC, 90b
add %o1, 1, %o1 add %o1, 1, %o1
retl retl
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment