Commit d5d0c35f authored by Jan Lindström

Merge pull request #160 from grooverdan/crc32_power_abi_fix

MDEV-9699: power8 crc32: Per the PPC64 ABI, v20-v31 are non-volatile registers
parents 46089d76 5ea894a7
...@@ -67,14 +67,13 @@ ...@@ -67,14 +67,13 @@
#define off96 r30 #define off96 r30
#define off112 r31 #define off112 r31
#define const1 v25 #define const1 v24
#define const2 v26 #define const2 v25
#define byteswap v27 #define byteswap v26
#define mask_32bit v28 #define mask_32bit v27
#define mask_64bit v29 #define mask_64bit v28
#define zeroes v30 #define zeroes v29
#define ones v31
#ifdef BYTESWAP_DATA #ifdef BYTESWAP_DATA
#define VPERM(A, B, C, D) vperm A, B, C, D #define VPERM(A, B, C, D) vperm A, B, C, D
...@@ -101,13 +100,28 @@ FUNC_START(__crc32_vpmsum) ...@@ -101,13 +100,28 @@ FUNC_START(__crc32_vpmsum)
li off112,112 li off112,112
li r0,0 li r0,0
/* Enough room for saving 10 non volatile VMX registers */
subi r6,r1,56+10*16
subi r7,r1,56+2*16
stvx v20,0,r6
stvx v21,off16,r6
stvx v22,off32,r6
stvx v23,off48,r6
stvx v24,off64,r6
stvx v25,off80,r6
stvx v26,off96,r6
stvx v27,off112,r6
stvx v28,0,r7
stvx v29,off16,r7
mr r10,r3 mr r10,r3
vxor zeroes,zeroes,zeroes vxor zeroes,zeroes,zeroes
vspltisw ones,-1 vspltisw v0,-1
vsldoi mask_32bit,zeroes,ones,4 vsldoi mask_32bit,zeroes,v0,4
vsldoi mask_64bit,zeroes,ones,8 vsldoi mask_64bit,zeroes,v0,8
/* Get the initial value into v8 */ /* Get the initial value into v8 */
vxor v8,v8,v8 vxor v8,v8,v8
...@@ -570,6 +584,21 @@ FUNC_START(__crc32_vpmsum) ...@@ -570,6 +584,21 @@ FUNC_START(__crc32_vpmsum)
vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */ vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */
#endif #endif
.Lout:
subi r6,r1,56+10*16
subi r7,r1,56+2*16
lvx v20,0,r6
lvx v21,off16,r6
lvx v22,off32,r6
lvx v23,off48,r6
lvx v24,off64,r6
lvx v25,off80,r6
lvx v26,off96,r6
lvx v27,off112,r6
lvx v28,0,r7
lvx v29,off16,r7
/* Get it into r3 */ /* Get it into r3 */
MFVRD(r3, v0) MFVRD(r3, v0)
...@@ -739,6 +768,8 @@ FUNC_START(__crc32_vpmsum) ...@@ -739,6 +768,8 @@ FUNC_START(__crc32_vpmsum)
.Lzero: .Lzero:
mr r3,r10 mr r3,r10
blr blr
b .Lout
FUNC_END(__crc32_vpmsum) FUNC_END(__crc32_vpmsum)
#endif /* __powerpc__ */ #endif /* __powerpc__ */
...@@ -67,14 +67,13 @@ ...@@ -67,14 +67,13 @@
#define off96 r30 #define off96 r30
#define off112 r31 #define off112 r31
#define const1 v25 #define const1 v24
#define const2 v26 #define const2 v25
#define byteswap v27 #define byteswap v26
#define mask_32bit v28 #define mask_32bit v27
#define mask_64bit v29 #define mask_64bit v28
#define zeroes v30 #define zeroes v29
#define ones v31
#ifdef BYTESWAP_DATA #ifdef BYTESWAP_DATA
#define VPERM(A, B, C, D) vperm A, B, C, D #define VPERM(A, B, C, D) vperm A, B, C, D
...@@ -101,13 +100,28 @@ FUNC_START(__crc32_vpmsum) ...@@ -101,13 +100,28 @@ FUNC_START(__crc32_vpmsum)
li off112,112 li off112,112
li r0,0 li r0,0
/* Enough room for saving 10 non volatile VMX registers */
subi r6,r1,56+10*16
subi r7,r1,56+2*16
stvx v20,0,r6
stvx v21,off16,r6
stvx v22,off32,r6
stvx v23,off48,r6
stvx v24,off64,r6
stvx v25,off80,r6
stvx v26,off96,r6
stvx v27,off112,r6
stvx v28,0,r7
stvx v29,off16,r7
mr r10,r3 mr r10,r3
vxor zeroes,zeroes,zeroes vxor zeroes,zeroes,zeroes
vspltisw ones,-1 vspltisw v0,-1
vsldoi mask_32bit,zeroes,ones,4 vsldoi mask_32bit,zeroes,v0,4
vsldoi mask_64bit,zeroes,ones,8 vsldoi mask_64bit,zeroes,v0,8
/* Get the initial value into v8 */ /* Get the initial value into v8 */
vxor v8,v8,v8 vxor v8,v8,v8
...@@ -570,6 +584,21 @@ FUNC_START(__crc32_vpmsum) ...@@ -570,6 +584,21 @@ FUNC_START(__crc32_vpmsum)
vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */ vsldoi v0,v0,zeroes,4 /* shift result into top 64 bits of */
#endif #endif
.Lout:
subi r6,r1,56+10*16
subi r7,r1,56+2*16
lvx v20,0,r6
lvx v21,off16,r6
lvx v22,off32,r6
lvx v23,off48,r6
lvx v24,off64,r6
lvx v25,off80,r6
lvx v26,off96,r6
lvx v27,off112,r6
lvx v28,0,r7
lvx v29,off16,r7
/* Get it into r3 */ /* Get it into r3 */
MFVRD(r3, v0) MFVRD(r3, v0)
...@@ -739,6 +768,8 @@ FUNC_START(__crc32_vpmsum) ...@@ -739,6 +768,8 @@ FUNC_START(__crc32_vpmsum)
.Lzero: .Lzero:
mr r3,r10 mr r3,r10
blr blr
b .Lout
FUNC_END(__crc32_vpmsum) FUNC_END(__crc32_vpmsum)
#endif /* __powerpc__ */ #endif /* __powerpc__ */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment