Commit f78b09e6 authored by Robert Griesemer

big: assembly routines for 386 long shifts

R=rsc
CC=golang-dev
https://golang.org/cl/974043
parent 2b0a30c4
...@@ -11,8 +11,8 @@ TEXT ·addVV(SB),7,$0 ...@@ -11,8 +11,8 @@ TEXT ·addVV(SB),7,$0
MOVL x+4(FP), SI MOVL x+4(FP), SI
MOVL y+8(FP), CX MOVL y+8(FP), CX
MOVL n+12(FP), BP MOVL n+12(FP), BP
MOVL $0, BX // i = 0 MOVL $0, BX // i = 0
MOVL $0, DX // c = 0 MOVL $0, DX // c = 0
JMP E1 JMP E1
L1: MOVL (SI)(BX*4), AX L1: MOVL (SI)(BX*4), AX
...@@ -20,7 +20,7 @@ L1: MOVL (SI)(BX*4), AX ...@@ -20,7 +20,7 @@ L1: MOVL (SI)(BX*4), AX
ADCL (CX)(BX*4), AX ADCL (CX)(BX*4), AX
RCLL $1, DX RCLL $1, DX
MOVL AX, (DI)(BX*4) MOVL AX, (DI)(BX*4)
ADDL $1, BX // i++ ADDL $1, BX // i++
E1: CMPL BX, BP // i < n E1: CMPL BX, BP // i < n
JL L1 JL L1
...@@ -36,8 +36,8 @@ TEXT ·subVV(SB),7,$0 ...@@ -36,8 +36,8 @@ TEXT ·subVV(SB),7,$0
MOVL x+4(FP), SI MOVL x+4(FP), SI
MOVL y+8(FP), CX MOVL y+8(FP), CX
MOVL n+12(FP), BP MOVL n+12(FP), BP
MOVL $0, BX // i = 0 MOVL $0, BX // i = 0
MOVL $0, DX // c = 0 MOVL $0, DX // c = 0
JMP E2 JMP E2
L2: MOVL (SI)(BX*4), AX L2: MOVL (SI)(BX*4), AX
...@@ -45,9 +45,9 @@ L2: MOVL (SI)(BX*4), AX ...@@ -45,9 +45,9 @@ L2: MOVL (SI)(BX*4), AX
SBBL (CX)(BX*4), AX SBBL (CX)(BX*4), AX
RCLL $1, DX RCLL $1, DX
MOVL AX, (DI)(BX*4) MOVL AX, (DI)(BX*4)
ADDL $1, BX // i++ ADDL $1, BX // i++
E2: CMPL BX, BP // i < n E2: CMPL BX, BP // i < n
JL L2 JL L2
MOVL DX, c+16(FP) MOVL DX, c+16(FP)
...@@ -58,18 +58,18 @@ E2: CMPL BX, BP // i < n ...@@ -58,18 +58,18 @@ E2: CMPL BX, BP // i < n
TEXT ·addVW(SB),7,$0 TEXT ·addVW(SB),7,$0
MOVL z+0(FP), DI MOVL z+0(FP), DI
MOVL x+4(FP), SI MOVL x+4(FP), SI
MOVL y+8(FP), AX // c = y MOVL y+8(FP), AX // c = y
MOVL n+12(FP), BP MOVL n+12(FP), BP
MOVL $0, BX // i = 0 MOVL $0, BX // i = 0
JMP E3 JMP E3
L3: ADDL (SI)(BX*4), AX L3: ADDL (SI)(BX*4), AX
MOVL AX, (DI)(BX*4) MOVL AX, (DI)(BX*4)
RCLL $1, AX RCLL $1, AX
ANDL $1, AX ANDL $1, AX
ADDL $1, BX // i++ ADDL $1, BX // i++
E3: CMPL BX, BP // i < n E3: CMPL BX, BP // i < n
JL L3 JL L3
MOVL AX, c+16(FP) MOVL AX, c+16(FP)
...@@ -80,9 +80,9 @@ E3: CMPL BX, BP // i < n ...@@ -80,9 +80,9 @@ E3: CMPL BX, BP // i < n
TEXT ·subVW(SB),7,$0 TEXT ·subVW(SB),7,$0
MOVL z+0(FP), DI MOVL z+0(FP), DI
MOVL x+4(FP), SI MOVL x+4(FP), SI
MOVL y+8(FP), AX // c = y MOVL y+8(FP), AX // c = y
MOVL n+12(FP), BP MOVL n+12(FP), BP
MOVL $0, BX // i = 0 MOVL $0, BX // i = 0
JMP E4 JMP E4
L4: MOVL (SI)(BX*4), DX // TODO(gri) is there a reverse SUBL? L4: MOVL (SI)(BX*4), DX // TODO(gri) is there a reverse SUBL?
...@@ -90,9 +90,9 @@ L4: MOVL (SI)(BX*4), DX // TODO(gri) is there a reverse SUBL? ...@@ -90,9 +90,9 @@ L4: MOVL (SI)(BX*4), DX // TODO(gri) is there a reverse SUBL?
MOVL DX, (DI)(BX*4) MOVL DX, (DI)(BX*4)
RCLL $1, AX RCLL $1, AX
ANDL $1, AX ANDL $1, AX
ADDL $1, BX // i++ ADDL $1, BX // i++
E4: CMPL BX, BP // i < n E4: CMPL BX, BP // i < n
JL L4 JL L4
MOVL AX, c+16(FP) MOVL AX, c+16(FP)
...@@ -100,17 +100,52 @@ E4: CMPL BX, BP // i < n ...@@ -100,17 +100,52 @@ E4: CMPL BX, BP // i < n
// func shlVW(z, x *Word, s Word, n int) (c Word)
//
// Shifts the n-word vector x left by s bits and stores the result in z.
// Words are processed from x[0] upwards. Each result word is x[i]<<s with
// the top s bits of x[i-1] shifted in at the bottom (double-word shift).
// The bits shifted out of x[n-1] are returned in the low bits of c.
//
// Register use:
//	SI = &x[n], DI = &z[n]	end pointers, indexed backwards through BX
//	BX = i, running from -n up to 0
//	CX = s
//	AX = previous source word (0 before the first iteration)
//	BP = saved copy of the current source word
//
// NOTE(review): the hardware uses only the low 5 bits of CX as the shift
// count, so this presumably expects s < 32 - confirm against callers.
TEXT ·shlVW(SB),7,$0
	MOVL z+0(FP), DI
	MOVL x+4(FP), SI
	MOVL s+8(FP), CX
	MOVL n+12(FP), BX
	// Words are moved with MOVL (4 bytes), so elements are 4 bytes apart.
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	NEGL BX			// i = -n
	MOVL $0, AX		// c = 0
	JMP E8

L8:	MOVL (SI)(BX*4), DX	// DX = x[i]
	MOVL DX, BP		// keep x[i]: the store below may overwrite it if z and x overlap
	SHLL CX, DX:AX		// DX = x[i]<<s | top s bits of previous word
	MOVL DX, (DI)(BX*4)	// z[i] = DX
	MOVL BP, AX		// original x[i] feeds the next word
	ADDL $1, BX		// i++
E8:	CMPL BX, $0		// i < 0
	JL L8

	MOVL $0, DX
	SHLL CX, DX:AX		// DX = bits shifted out of the last word
	MOVL DX, c+16(FP)
	RET
// func shrVW(z, x *Word, s Word, n int) (c Word)
//
// Shifts the n-word vector x right by s bits and stores the result in z.
// Words are processed from x[n-1] downwards. Each result word is x[i]>>s
// with the low s bits of x[i+1] shifted in at the top (double-word shift).
// The bits shifted out of x[0] are returned in the high bits of c.
//
// Register use:
//	SI = x, DI = z
//	BX = i, running from n-1 down to 0
//	CX = s
//	AX = previous source word (0 before the first iteration)
//	BP = saved copy of the current source word
//
// NOTE(review): the hardware uses only the low 5 bits of CX as the shift
// count, so this presumably expects s < 32 - confirm against callers.
TEXT ·shrVW(SB),7,$0
	MOVL z+0(FP), DI
	MOVL x+4(FP), SI
	MOVL s+8(FP), CX
	MOVL n+12(FP), BX	// i = n
	MOVL $0, AX		// c = 0
	JMP E9

	// Words are moved with MOVL (4 bytes), so the index scale is 4.
L9:	MOVL (SI)(BX*4), DX	// DX = x[i]
	MOVL DX, BP		// keep x[i] for the next (lower) word
	SHRL CX, DX:AX		// DX = x[i]>>s | low s bits of previous word
	MOVL DX, (DI)(BX*4)	// z[i] = DX
	MOVL BP, AX
E9:	SUBL $1, BX		// i--
	JGE L9			// i >= 0

	MOVL $0, DX
	SHRL CX, DX:AX		// DX = bits shifted out of x[0]
	MOVL DX, c+16(FP)
	RET
// func mulAddVWW(z, x *Word, y, r Word, n int) (c Word) // func mulAddVWW(z, x *Word, y, r Word, n int) (c Word)
...@@ -118,7 +153,7 @@ TEXT ·mulAddVWW(SB),7,$0 ...@@ -118,7 +153,7 @@ TEXT ·mulAddVWW(SB),7,$0
MOVL z+0(FP), DI MOVL z+0(FP), DI
MOVL x+4(FP), SI MOVL x+4(FP), SI
MOVL y+8(FP), BP MOVL y+8(FP), BP
MOVL r+12(FP), CX // c = r MOVL r+12(FP), CX // c = r
MOVL n+16(FP), BX MOVL n+16(FP), BX
LEAL (SI)(BX*4), SI LEAL (SI)(BX*4), SI
LEAL (DI)(BX*4), DI LEAL (DI)(BX*4), DI
...@@ -131,9 +166,9 @@ L5: MOVL (SI)(BX*4), AX ...@@ -131,9 +166,9 @@ L5: MOVL (SI)(BX*4), AX
ADCL $0, DX ADCL $0, DX
MOVL AX, (DI)(BX*4) MOVL AX, (DI)(BX*4)
MOVL DX, CX MOVL DX, CX
ADDL $1, BX // i++ ADDL $1, BX // i++
E5: CMPL BX, $0 // i < 0 E5: CMPL BX, $0 // i < 0
JL L5 JL L5
MOVL CX, c+20(FP) MOVL CX, c+20(FP)
...@@ -148,8 +183,8 @@ TEXT ·addMulVVW(SB),7,$0 ...@@ -148,8 +183,8 @@ TEXT ·addMulVVW(SB),7,$0
MOVL n+12(FP), BX MOVL n+12(FP), BX
LEAL (SI)(BX*4), SI LEAL (SI)(BX*4), SI
LEAL (DI)(BX*4), DI LEAL (DI)(BX*4), DI
NEGL BX // i = -n NEGL BX // i = -n
MOVL $0, CX // c = 0 MOVL $0, CX // c = 0
JMP E6 JMP E6
L6: MOVL (SI)(BX*4), AX L6: MOVL (SI)(BX*4), AX
...@@ -160,9 +195,9 @@ L6: MOVL (SI)(BX*4), AX ...@@ -160,9 +195,9 @@ L6: MOVL (SI)(BX*4), AX
ADCL $0, DX ADCL $0, DX
MOVL AX, (DI)(BX*4) MOVL AX, (DI)(BX*4)
MOVL DX, CX MOVL DX, CX
ADDL $1, BX // i++ ADDL $1, BX // i++
E6: CMPL BX, $0 // i < 0 E6: CMPL BX, $0 // i < 0
JL L6 JL L6
MOVL CX, c+16(FP) MOVL CX, c+16(FP)
...@@ -172,18 +207,18 @@ E6: CMPL BX, $0 // i < 0 ...@@ -172,18 +207,18 @@ E6: CMPL BX, $0 // i < 0
// divWVW(z* Word, xn Word, x *Word, y Word, n int) (r Word)
//
// Divides, word by word from x[n-1] down to x[0], the running two-word
// value (remainder:x[i]) by y. The remainder starts as xn. Each quotient
// word is stored in z[i] and the final remainder is returned in r.
//
// NOTE(review): DIVL faults if a quotient does not fit in one word, so
// this presumably relies on xn < y - confirm against callers.
TEXT ·divWVW(SB),7,$0
	MOVL n+16(FP), BX	// i = n
	MOVL y+12(FP), CX	// divisor
	MOVL x+8(FP), SI
	MOVL xn+4(FP), DX	// r = xn
	MOVL z+0(FP), DI
	JMP divnext

divloop:
	MOVL (SI)(BX*4), AX	// low half of dividend = x[i]
	DIVL CX			// AX = quotient, DX = remainder of DX:AX / y
	MOVL AX, (DI)(BX*4)	// z[i] = quotient
divnext:
	SUBL $1, BX		// i--
	JGE divloop		// i >= 0

	MOVL DX, r+20(FP)
	RET
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment