Commit 503e6ccd authored by erifan01's avatar erifan01 Committed by Cherry Zhang

math/big: fix the bug in assembly implementation of shlVU on arm64

For the case where the addresses of parameter z and x of the function
shlVU overlap and the address of z is greater than x, x (input value)
can be polluted during the calculation when the high words of x are
overlapped with the low words of z (output value).

Fixes #31084

Change-Id: I9bb0266a1d7856b8faa9a9b1975d6f57dece0479
Reviewed-on: https://go-review.googlesource.com/c/go/+/169780
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
parent dc0388c5
...@@ -194,87 +194,97 @@ len0: ...@@ -194,87 +194,97 @@ len0:
MOVD R2, c+56(FP) MOVD R2, c+56(FP)
RET RET
// func shlVU(z, x []Word, s uint) (c Word) // func shlVU(z, x []Word, s uint) (c Word)
// This implementation handles the shift operation from the high word to the low word,
// which may be an error for the case where the low word of x overlaps with the high
// word of z. When calling this function directly, you need to pay attention to this
// situation.
TEXT ·shlVU(SB),NOSPLIT,$0 TEXT ·shlVU(SB),NOSPLIT,$0
// Disable assembly for now - it is subtly incorrect. LDP z+0(FP), (R0, R1) // R0 = z.ptr, R1 = len(z)
// See #31084 for a test that fails using this code.
B ·shlVU_g(SB)
MOVD z+0(FP), R0
MOVD z_len+8(FP), R1
MOVD x+24(FP), R2 MOVD x+24(FP), R2
MOVD s+48(FP), R3 MOVD s+48(FP), R3
MOVD $0, R8 // in order not to affect the first element, R8 is initialized to zero ADD R1<<3, R0 // R0 = &z[n]
MOVD $64, R4 ADD R1<<3, R2 // R2 = &x[n]
SUB R3, R4
CBZ R1, len0 CBZ R1, len0
CBZ R3, copy // if the number of shift is 0, just copy x to z CBZ R3, copy // if the number of shift is 0, just copy x to z
MOVD $64, R4
TBZ $0, R1, two SUB R3, R4
MOVD.P 8(R2), R6 // handling the most significant element x[n-1]
LSR R4, R6, R8 MOVD.W -8(R2), R6
LSL R3, R6 LSR R4, R6, R5 // return value
MOVD.P R6, 8(R0) LSL R3, R6, R8 // x[i] << s
SUB $1, R1 SUB $1, R1
one: TBZ $0, R1, two
MOVD.W -8(R2), R6
LSR R4, R6, R7
ORR R8, R7
LSL R3, R6, R8
SUB $1, R1
MOVD.W R7, -8(R0)
two: two:
TBZ $1, R1, loop TBZ $1, R1, loop
LDP.P 16(R2), (R6, R7) LDP.W -16(R2), (R6, R7)
LSR R4, R6, R9 LSR R4, R7, R10
LSL R3, R6 ORR R8, R10
ORR R8, R6
LSR R4, R7, R8
LSL R3, R7 LSL R3, R7
ORR R9, R7 LSR R4, R6, R9
STP.P (R6, R7), 16(R0) ORR R7, R9
LSL R3, R6, R8
SUB $2, R1 SUB $2, R1
STP.W (R9, R10), -16(R0)
loop: loop:
CBZ R1, done CBZ R1, done
LDP.P 32(R2), (R10, R11) LDP.W -32(R2), (R10, R11)
LDP -16(R2), (R12, R13) LDP 16(R2), (R12, R13)
LSR R4, R10, R20 LSR R4, R13, R23
LSL R3, R10 ORR R8, R23 // z[i] = (x[i] << s) | (x[i-1] >> (64 - s))
ORR R8, R10 // z[i] = (x[i] << s) | (x[i-1] >> (64 - s)) LSL R3, R13
LSR R4, R11, R21
LSL R3, R11
ORR R20, R11
LSR R4, R12, R22 LSR R4, R12, R22
ORR R13, R22
LSL R3, R12 LSL R3, R12
ORR R21, R12 LSR R4, R11, R21
LSR R4, R13, R8 ORR R12, R21
LSL R3, R13 LSL R3, R11
ORR R22, R13 LSR R4, R10, R20
STP.P (R10, R11), 32(R0) ORR R11, R20
STP (R12, R13), -16(R0) LSL R3, R10, R8
STP.W (R20, R21), -32(R0)
STP (R22, R23), 16(R0)
SUB $4, R1 SUB $4, R1
B loop B loop
done: done:
MOVD R8, c+56(FP) // the part moved out from the last element MOVD.W R8, -8(R0) // the first element x[0]
MOVD R5, c+56(FP) // the part moved out from x[n-1]
RET RET
copy: copy:
CMP R0, R2
BEQ len0
TBZ $0, R1, ctwo TBZ $0, R1, ctwo
MOVD.P 8(R2), R3 MOVD.W -8(R2), R4
MOVD.P R3, 8(R0) MOVD.W R4, -8(R0)
SUB $1, R1 SUB $1, R1
ctwo: ctwo:
TBZ $1, R1, cloop TBZ $1, R1, cloop
LDP.P 16(R2), (R4, R5) LDP.W -16(R2), (R4, R5)
STP.P (R4, R5), 16(R0) STP.W (R4, R5), -16(R0)
SUB $2, R1 SUB $2, R1
cloop: cloop:
CBZ R1, len0 CBZ R1, len0
LDP.P 32(R2), (R4, R5) LDP.W -32(R2), (R4, R5)
LDP -16(R2), (R6, R7) LDP 16(R2), (R6, R7)
STP.P (R4, R5), 32(R0) STP.W (R4, R5), -32(R0)
STP (R6, R7), -16(R0) STP (R6, R7), 16(R0)
SUB $4, R1 SUB $4, R1
B cloop B cloop
len0: len0:
MOVD $0, c+56(FP) MOVD $0, c+56(FP)
RET RET
// func shrVU(z, x []Word, s uint) (c Word) // func shrVU(z, x []Word, s uint) (c Word)
// This implementation handles the shift operation from the low word to the high word,
// which may be an error for the case where the high word of x overlaps with the low
// word of z. When calling this function directly, you need to pay attention to this
// situation.
TEXT ·shrVU(SB),NOSPLIT,$0 TEXT ·shrVU(SB),NOSPLIT,$0
MOVD z+0(FP), R0 MOVD z+0(FP), R0
MOVD z_len+8(FP), R1 MOVD z_len+8(FP), R1
...@@ -334,6 +344,8 @@ done: ...@@ -334,6 +344,8 @@ done:
MOVD R8, (R0) // deal with the last element MOVD R8, (R0) // deal with the last element
RET RET
copy: copy:
CMP R0, R2
BEQ len0
TBZ $0, R1, ctwo TBZ $0, R1, ctwo
MOVD.P 8(R2), R3 MOVD.P 8(R2), R3
MOVD.P R3, 8(R0) MOVD.P R3, 8(R0)
......
...@@ -213,6 +213,75 @@ func TestFunVW(t *testing.T) { ...@@ -213,6 +213,75 @@ func TestFunVW(t *testing.T) {
} }
} }
type argVU struct {
d []Word // d is a Word slice, the input parameters x and z come from this array.
l uint // l is the length of the input parameters x and z.
xp uint // xp is the starting position of the input parameter x, x := d[xp:xp+l].
zp uint // zp is the starting position of the input parameter z, z := d[zp:zp+l].
s uint // s is the shift number.
r []Word // r is the expected output result z.
c Word // c is the expected return value.
m string // message.
}
var argshlVU = []argVU{
// test cases for shlVU
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0}, 7, 0, 0, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "complete overlap of shlVU"},
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0}, 7, 0, 3, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "partial overlap by half of shlVU"},
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0, 0, 0, 0}, 7, 0, 6, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "partial overlap by 1 Word of shlVU"},
{[]Word{1, _M, _M, _M, _M, _M, 3 << (_W - 2), 0, 0, 0, 0, 0, 0, 0, 0}, 7, 0, 7, 1, []Word{2, _M - 1, _M, _M, _M, _M, 1<<(_W-1) + 1}, 1, "no overlap of shlVU"},
}
var argshrVU = []argVU{
// test cases for shrVU
{[]Word{0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 1, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "complete overlap of shrVU"},
{[]Word{0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 4, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "partial overlap by half of shrVU"},
{[]Word{0, 0, 0, 0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 7, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "partial overlap by 1 Word of shrVU"},
{[]Word{0, 0, 0, 0, 0, 0, 0, 0, 3, _M, _M, _M, _M, _M, 1 << (_W - 1)}, 7, 8, 1, 1, []Word{1<<(_W-1) + 1, _M, _M, _M, _M, _M >> 1, 1 << (_W - 2)}, 1 << (_W - 1), "no overlap of shrVU"},
}
func testShiftFunc(t *testing.T, f func(z, x []Word, s uint) Word, a argVU) {
// save a.d for error message, or it will be overwritten.
b := make([]Word, len(a.d))
copy(b, a.d)
z := a.d[a.zp : a.zp+a.l]
x := a.d[a.xp : a.xp+a.l]
c := f(z, x, a.s)
for i, zi := range z {
if zi != a.r[i] {
t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot z[%d] = %#x; want %#x", b, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, i, zi, a.r[i])
break
}
}
if c != a.c {
t.Errorf("d := %v, %s(d[%d:%d], d[%d:%d], %d)\n\tgot c = %#x; want %#x", b, a.m, a.zp, a.zp+a.l, a.xp, a.xp+a.l, a.s, c, a.c)
}
}
func TestShiftOverlap(t *testing.T) {
for _, a := range argshlVU {
arg := a
testShiftFunc(t, shlVU, arg)
}
for _, a := range argshrVU {
arg := a
testShiftFunc(t, shrVU, arg)
}
}
func TestIssue31084(t *testing.T) {
// compute 10^n via 5^n << n.
const n = 165
p := nat(nil).expNN(nat{5}, nat{n}, nil)
p = p.shl(p, uint(n))
got := string(p.utoa(10))
want := "1" + strings.Repeat("0", n)
if got != want {
t.Errorf("shl(%v, %v)\n\tgot %s; want %s\n", p, uint(n), got, want)
}
}
func BenchmarkAddVW(b *testing.B) { func BenchmarkAddVW(b *testing.B) {
for _, n := range benchSizes { for _, n := range benchSizes {
if isRaceBuilder && n > 1e3 { if isRaceBuilder && n > 1e3 {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment