Commit 5714c91b authored by erifan01's avatar erifan01 Committed by Ben Shi

cmd/compile: intrinsify math/bits.Add64 for arm64

This CL intrinsifies Add64 with the arm64 instruction sequence ADDS, ADCS
and ADC, and optimizes the case of carry chains. The CL also changes the
test code so that the intrinsic implementation can be tested.

Benchmarks:
name               old time/op       new time/op       delta
Add-224            2.500000ns +- 0%  2.090000ns +- 4%  -16.40%  (p=0.000 n=9+10)
Add32-224          2.500000ns +- 0%  2.500000ns +- 0%     ~     (all equal)
Add64-224          2.500000ns +- 0%  1.577778ns +- 2%  -36.89%  (p=0.000 n=10+9)
Add64multiple-224  6.000000ns +- 0%  2.000000ns +- 0%  -66.67%  (p=0.000 n=10+10)

Change-Id: I6ee91c9a85c16cc72ade5fd94868c579f16c7615
Reviewed-on: https://go-review.googlesource.com/c/go/+/159017
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
parent 456f3e10
...@@ -246,6 +246,30 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -246,6 +246,30 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.Reg = v.Args[0].Reg() p.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg() p.To.Reg = v.Reg()
case ssa.OpARM64ADDSconstflags:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
p.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg0()
case ssa.OpARM64ADCzerocarry:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = arm64.REGZERO
p.Reg = arm64.REGZERO
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpARM64ADCSflags:
r := v.Reg0()
r1 := v.Args[0].Reg()
r2 := v.Args[1].Reg()
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = r2
p.Reg = r1
p.To.Type = obj.TYPE_REG
p.To.Reg = r
case ssa.OpARM64EXTRconst, case ssa.OpARM64EXTRconst,
ssa.OpARM64EXTRWconst: ssa.OpARM64EXTRWconst:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
......
...@@ -3562,8 +3562,8 @@ func init() { ...@@ -3562,8 +3562,8 @@ func init() {
func(s *state, n *Node, args []*ssa.Value) *ssa.Value { func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2]) return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
}, },
sys.AMD64) sys.AMD64, sys.ARM64)
alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64) alias("math/bits", "Add", "math/bits", "Add64", sys.ArchAMD64, sys.ArchARM64)
addF("math/bits", "Sub64", addF("math/bits", "Sub64",
func(s *state, n *Node, args []*ssa.Value) *ssa.Value { func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2]) return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1], args[2])
......
...@@ -144,6 +144,12 @@ ...@@ -144,6 +144,12 @@
(UMOD <typ.UInt64> x y) -> (MSUB <typ.UInt64> x y (UDIV <typ.UInt64> x y)) (UMOD <typ.UInt64> x y) -> (MSUB <typ.UInt64> x y (UDIV <typ.UInt64> x y))
(UMODW <typ.UInt32> x y) -> (MSUBW <typ.UInt32> x y (UDIVW <typ.UInt32> x y)) (UMODW <typ.UInt32> x y) -> (MSUBW <typ.UInt32> x y (UDIVW <typ.UInt32> x y))
// 64-bit addition with carry.
(Select0 (Add64carry x y c)) -> (Select0 <typ.UInt64> (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] c))))
(Select1 (Add64carry x y c)) -> (ADCzerocarry <typ.UInt64> (Select1 <types.TypeFlags> (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] c)))))
// The carry flag of c doesn't change.
(ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] (ADCzerocarry <typ.UInt64> c)))) -> (ADCSflags x y c)
// boolean ops -- booleans are represented with 0=false, 1=true // boolean ops -- booleans are represented with 0=false, 1=true
(AndB x y) -> (AND x y) (AndB x y) -> (AND x y)
(OrB x y) -> (OR x y) (OrB x y) -> (OR x y)
......
...@@ -138,18 +138,21 @@ func init() { ...@@ -138,18 +138,21 @@ func init() {
// Common regInfo // Common regInfo
var ( var (
gp01 = regInfo{inputs: nil, outputs: []regMask{gp}} gp01 = regInfo{inputs: nil, outputs: []regMask{gp}}
gp0flags1 = regInfo{inputs: []regMask{0}, outputs: []regMask{gp}}
gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} gp11 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}} gp11sp = regInfo{inputs: []regMask{gpspg}, outputs: []regMask{gp}}
gp1flags = regInfo{inputs: []regMask{gpg}} gp1flags = regInfo{inputs: []regMask{gpg}}
gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}} gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
gp11flags = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp, 0}}
gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}} gp21 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
gp31 = regInfo{inputs: []regMask{gpg, gpg, gpg}, outputs: []regMask{gp}}
gp21nog = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} gp21nog = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
gp2flags = regInfo{inputs: []regMask{gpg, gpg}} gp2flags = regInfo{inputs: []regMask{gpg, gpg}}
gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
gp2flags1flags = regInfo{inputs: []regMask{gp, gp, 0}, outputs: []regMask{gp, 0}}
gp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}} gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
gp31 = regInfo{inputs: []regMask{gpg, gpg, gpg}, outputs: []regMask{gp}}
gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
gp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}}
gpstore0 = regInfo{inputs: []regMask{gpspsbg}} gpstore0 = regInfo{inputs: []regMask{gpspsbg}}
gpstore2 = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}} gpstore2 = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
...@@ -171,6 +174,9 @@ func init() { ...@@ -171,6 +174,9 @@ func init() {
) )
ops := []opData{ ops := []opData{
// binary ops // binary ops
{name: "ADCSflags", argLength: 3, reg: gp2flags1flags, typ: "(UInt64,Flags)", asm: "ADCS", commutative: true}, // arg0+arg1+carry, set flags.
{name: "ADCzerocarry", argLength: 1, reg: gp0flags1, typ: "UInt64", asm: "ADC"}, // ZR+ZR+carry
{name: "ADDSconstflags", argLength: 1, reg: gp11flags, typ: "(UInt64,Flags)", asm: "ADDS", aux: "Int64"}, // arg0+auxint, set flags.
{name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true}, // arg0 + arg1 {name: "ADD", argLength: 2, reg: gp21, asm: "ADD", commutative: true}, // arg0 + arg1
{name: "ADDconst", argLength: 1, reg: gp11sp, asm: "ADD", aux: "Int64"}, // arg0 + auxInt {name: "ADDconst", argLength: 1, reg: gp11sp, asm: "ADD", aux: "Int64"}, // arg0 + auxInt
{name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0 - arg1 {name: "SUB", argLength: 2, reg: gp21, asm: "SUB"}, // arg0 - arg1
...@@ -214,6 +220,7 @@ func init() { ...@@ -214,6 +220,7 @@ func init() {
{name: "ORN", argLength: 2, reg: gp21, asm: "ORN"}, // arg0 | ^arg1 {name: "ORN", argLength: 2, reg: gp21, asm: "ORN"}, // arg0 | ^arg1
{name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, returns (hi, lo) {name: "LoweredMuluhilo", argLength: 2, reg: gp22, resultNotInArgs: true}, // arg0 * arg1, returns (hi, lo)
// unary ops // unary ops
{name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0 {name: "MVN", argLength: 1, reg: gp11, asm: "MVN"}, // ^arg0
{name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0 {name: "NEG", argLength: 1, reg: gp11, asm: "NEG"}, // -arg0
......
...@@ -1141,6 +1141,9 @@ const ( ...@@ -1141,6 +1141,9 @@ const (
OpARMInvertFlags OpARMInvertFlags
OpARMLoweredWB OpARMLoweredWB
OpARM64ADCSflags
OpARM64ADCzerocarry
OpARM64ADDSconstflags
OpARM64ADD OpARM64ADD
OpARM64ADDconst OpARM64ADDconst
OpARM64SUB OpARM64SUB
...@@ -15137,6 +15140,47 @@ var opcodeTable = [...]opInfo{ ...@@ -15137,6 +15140,47 @@ var opcodeTable = [...]opInfo{
}, },
}, },
{
name: "ADCSflags",
argLen: 3,
commutative: true,
asm: arm64.AADCS,
reg: regInfo{
inputs: []inputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
{1, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
outputs: []outputInfo{
{1, 0},
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "ADCzerocarry",
argLen: 1,
asm: arm64.AADC,
reg: regInfo{
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "ADDSconstflags",
auxType: auxInt64,
argLen: 1,
asm: arm64.AADDS,
reg: regInfo{
inputs: []inputInfo{
{0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
},
outputs: []outputInfo{
{1, 0},
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{ {
name: "ADD", name: "ADD",
argLen: 2, argLen: 2,
......
...@@ -17,6 +17,8 @@ var _ = types.TypeMem // in case not otherwise used ...@@ -17,6 +17,8 @@ var _ = types.TypeMem // in case not otherwise used
func rewriteValueARM64(v *Value) bool { func rewriteValueARM64(v *Value) bool {
switch v.Op { switch v.Op {
case OpARM64ADCSflags:
return rewriteValueARM64_OpARM64ADCSflags_0(v)
case OpARM64ADD: case OpARM64ADD:
return rewriteValueARM64_OpARM64ADD_0(v) || rewriteValueARM64_OpARM64ADD_10(v) || rewriteValueARM64_OpARM64ADD_20(v) return rewriteValueARM64_OpARM64ADD_0(v) || rewriteValueARM64_OpARM64ADD_10(v) || rewriteValueARM64_OpARM64ADD_20(v)
case OpARM64ADDconst: case OpARM64ADDconst:
...@@ -873,6 +875,10 @@ func rewriteValueARM64(v *Value) bool { ...@@ -873,6 +875,10 @@ func rewriteValueARM64(v *Value) bool {
return rewriteValueARM64_OpRsh8x64_0(v) return rewriteValueARM64_OpRsh8x64_0(v)
case OpRsh8x8: case OpRsh8x8:
return rewriteValueARM64_OpRsh8x8_0(v) return rewriteValueARM64_OpRsh8x8_0(v)
case OpSelect0:
return rewriteValueARM64_OpSelect0_0(v)
case OpSelect1:
return rewriteValueARM64_OpSelect1_0(v)
case OpSignExt16to32: case OpSignExt16to32:
return rewriteValueARM64_OpSignExt16to32_0(v) return rewriteValueARM64_OpSignExt16to32_0(v)
case OpSignExt16to64: case OpSignExt16to64:
...@@ -948,6 +954,46 @@ func rewriteValueARM64(v *Value) bool { ...@@ -948,6 +954,46 @@ func rewriteValueARM64(v *Value) bool {
} }
return false return false
} }
// rewriteValueARM64_OpARM64ADCSflags_0 applies the carry-chain rule to an
// ADCSflags value and reports whether v was rewritten.
//
// match: (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] (ADCzerocarry <typ.UInt64> c))))
// cond:
// result: (ADCSflags x y c)
//
// When the flags operand is produced by adding -1 to an ADCzerocarry of some
// flags value c, the rule feeds c into the ADCSflags directly.
func rewriteValueARM64_OpARM64ADCSflags_0(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types

	_ = v.Args[2] // single up-front bounds check for the three operands
	x := v.Args[0]
	y := v.Args[1]

	// Third operand: Select1 yielding flags.
	flagsSel := v.Args[2]
	if flagsSel.Op != OpSelect1 || flagsSel.Type != types.TypeFlags {
		return false
	}

	// ... selected from ADDSconstflags [-1].
	addMinusOne := flagsSel.Args[0]
	if addMinusOne.Op != OpARM64ADDSconstflags || addMinusOne.AuxInt != -1 {
		return false
	}

	// ... whose operand is an ADCzerocarry typed as UInt64.
	carryWord := addMinusOne.Args[0]
	if carryWord.Op != OpARM64ADCzerocarry || carryWord.Type != typ.UInt64 {
		return false
	}

	// Use the flags that ADCzerocarry consumed as the carry input.
	c := carryWord.Args[0]
	v.reset(OpARM64ADCSflags)
	v.AddArg(x)
	v.AddArg(y)
	v.AddArg(c)
	return true
}
func rewriteValueARM64_OpARM64ADD_0(v *Value) bool { func rewriteValueARM64_OpARM64ADD_0(v *Value) bool {
// match: (ADD x (MOVDconst [c])) // match: (ADD x (MOVDconst [c]))
// cond: // cond:
...@@ -36794,6 +36840,68 @@ func rewriteValueARM64_OpRsh8x8_0(v *Value) bool { ...@@ -36794,6 +36840,68 @@ func rewriteValueARM64_OpRsh8x8_0(v *Value) bool {
return true return true
} }
} }
// rewriteValueARM64_OpSelect0_0 lowers the first result of a generic
// Add64carry and reports whether v was rewritten.
//
// match: (Select0 (Add64carry x y c))
// cond:
// result: (Select0 <typ.UInt64> (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] c))))
func rewriteValueARM64_OpSelect0_0(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types

	add := v.Args[0]
	if add.Op != OpAdd64carry {
		return false
	}
	c := add.Args[2]
	x := add.Args[0]
	y := add.Args[1]

	v.reset(OpSelect0)
	v.Type = typ.UInt64

	// New values are created in the same order as the rule's result
	// expression (outermost first), then wired together.
	adcs := b.NewValue0(v.Pos, OpARM64ADCSflags, types.NewTuple(typ.UInt64, types.TypeFlags))
	adcs.AddArg(x)
	adcs.AddArg(y)
	carryIn := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
	adds := b.NewValue0(v.Pos, OpARM64ADDSconstflags, types.NewTuple(typ.UInt64, types.TypeFlags))

	// ADDSconstflags [-1] c: flags from adding -1 to the carry operand.
	adds.AuxInt = -1
	adds.AddArg(c)
	carryIn.AddArg(adds)

	adcs.AddArg(carryIn)
	v.AddArg(adcs)
	return true
}
// rewriteValueARM64_OpSelect1_0 lowers the second result of a generic
// Add64carry and reports whether v was rewritten.
//
// match: (Select1 (Add64carry x y c))
// cond:
// result: (ADCzerocarry <typ.UInt64> (Select1 <types.TypeFlags> (ADCSflags x y (Select1 <types.TypeFlags> (ADDSconstflags [-1] c)))))
func rewriteValueARM64_OpSelect1_0(v *Value) bool {
	b := v.Block
	typ := &b.Func.Config.Types

	add := v.Args[0]
	if add.Op != OpAdd64carry {
		return false
	}
	c := add.Args[2]
	x := add.Args[0]
	y := add.Args[1]

	// v becomes ADCzerocarry, which turns the flags result into a UInt64.
	v.reset(OpARM64ADCzerocarry)
	v.Type = typ.UInt64

	// New values are created in the same order as the rule's result
	// expression (outermost first), then wired together.
	carryOut := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
	adcs := b.NewValue0(v.Pos, OpARM64ADCSflags, types.NewTuple(typ.UInt64, types.TypeFlags))
	adcs.AddArg(x)
	adcs.AddArg(y)
	carryIn := b.NewValue0(v.Pos, OpSelect1, types.TypeFlags)
	adds := b.NewValue0(v.Pos, OpARM64ADDSconstflags, types.NewTuple(typ.UInt64, types.TypeFlags))

	// ADDSconstflags [-1] c: flags from adding -1 to the carry operand.
	adds.AuxInt = -1
	adds.AddArg(c)
	carryIn.AddArg(adds)

	adcs.AddArg(carryIn)
	carryOut.AddArg(adcs)
	v.AddArg(carryOut)
	return true
}
func rewriteValueARM64_OpSignExt16to32_0(v *Value) bool { func rewriteValueARM64_OpSignExt16to32_0(v *Value) bool {
// match: (SignExt16to32 x) // match: (SignExt16to32 x)
// cond: // cond:
......
...@@ -736,6 +736,13 @@ func TestAddSubUint(t *testing.T) { ...@@ -736,6 +736,13 @@ func TestAddSubUint(t *testing.T) {
test("Add symmetric", Add, a.y, a.x, a.c, a.z, a.cout) test("Add symmetric", Add, a.y, a.x, a.c, a.z, a.cout)
test("Sub", Sub, a.z, a.x, a.c, a.y, a.cout) test("Sub", Sub, a.z, a.x, a.c, a.y, a.cout)
test("Sub symmetric", Sub, a.z, a.y, a.c, a.x, a.cout) test("Sub symmetric", Sub, a.z, a.y, a.c, a.x, a.cout)
// The above code can't test intrinsic implementation, because the passed function is not called directly.
// The following code uses a closure to test the intrinsic version in case the function is intrinsified.
test("Add intrinsic", func(x, y, c uint) (uint, uint) { return Add(x, y, c) }, a.x, a.y, a.c, a.z, a.cout)
test("Add intrinsic symmetric", func(x, y, c uint) (uint, uint) { return Add(x, y, c) }, a.y, a.x, a.c, a.z, a.cout)
test("Sub intrinsic", func(x, y, c uint) (uint, uint) { return Sub(x, y, c) }, a.z, a.x, a.c, a.y, a.cout)
test("Add intrinsic symmetric", func(x, y, c uint) (uint, uint) { return Sub(x, y, c) }, a.z, a.y, a.c, a.x, a.cout)
} }
} }
...@@ -790,6 +797,12 @@ func TestAddSubUint64(t *testing.T) { ...@@ -790,6 +797,12 @@ func TestAddSubUint64(t *testing.T) {
test("Add64 symmetric", Add64, a.y, a.x, a.c, a.z, a.cout) test("Add64 symmetric", Add64, a.y, a.x, a.c, a.z, a.cout)
test("Sub64", Sub64, a.z, a.x, a.c, a.y, a.cout) test("Sub64", Sub64, a.z, a.x, a.c, a.y, a.cout)
test("Sub64 symmetric", Sub64, a.z, a.y, a.c, a.x, a.cout) test("Sub64 symmetric", Sub64, a.z, a.y, a.c, a.x, a.cout)
// The above code can't test intrinsic implementation, because the passed function is not called directly.
// The following code uses a closure to test the intrinsic version in case the function is intrinsified.
test("Add64 intrinsic", func(x, y, c uint64) (uint64, uint64) { return Add64(x, y, c) }, a.x, a.y, a.c, a.z, a.cout)
test("Add64 intrinsic symmetric", func(x, y, c uint64) (uint64, uint64) { return Add64(x, y, c) }, a.y, a.x, a.c, a.z, a.cout)
test("Sub64 intrinsic", func(x, y, c uint64) (uint64, uint64) { return Sub64(x, y, c) }, a.z, a.x, a.c, a.y, a.cout)
test("Add64 intrinsic symmetric", func(x, y, c uint64) (uint64, uint64) { return Sub64(x, y, c) }, a.z, a.y, a.c, a.x, a.cout)
} }
} }
...@@ -817,6 +830,12 @@ func TestMulDiv(t *testing.T) { ...@@ -817,6 +830,12 @@ func TestMulDiv(t *testing.T) {
testMul("Mul symmetric", Mul, a.y, a.x, a.hi, a.lo) testMul("Mul symmetric", Mul, a.y, a.x, a.hi, a.lo)
testDiv("Div", Div, a.hi, a.lo+a.r, a.y, a.x, a.r) testDiv("Div", Div, a.hi, a.lo+a.r, a.y, a.x, a.r)
testDiv("Div symmetric", Div, a.hi, a.lo+a.r, a.x, a.y, a.r) testDiv("Div symmetric", Div, a.hi, a.lo+a.r, a.x, a.y, a.r)
// The above code can't test intrinsic implementation, because the passed function is not called directly.
// The following code uses a closure to test the intrinsic version in case the function is intrinsified.
testMul("Mul intrinsic", func(x, y uint) (uint, uint) { return Mul(x, y) }, a.x, a.y, a.hi, a.lo)
testMul("Mul intrinsic symmetric", func(x, y uint) (uint, uint) { return Mul(x, y) }, a.y, a.x, a.hi, a.lo)
testDiv("Div intrinsic", func(hi, lo, y uint) (uint, uint) { return Div(hi, lo, y) }, a.hi, a.lo+a.r, a.y, a.x, a.r)
testDiv("Div intrinsic symmetric", func(hi, lo, y uint) (uint, uint) { return Div(hi, lo, y) }, a.hi, a.lo+a.r, a.x, a.y, a.r)
} }
} }
...@@ -873,6 +892,12 @@ func TestMulDiv64(t *testing.T) { ...@@ -873,6 +892,12 @@ func TestMulDiv64(t *testing.T) {
testMul("Mul64 symmetric", Mul64, a.y, a.x, a.hi, a.lo) testMul("Mul64 symmetric", Mul64, a.y, a.x, a.hi, a.lo)
testDiv("Div64", Div64, a.hi, a.lo+a.r, a.y, a.x, a.r) testDiv("Div64", Div64, a.hi, a.lo+a.r, a.y, a.x, a.r)
testDiv("Div64 symmetric", Div64, a.hi, a.lo+a.r, a.x, a.y, a.r) testDiv("Div64 symmetric", Div64, a.hi, a.lo+a.r, a.x, a.y, a.r)
// The above code can't test intrinsic implementation, because the passed function is not called directly.
// The following code uses a closure to test the intrinsic version in case the function is intrinsified.
testMul("Mul64 intrinsic", func(x, y uint64) (uint64, uint64) { return Mul64(x, y) }, a.x, a.y, a.hi, a.lo)
testMul("Mul64 intrinsic symmetric", func(x, y uint64) (uint64, uint64) { return Mul64(x, y) }, a.y, a.x, a.hi, a.lo)
testDiv("Div64 intrinsic", func(hi, lo, y uint64) (uint64, uint64) { return Div64(hi, lo, y) }, a.hi, a.lo+a.r, a.y, a.x, a.r)
testDiv("Div64 intrinsic symmetric", func(hi, lo, y uint64) (uint64, uint64) { return Div64(hi, lo, y) }, a.hi, a.lo+a.r, a.x, a.y, a.r)
} }
} }
......
...@@ -367,21 +367,25 @@ func IterateBits8(n uint8) int { ...@@ -367,21 +367,25 @@ func IterateBits8(n uint8) int {
// --------------- // // --------------- //
func Add(x, y, ci uint) (r, co uint) { func Add(x, y, ci uint) (r, co uint) {
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ","SBBQ","NEGQ" // amd64:"NEGL","ADCQ","SBBQ","NEGQ"
return bits.Add(x, y, ci) return bits.Add(x, y, ci)
} }
func AddC(x, ci uint) (r, co uint) { func AddC(x, ci uint) (r, co uint) {
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ","SBBQ","NEGQ" // amd64:"NEGL","ADCQ","SBBQ","NEGQ"
return bits.Add(x, 7, ci) return bits.Add(x, 7, ci)
} }
func AddZ(x, y uint) (r, co uint) { func AddZ(x, y uint) (r, co uint) {
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ" // amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
return bits.Add(x, y, 0) return bits.Add(x, y, 0)
} }
func AddR(x, y, ci uint) uint { func AddR(x, y, ci uint) uint {
// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ" // amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
r, _ := bits.Add(x, y, ci) r, _ := bits.Add(x, y, ci)
return r return r
...@@ -389,27 +393,32 @@ func AddR(x, y, ci uint) uint { ...@@ -389,27 +393,32 @@ func AddR(x, y, ci uint) uint {
func AddM(p, q, r *[3]uint) { func AddM(p, q, r *[3]uint) {
var c uint var c uint
r[0], c = bits.Add(p[0], q[0], c) r[0], c = bits.Add(p[0], q[0], c)
// arm64:"ADCS",-"ADD\t",-"CMP"
// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ" // amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
r[1], c = bits.Add(p[1], q[1], c) r[1], c = bits.Add(p[1], q[1], c)
r[2], c = bits.Add(p[2], q[2], c) r[2], c = bits.Add(p[2], q[2], c)
} }
func Add64(x, y, ci uint64) (r, co uint64) { func Add64(x, y, ci uint64) (r, co uint64) {
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ","SBBQ","NEGQ" // amd64:"NEGL","ADCQ","SBBQ","NEGQ"
return bits.Add64(x, y, ci) return bits.Add64(x, y, ci)
} }
func Add64C(x, ci uint64) (r, co uint64) { func Add64C(x, ci uint64) (r, co uint64) {
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ","SBBQ","NEGQ" // amd64:"NEGL","ADCQ","SBBQ","NEGQ"
return bits.Add64(x, 7, ci) return bits.Add64(x, 7, ci)
} }
func Add64Z(x, y uint64) (r, co uint64) { func Add64Z(x, y uint64) (r, co uint64) {
// arm64:"ADDS","ADCS","ADC",-"ADD\t",-"CMP"
// amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ" // amd64:"ADDQ","SBBQ","NEGQ",-"NEGL",-"ADCQ"
return bits.Add64(x, y, 0) return bits.Add64(x, y, 0)
} }
func Add64R(x, y, ci uint64) uint64 { func Add64R(x, y, ci uint64) uint64 {
// arm64:"ADDS","ADCS",-"ADD\t",-"CMP"
// amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ" // amd64:"NEGL","ADCQ",-"SBBQ",-"NEGQ"
r, _ := bits.Add64(x, y, ci) r, _ := bits.Add64(x, y, ci)
return r return r
...@@ -417,6 +426,7 @@ func Add64R(x, y, ci uint64) uint64 { ...@@ -417,6 +426,7 @@ func Add64R(x, y, ci uint64) uint64 {
func Add64M(p, q, r *[3]uint64) { func Add64M(p, q, r *[3]uint64) {
var c uint64 var c uint64
r[0], c = bits.Add64(p[0], q[0], c) r[0], c = bits.Add64(p[0], q[0], c)
// arm64:"ADCS",-"ADD\t",-"CMP"
// amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ" // amd64:"ADCQ",-"NEGL",-"SBBQ",-"NEGQ"
r[1], c = bits.Add64(p[1], q[1], c) r[1], c = bits.Add64(p[1], q[1], c)
r[2], c = bits.Add64(p[2], q[2], c) r[2], c = bits.Add64(p[2], q[2], c)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment