Commit 79112707 authored by Giovanni Bajo's avatar Giovanni Bajo

cmd/compile: add patterns for bit set/clear/complement on amd64

This patch completes implementation of BT(Q|L), and adds support
for BT(S|R|C)(Q|L).

Example of code changes from time.(*Time).addSec:

        if t.wall&hasMonotonic != 0 {
  0x1073465               488b08                  MOVQ 0(AX), CX
  0x1073468               4889ca                  MOVQ CX, DX
  0x107346b               48c1e93f                SHRQ $0x3f, CX
  0x107346f               48c1e13f                SHLQ $0x3f, CX
  0x1073473               48f7c1ffffffff          TESTQ $-0x1, CX
  0x107347a               746b                    JE 0x10734e7

        if t.wall&hasMonotonic != 0 {
  0x1073435               488b08                  MOVQ 0(AX), CX
  0x1073438               480fbae13f              BTQ $0x3f, CX
  0x107343d               7363                    JAE 0x10734a2

Another example:

                        t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
  0x10734c8               4881e1ffffff3f          ANDQ $0x3fffffff, CX
  0x10734cf               48c1e61e                SHLQ $0x1e, SI
  0x10734d3               4809ce                  ORQ CX, SI
  0x10734d6               48b90000000000000080    MOVQ $0x8000000000000000, CX
  0x10734e0               4809f1                  ORQ SI, CX
  0x10734e3               488908                  MOVQ CX, 0(AX)

                        t.wall = t.wall&nsecMask | uint64(dsec)<<nsecShift | hasMonotonic
  0x107348b		4881e2ffffff3f		ANDQ $0x3fffffff, DX
  0x1073492		48c1e61e		SHLQ $0x1e, SI
  0x1073496		4809f2			ORQ SI, DX
  0x1073499		480fbaea3f		BTSQ $0x3f, DX
  0x107349e		488910			MOVQ DX, 0(AX)

Go1 benchmarks seem unaffected, and I would be surprised
otherwise:

name                     old time/op    new time/op     delta
BinaryTree17-4              2.64s ± 4%      2.56s ± 9%  -2.92%  (p=0.008 n=9+9)
Fannkuch11-4                2.90s ± 1%      2.95s ± 3%  +1.76%  (p=0.010 n=10+9)
FmtFprintfEmpty-4          35.3ns ± 1%     34.5ns ± 2%  -2.34%  (p=0.004 n=9+8)
FmtFprintfString-4         57.0ns ± 1%     58.4ns ± 5%  +2.52%  (p=0.029 n=9+10)
FmtFprintfInt-4            59.8ns ± 3%     59.8ns ± 6%    ~     (p=0.565 n=10+10)
FmtFprintfIntInt-4         93.9ns ± 3%     91.2ns ± 5%  -2.94%  (p=0.014 n=10+9)
FmtFprintfPrefixedInt-4     107ns ± 6%      104ns ± 6%    ~     (p=0.099 n=10+10)
FmtFprintfFloat-4           187ns ± 3%      188ns ± 3%    ~     (p=0.505 n=10+9)
FmtManyArgs-4               410ns ± 1%      415ns ± 6%    ~     (p=0.649 n=8+10)
GobDecode-4                5.30ms ± 3%     5.27ms ± 3%    ~     (p=0.436 n=10+10)
GobEncode-4                4.62ms ± 5%     4.47ms ± 2%  -3.24%  (p=0.001 n=9+10)
Gzip-4                      197ms ± 4%      193ms ± 3%    ~     (p=0.123 n=10+10)
Gunzip-4                   30.4ms ± 3%     30.1ms ± 3%    ~     (p=0.481 n=10+10)
HTTPClientServer-4         76.3µs ± 1%     76.0µs ± 1%    ~     (p=0.236 n=8+9)
JSONEncode-4               10.5ms ± 9%     10.3ms ± 3%    ~     (p=0.280 n=10+10)
JSONDecode-4               42.3ms ±10%     41.3ms ± 2%    ~     (p=0.053 n=9+10)
Mandelbrot200-4            3.80ms ± 2%     3.72ms ± 2%  -2.15%  (p=0.001 n=9+10)
GoParse-4                  2.88ms ±10%     2.81ms ± 2%    ~     (p=0.247 n=10+10)
RegexpMatchEasy0_32-4      69.5ns ± 4%     68.6ns ± 2%    ~     (p=0.171 n=10+10)
RegexpMatchEasy0_1K-4       165ns ± 3%      162ns ± 3%    ~     (p=0.137 n=10+10)
RegexpMatchEasy1_32-4      65.7ns ± 6%     64.4ns ± 2%  -2.02%  (p=0.037 n=10+10)
RegexpMatchEasy1_1K-4       278ns ± 2%      279ns ± 3%    ~     (p=0.991 n=8+9)
RegexpMatchMedium_32-4     99.3ns ± 3%     98.5ns ± 4%    ~     (p=0.457 n=10+9)
RegexpMatchMedium_1K-4     30.1µs ± 1%     30.4µs ± 2%    ~     (p=0.173 n=8+10)
RegexpMatchHard_32-4       1.40µs ± 2%     1.41µs ± 4%    ~     (p=0.565 n=10+10)
RegexpMatchHard_1K-4       42.5µs ± 1%     41.5µs ± 3%  -2.13%  (p=0.002 n=8+9)
Revcomp-4                   332ms ± 4%      328ms ± 5%    ~     (p=0.720 n=9+10)
Template-4                 48.3ms ± 2%     49.6ms ± 3%  +2.56%  (p=0.002 n=8+10)
TimeParse-4                 252ns ± 2%      249ns ± 3%    ~     (p=0.116 n=9+10)
TimeFormat-4                262ns ± 4%      252ns ± 3%  -4.01%  (p=0.000 n=9+10)

name                     old speed      new speed       delta
GobDecode-4               145MB/s ± 3%    146MB/s ± 3%    ~     (p=0.436 n=10+10)
GobEncode-4               166MB/s ± 5%    172MB/s ± 2%  +3.28%  (p=0.001 n=9+10)
Gzip-4                   98.6MB/s ± 4%  100.4MB/s ± 3%    ~     (p=0.123 n=10+10)
Gunzip-4                  639MB/s ± 3%    645MB/s ± 3%    ~     (p=0.481 n=10+10)
JSONEncode-4              185MB/s ± 8%    189MB/s ± 3%    ~     (p=0.280 n=10+10)
JSONDecode-4             46.0MB/s ± 9%   47.0MB/s ± 2%  +2.21%  (p=0.046 n=9+10)
GoParse-4                20.1MB/s ± 9%   20.6MB/s ± 2%    ~     (p=0.239 n=10+10)
RegexpMatchEasy0_32-4     460MB/s ± 4%    467MB/s ± 2%    ~     (p=0.165 n=10+10)
RegexpMatchEasy0_1K-4    6.19GB/s ± 3%   6.28GB/s ± 3%    ~     (p=0.165 n=10+10)
RegexpMatchEasy1_32-4     487MB/s ± 5%    497MB/s ± 2%  +2.00%  (p=0.043 n=10+10)
RegexpMatchEasy1_1K-4    3.67GB/s ± 2%   3.67GB/s ± 3%    ~     (p=0.963 n=8+9)
RegexpMatchMedium_32-4   10.1MB/s ± 3%   10.1MB/s ± 4%    ~     (p=0.435 n=10+9)
RegexpMatchMedium_1K-4   34.0MB/s ± 1%   33.7MB/s ± 2%    ~     (p=0.173 n=8+10)
RegexpMatchHard_32-4     22.9MB/s ± 2%   22.7MB/s ± 4%    ~     (p=0.565 n=10+10)
RegexpMatchHard_1K-4     24.0MB/s ± 3%   24.7MB/s ± 3%  +2.64%  (p=0.001 n=9+9)
Revcomp-4                 766MB/s ± 4%    775MB/s ± 5%    ~     (p=0.720 n=9+10)
Template-4               40.2MB/s ± 2%   39.2MB/s ± 3%  -2.47%  (p=0.002 n=8+10)

The rules match ~1800 times during all.bash.

Fixes #18943

Change-Id: I64be1ada34e89c486dfd935bf429b35652117ed4
Reviewed-on: https://go-review.googlesource.com/94766
Run-TryBot: Giovanni Bajo <rasky@develer.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 3afd2d7f
......@@ -194,7 +194,10 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
ssa.OpAMD64PXOR:
ssa.OpAMD64PXOR,
ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ:
r := v.Reg()
if r != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
......@@ -573,7 +576,10 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.From.Offset = v.AuxInt
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Args[0].Reg()
case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst:
case ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
ssa.OpAMD64BTSLconst, ssa.OpAMD64BTSQconst,
ssa.OpAMD64BTCLconst, ssa.OpAMD64BTCQconst,
ssa.OpAMD64BTRLconst, ssa.OpAMD64BTRQconst:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
p.From.Offset = v.AuxInt
......
......@@ -279,45 +279,6 @@ var linuxAMD64Tests = []*asmTest{
`,
pos: []string{"\tSHLQ\t\\$5,", "\tLEAQ\t\\(.*\\)\\(.*\\*2\\),"},
},
// Bit test ops on amd64, issue 18943.
{
fn: `
func f37(a, b uint64) int {
if a&(1<<(b&63)) != 0 {
return 1
}
return -1
}
`,
pos: []string{"\tBTQ\t"},
},
{
fn: `
func f38(a, b uint64) bool {
return a&(1<<(b&63)) != 0
}
`,
pos: []string{"\tBTQ\t"},
},
{
fn: `
func f39(a uint64) int {
if a&(1<<60) != 0 {
return 1
}
return -1
}
`,
pos: []string{"\tBTQ\t\\$60"},
},
{
fn: `
func f40(a uint64) bool {
return a&(1<<60) != 0
}
`,
pos: []string{"\tBTQ\t\\$60"},
},
// see issue 19595.
// We want to merge load+op in f58, but not in f59.
{
......
......@@ -643,8 +643,10 @@
(NE (TESTB (SETAE cmp) (SETAE cmp)) yes no) -> (UGE cmp yes no)
// Recognize bit tests: a&(1<<b) != 0 for b suitably bounded
// Note that ULT and SETB check the carry flag; they are identical to CS and SETCS.
// Same, mutatis mutandis, for UGE and SETAE, and CC and SETCC.
// Note that BTx instructions use the carry bit, so we need to convert tests for zero flag
// into tests for carry flags.
// ULT and SETB check the carry flag; they are identical to CS and SETCS. Same, mutatis
// mutandis, for UGE and SETAE, and CC and SETCC.
((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) && !config.nacl -> ((ULT|UGE) (BTL x y))
((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) && !config.nacl -> ((ULT|UGE) (BTQ x y))
((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(c) && !config.nacl
......@@ -673,6 +675,94 @@
(SET(NE|EQ)mem [off] {sym} ptr (TESTQ (MOVQconst [c]) x) mem) && isUint64PowerOfTwo(c) && !config.nacl
-> (SET(B|AE)mem [off] {sym} ptr (BTQconst [log2(c)] x) mem)
// Handle bit-testing in the form (a>>b)&1 != 0 by building the above rules
// and further combining shifts.
(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 -> (BTQconst [c+d] x)
(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d -> (BT(Q|L)const [c-d] x)
(BT(Q|L)const [0] s:(SHRQ x y)) -> (BTQ y x)
(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 -> (BTLconst [c+d] x)
(BTLconst [c] (SHLLconst [d] x)) && c>d -> (BTLconst [c-d] x)
(BTLconst [0] s:(SHRL x y)) -> (BTL y x)
// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) && !config.nacl -> (BTS(Q|L) x y)
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) && !config.nacl -> (BTC(Q|L) x y)
// Convert ORconst into BTS, if the code gets smaller, with boundary being
// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
((ORQ|XORQ)const [c] x) && isUint64PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
-> (BT(S|C)Qconst [log2(c)] x)
((ORL|XORL)const [c] x) && isUint32PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
-> (BT(S|C)Lconst [log2uint32(c)] x)
((ORQ|XORQ) (MOVQconst [c]) x) && isUint64PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
-> (BT(S|C)Qconst [log2(c)] x)
((ORL|XORL) (MOVLconst [c]) x) && isUint32PowerOfTwo(c) && uint64(c) >= 128 && !config.nacl
-> (BT(S|C)Lconst [log2uint32(c)] x)
// Recognize bit clearing: a &^= 1<<b
(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) && !config.nacl -> (BTR(Q|L) x y)
(ANDQconst [c] x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
-> (BTRQconst [log2(^c)] x)
(ANDLconst [c] x) && isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
-> (BTRLconst [log2uint32(^c)] x)
(ANDQ (MOVQconst [c]) x) && isUint64PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
-> (BTRQconst [log2(^c)] x)
(ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(^c) && uint64(^c) >= 128 && !config.nacl
-> (BTRLconst [log2uint32(^c)] x)
// Special-case bit patterns on first/last bit.
// generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
// for instance:
// x & 0xFFFF0000 -> (x >> 16) << 16
// x & 0x80000000 -> (x >> 31) << 31
//
// In case the mask is just one bit (like second example above), it conflicts
// with the above rules to detect bit-testing / bit-clearing of first/last bit.
// We thus special-case them, by detecting the shift patterns.
// Special case resetting first/last bit
(SHL(L|Q)const [1] (SHR(L|Q)const [1] x)) && !config.nacl
-> (BTR(L|Q)const [0] x)
(SHRLconst [1] (SHLLconst [1] x)) && !config.nacl
-> (BTRLconst [31] x)
(SHRQconst [1] (SHLQconst [1] x)) && !config.nacl
-> (BTRQconst [63] x)
// Special case testing first/last bit (with double-shift generated by generic.rules)
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2)) && z1==z2 && !config.nacl
-> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHLLconst [31] (SHRQconst [31] x)) z2)) && z1==z2 && !config.nacl
-> ((SETB|SETAE|ULT|UGE) (BTQconst [31] x))
(SET(NE|EQ)mem [off] {sym} ptr (TESTQ z1:(SHLQconst [63] (SHRQconst [63] x)) z2) mem) && z1==z2 && !config.nacl
-> (SET(B|AE)mem [off] {sym} ptr (BTQconst [63] x) mem)
(SET(NE|EQ)mem [off] {sym} ptr (TESTL z1:(SHLLconst [31] (SHRLconst [31] x)) z2) mem) && z1==z2 && !config.nacl
-> (SET(B|AE)mem [off] {sym} ptr (BTLconst [31] x) mem)
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2)) && z1==z2 && !config.nacl
-> ((SETB|SETAE|ULT|UGE) (BTQconst [0] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2)) && z1==z2 && !config.nacl
-> ((SETB|SETAE|ULT|UGE) (BTLconst [0] x))
(SET(NE|EQ)mem [off] {sym} ptr (TESTQ z1:(SHRQconst [63] (SHLQconst [63] x)) z2) mem) && z1==z2 && !config.nacl
-> (SET(B|AE)mem [off] {sym} ptr (BTQconst [0] x) mem)
(SET(NE|EQ)mem [off] {sym} ptr (TESTL z1:(SHRLconst [31] (SHLLconst [31] x)) z2) mem) && z1==z2 && !config.nacl
-> (SET(B|AE)mem [off] {sym} ptr (BTLconst [0] x) mem)
// Special-case manually testing last bit with "a>>63 != 0" (without "&1")
((SETNE|SETEQ|NE|EQ) (TESTQ z1:(SHRQconst [63] x) z2)) && z1==z2 && !config.nacl
-> ((SETB|SETAE|ULT|UGE) (BTQconst [63] x))
((SETNE|SETEQ|NE|EQ) (TESTL z1:(SHRLconst [31] x) z2)) && z1==z2 && !config.nacl
-> ((SETB|SETAE|ULT|UGE) (BTLconst [31] x))
(SET(NE|EQ)mem [off] {sym} ptr (TESTQ z1:(SHRQconst [63] x) z2) mem) && z1==z2 && !config.nacl
-> (SET(B|AE)mem [off] {sym} ptr (BTQconst [63] x) mem)
(SET(NE|EQ)mem [off] {sym} ptr (TESTL z1:(SHRLconst [31] x) z2) mem) && z1==z2 && !config.nacl
-> (SET(B|AE)mem [off] {sym} ptr (BTLconst [31] x) mem)
// Fold combinations of bit ops on same bit. An example is math.Copysign(c,-1)
(BTS(Q|L)const [c] (BTR(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
(BTS(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTS(Q|L)const [c] x)
(BTR(Q|L)const [c] (BTS(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)
(BTR(Q|L)const [c] (BTC(Q|L)const [c] x)) -> (BTR(Q|L)const [c] x)
// Fold boolean negation into SETcc.
(XORLconst [1] (SETNE x)) -> (SETEQ x)
(XORLconst [1] (SETEQ x)) -> (SETNE x)
......
......@@ -266,8 +266,20 @@ func init() {
{name: "BTL", argLength: 2, reg: gp2flags, asm: "BTL", typ: "Flags"}, // test whether bit arg0 % 32 in arg1 is set
{name: "BTQ", argLength: 2, reg: gp2flags, asm: "BTQ", typ: "Flags"}, // test whether bit arg0 % 64 in arg1 is set
{name: "BTCL", argLength: 2, reg: gp21, asm: "BTCL", resultInArg0: true, clobberFlags: true}, // complement bit arg0 % 32 in arg1
{name: "BTCQ", argLength: 2, reg: gp21, asm: "BTCQ", resultInArg0: true, clobberFlags: true}, // complement bit arg0 % 64 in arg1
{name: "BTRL", argLength: 2, reg: gp21, asm: "BTRL", resultInArg0: true, clobberFlags: true}, // reset bit arg0 % 32 in arg1
{name: "BTRQ", argLength: 2, reg: gp21, asm: "BTRQ", resultInArg0: true, clobberFlags: true}, // reset bit arg0 % 64 in arg1
{name: "BTSL", argLength: 2, reg: gp21, asm: "BTSL", resultInArg0: true, clobberFlags: true}, // set bit arg0 % 32 in arg1
{name: "BTSQ", argLength: 2, reg: gp21, asm: "BTSQ", resultInArg0: true, clobberFlags: true}, // set bit arg0 % 64 in arg1
{name: "BTLconst", argLength: 1, reg: gp1flags, asm: "BTL", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 32
{name: "BTQconst", argLength: 1, reg: gp1flags, asm: "BTQ", typ: "Flags", aux: "Int8"}, // test whether bit auxint in arg0 is set, 0 <= auxint < 64
{name: "BTCLconst", argLength: 1, reg: gp11, asm: "BTCL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 0 <= auxint < 32
{name: "BTCQconst", argLength: 1, reg: gp11, asm: "BTCQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // complement bit auxint in arg0, 0 <= auxint < 64
{name: "BTRLconst", argLength: 1, reg: gp11, asm: "BTRL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 0 <= auxint < 32
{name: "BTRQconst", argLength: 1, reg: gp11, asm: "BTRQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // reset bit auxint in arg0, 0 <= auxint < 64
{name: "BTSLconst", argLength: 1, reg: gp11, asm: "BTSL", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 32
{name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64
{name: "TESTQ", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTQ", typ: "Flags"}, // (arg0 & arg1) compare to 0
{name: "TESTL", argLength: 2, reg: gp2flags, commutative: true, asm: "TESTL", typ: "Flags"}, // (arg0 & arg1) compare to 0
......
......@@ -499,8 +499,20 @@ const (
OpAMD64UCOMISD
OpAMD64BTL
OpAMD64BTQ
OpAMD64BTCL
OpAMD64BTCQ
OpAMD64BTRL
OpAMD64BTRQ
OpAMD64BTSL
OpAMD64BTSQ
OpAMD64BTLconst
OpAMD64BTQconst
OpAMD64BTCLconst
OpAMD64BTCQconst
OpAMD64BTRLconst
OpAMD64BTRQconst
OpAMD64BTSLconst
OpAMD64BTSQconst
OpAMD64TESTQ
OpAMD64TESTL
OpAMD64TESTW
......@@ -5901,6 +5913,102 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "BTCL",
argLen: 2,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTCL,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "BTCQ",
argLen: 2,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTCQ,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "BTRL",
argLen: 2,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTRL,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "BTRQ",
argLen: 2,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTRQ,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "BTSL",
argLen: 2,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTSL,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "BTSQ",
argLen: 2,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTSQ,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
{1, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "BTLconst",
auxType: auxInt8,
......@@ -5923,6 +6031,102 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "BTCLconst",
auxType: auxInt8,
argLen: 1,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTCL,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "BTCQconst",
auxType: auxInt8,
argLen: 1,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTCQ,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "BTRLconst",
auxType: auxInt8,
argLen: 1,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTRL,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "BTRQconst",
auxType: auxInt8,
argLen: 1,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTRQ,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "BTSLconst",
auxType: auxInt8,
argLen: 1,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTSL,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "BTSQconst",
auxType: auxInt8,
argLen: 1,
resultInArg0: true,
clobberFlags: true,
asm: x86.ABTSQ,
reg: regInfo{
inputs: []inputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
outputs: []outputInfo{
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
},
},
},
{
name: "TESTQ",
argLen: 2,
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -6,9 +6,258 @@
package codegen
func bitcheck(a, b uint64) int {
if a&(1<<(b&63)) != 0 { // amd64:"BTQ"
/************************************
* 64-bit instructions
************************************/
func bitcheck64_constleft(a uint64) (n int) {
// amd64:"BTQ\t[$]63"
if a&(1<<63) != 0 {
return 1
}
// amd64:"BTQ\t[$]60"
if a&(1<<60) != 0 {
return 1
}
// amd64:"BTL\t[$]0"
if a&(1<<0) != 0 {
return 1
}
return 0
}
func bitcheck64_constright(a [8]uint64) (n int) {
// amd64:"BTQ\t[$]63"
if (a[0]>>63)&1 != 0 {
return 1
}
// amd64:"BTQ\t[$]63"
if a[1]>>63 != 0 {
return 1
}
// amd64:"BTQ\t[$]63"
if a[2]>>63 == 0 {
return 1
}
// amd64:"BTQ\t[$]60"
if (a[3]>>60)&1 == 0 {
return 1
}
// amd64:"BTL\t[$]1"
if (a[4]>>1)&1 == 0 {
return 1
}
// amd64:"BTL\t[$]0"
if (a[5]>>0)&1 == 0 {
return 1
}
// amd64:"BTL\t[$]7"
if (a[6]>>5)&4 == 0 {
return 1
}
return 0
}
func bitcheck64_var(a, b uint64) (n int) {
// amd64:"BTQ"
if a&(1<<(b&63)) != 0 {
return 1
}
// amd64:"BTQ",-"BT.\t[$]0"
if (b>>(a&63))&1 != 0 {
return 1
}
return 0
}
func bitcheck64_mask(a uint64) (n int) {
// amd64:"BTQ\t[$]63"
if a&0x8000000000000000 != 0 {
return 1
}
// amd64:"BTQ\t[$]59"
if a&0x800000000000000 != 0 {
return 1
}
// amd64:"BTL\t[$]0"
if a&0x1 != 0 {
return 1
}
return 0
}
func biton64(a, b uint64) (n uint64) {
// amd64:"BTSQ"
n += b | (1 << (a & 63))
// amd64:"BTSQ\t[$]63"
n += a | (1 << 63)
// amd64:"BTSQ\t[$]60"
n += a | (1 << 60)
// amd64:"ORQ\t[$]1"
n += a | (1 << 0)
return n
}
func bitoff64(a, b uint64) (n uint64) {
// amd64:"BTRQ"
n += b &^ (1 << (a & 63))
// amd64:"BTRQ\t[$]63"
n += a &^ (1 << 63)
// amd64:"BTRQ\t[$]60"
n += a &^ (1 << 60)
// amd64:"ANDQ\t[$]-2"
n += a &^ (1 << 0)
return n
}
func bitcompl64(a, b uint64) (n uint64) {
// amd64:"BTCQ"
n += b ^ (1 << (a & 63))
// amd64:"BTCQ\t[$]63"
n += a ^ (1 << 63)
// amd64:"BTCQ\t[$]60"
n += a ^ (1 << 60)
// amd64:"XORQ\t[$]1"
n += a ^ (1 << 0)
return n
}
/************************************
* 32-bit instructions
************************************/
func bitcheck32_constleft(a uint32) (n int) {
// amd64:"BTL\t[$]31"
if a&(1<<31) != 0 {
return 1
}
// amd64:"BTL\t[$]28"
if a&(1<<28) != 0 {
return 1
}
// amd64:"BTL\t[$]0"
if a&(1<<0) != 0 {
return 1
}
return 0
}
func bitcheck32_constright(a [8]uint32) (n int) {
// amd64:"BTL\t[$]31"
if (a[0]>>31)&1 != 0 {
return 1
}
// amd64:"BTL\t[$]31"
if a[1]>>31 != 0 {
return 1
}
// amd64:"BTL\t[$]31"
if a[2]>>31 == 0 {
return 1
}
// amd64:"BTL\t[$]28"
if (a[3]>>28)&1 == 0 {
return 1
}
// amd64:"BTL\t[$]1"
if (a[4]>>1)&1 == 0 {
return 1
}
// amd64:"BTL\t[$]0"
if (a[5]>>0)&1 == 0 {
return 1
}
return -1
// amd64:"BTL\t[$]7"
if (a[6]>>5)&4 == 0 {
return 1
}
return 0
}
func bitcheck32_var(a, b uint32) (n int) {
// amd64:"BTL"
if a&(1<<(b&31)) != 0 {
return 1
}
// amd64:"BTL",-"BT.\t[$]0"
if (b>>(a&31))&1 != 0 {
return 1
}
return 0
}
func bitcheck32_mask(a uint32) (n int) {
// amd64:"BTL\t[$]31"
if a&0x80000000 != 0 {
return 1
}
// amd64:"BTL\t[$]27"
if a&0x8000000 != 0 {
return 1
}
// amd64:"BTL\t[$]0"
if a&0x1 != 0 {
return 1
}
return 0
}
func biton32(a, b uint32) (n uint32) {
// amd64:"BTSL"
n += b | (1 << (a & 31))
// amd64:"BTSL\t[$]31"
n += a | (1 << 31)
// amd64:"BTSL\t[$]28"
n += a | (1 << 28)
// amd64:"ORL\t[$]1"
n += a | (1 << 0)
return n
}
func bitoff32(a, b uint32) (n uint32) {
// amd64:"BTRL"
n += b &^ (1 << (a & 31))
// amd64:"BTRL\t[$]31"
n += a &^ (1 << 31)
// amd64:"BTRL\t[$]28"
n += a &^ (1 << 28)
// amd64:"ANDL\t[$]-2"
n += a &^ (1 << 0)
return n
}
func bitcompl32(a, b uint32) (n uint32) {
// amd64:"BTCL"
n += b ^ (1 << (a & 31))
// amd64:"BTCL\t[$]31"
n += a ^ (1 << 31)
// amd64:"BTCL\t[$]28"
n += a ^ (1 << 28)
// amd64:"XORL\t[$]1"
n += a ^ (1 << 0)
return n
}
......@@ -41,12 +41,12 @@ func sqrt(x float64) float64 {
// Check that it's using integer registers
func abs(x, y float64) {
// amd64:"SHLQ\t[$]1","SHRQ\t[$]1,"
// amd64:"BTRQ\t[$]63"
// s390x:"LPDFR\t",-"MOVD\t" (no integer load/store)
// ppc64le:"FABS\t"
sink64[0] = math.Abs(x)
// amd64:"SHLQ\t[$]1","SHRQ\t[$]1,"
// amd64:"BTRQ\t[$]63","PXOR" (TODO: this should be BTSQ)
// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
// ppc64le:"FNABS\t"
sink64[1] = -math.Abs(y)
......@@ -60,12 +60,12 @@ func abs32(x float32) float32 {
// Check that it's using integer registers
func copysign(a, b, c float64) {
// amd64:"SHLQ\t[$]1","SHRQ\t[$]1","SHRQ\t[$]63","SHLQ\t[$]63","ORQ"
// amd64:"BTRQ\t[$]63","SHRQ\t[$]63","SHLQ\t[$]63","ORQ"
// s390x:"CPSDR",-"MOVD" (no integer load/store)
// ppc64le:"FCPSGN"
sink64[0] = math.Copysign(a, b)
// amd64:"SHLQ\t[$]1","SHRQ\t[$]1",-"SHRQ\t[$]63",-"SHLQ\t[$]63","ORQ"
// amd64:"BTSQ\t[$]63"
// s390x:"LNDFR\t",-"MOVD\t" (no integer load/store)
// ppc64le:"FCPSGN"
sink64[1] = math.Copysign(c, -1)
......
......@@ -199,19 +199,19 @@ func TrailingZeros64(n uint64) int {
}
func TrailingZeros32(n uint32) int {
// amd64:"MOVQ\t\\$4294967296","ORQ\t[^$]","BSFQ"
// amd64:"BTSQ\\t\\$32","BSFQ"
// s390x:"FLOGR","MOVWZ"
return bits.TrailingZeros32(n)
}
func TrailingZeros16(n uint16) int {
// amd64:"BSFQ","ORQ\t\\$65536"
// amd64:"BSFQ","BTSQ\\t\\$16"
// s390x:"FLOGR","OR\t\\$65536"
return bits.TrailingZeros16(n)
}
func TrailingZeros8(n uint8) int {
// amd64:"BSFQ","ORQ\t\\$256"
// amd64:"BSFQ","BTSQ\\t\\$8"
// s390x:"FLOGR","OR\t\\$256"
return bits.TrailingZeros8(n)
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment