Commit 34f5f8a5 authored by Ben Shi's avatar Ben Shi Committed by Cherry Zhang

cmd/compile: optimize ARM64 with register indexed load/store

ARM64 supports load/store instructions with a memory operand that
the address is calculated by base register + index register.

In this CL,
1. Some rules are added to the compile's ARM64 backend to emit
such efficient instructions.
2. A wrong rule of load combination is fixed.

The go1 benchmark does show improvement.

name                     old time/op    new time/op    delta
BinaryTree17-4              44.5s ± 2%     44.1s ± 1%   -0.81%  (p=0.000 n=28+29)
Fannkuch11-4                32.7s ± 3%     30.5s ± 0%   -6.79%  (p=0.000 n=30+26)
FmtFprintfEmpty-4           499ns ± 0%     506ns ± 5%   +1.39%  (p=0.003 n=25+30)
FmtFprintfString-4         1.07µs ± 0%    1.04µs ± 4%   -3.17%  (p=0.000 n=23+30)
FmtFprintfInt-4            1.15µs ± 4%    1.13µs ± 0%   -1.55%  (p=0.000 n=30+23)
FmtFprintfIntInt-4         1.77µs ± 4%    1.74µs ± 0%   -1.71%  (p=0.000 n=30+24)
FmtFprintfPrefixedInt-4    2.37µs ± 5%    2.12µs ± 0%  -10.56%  (p=0.000 n=30+23)
FmtFprintfFloat-4          3.03µs ± 1%    3.03µs ± 4%   -0.13%  (p=0.003 n=25+30)
FmtManyArgs-4              7.38µs ± 1%    7.43µs ± 4%   +0.59%  (p=0.003 n=25+30)
GobDecode-4                 101ms ± 6%      95ms ± 5%   -5.55%  (p=0.000 n=30+30)
GobEncode-4                78.0ms ± 4%    78.8ms ± 6%   +1.05%  (p=0.000 n=30+30)
Gzip-4                      4.25s ± 0%     4.27s ± 4%   +0.45%  (p=0.003 n=24+30)
Gunzip-4                    428ms ± 1%     420ms ± 0%   -1.88%  (p=0.000 n=23+23)
HTTPClientServer-4          549µs ± 1%     541µs ± 1%   -1.56%  (p=0.000 n=29+29)
JSONEncode-4                194ms ± 0%     188ms ± 4%     ~     (p=0.417 n=23+30)
JSONDecode-4                890ms ± 5%     831ms ± 0%   -6.55%  (p=0.000 n=30+23)
Mandelbrot200-4            47.3ms ± 2%    46.5ms ± 0%     ~     (p=0.980 n=30+26)
GoParse-4                  43.1ms ± 6%    43.8ms ± 6%   +1.65%  (p=0.000 n=30+30)
RegexpMatchEasy0_32-4      1.06µs ± 0%    1.07µs ± 3%     ~     (p=0.092 n=23+30)
RegexpMatchEasy0_1K-4      5.53µs ± 0%    5.51µs ± 0%   -0.24%  (p=0.000 n=25+25)
RegexpMatchEasy1_32-4      1.02µs ± 3%    1.01µs ± 0%   -1.27%  (p=0.000 n=30+24)
RegexpMatchEasy1_1K-4      7.26µs ± 0%    7.33µs ± 0%   +0.95%  (p=0.000 n=23+26)
RegexpMatchMedium_32-4     1.84µs ± 7%    1.79µs ± 1%     ~     (p=0.333 n=30+23)
RegexpMatchMedium_1K-4      553µs ± 0%     547µs ± 0%   -1.14%  (p=0.000 n=24+22)
RegexpMatchHard_32-4       30.8µs ± 1%    30.3µs ± 0%   -1.40%  (p=0.000 n=24+24)
RegexpMatchHard_1K-4        928µs ± 0%     929µs ± 5%   +0.12%  (p=0.013 n=23+30)
Revcomp-4                   8.13s ± 4%     6.32s ± 1%  -22.23%  (p=0.000 n=30+23)
Template-4                  899ms ± 6%     854ms ± 1%   -5.01%  (p=0.000 n=30+24)
TimeParse-4                4.66µs ± 4%    4.59µs ± 1%   -1.57%  (p=0.000 n=30+23)
TimeFormat-4               4.58µs ± 0%    4.61µs ± 0%   +0.57%  (p=0.000 n=26+24)
[Geo mean]                  717µs          698µs        -2.55%

name                     old speed      new speed      delta
GobDecode-4              7.63MB/s ± 6%  8.08MB/s ± 5%   +5.88%  (p=0.000 n=30+30)
GobEncode-4              9.85MB/s ± 4%  9.75MB/s ± 6%   -1.04%  (p=0.000 n=30+30)
Gzip-4                   4.56MB/s ± 0%  4.55MB/s ± 4%   -0.36%  (p=0.003 n=24+30)
Gunzip-4                 45.3MB/s ± 1%  46.2MB/s ± 0%   +1.92%  (p=0.000 n=23+23)
JSONEncode-4             10.0MB/s ± 0%  10.4MB/s ± 4%     ~     (p=0.403 n=23+30)
JSONDecode-4             2.18MB/s ± 5%  2.33MB/s ± 0%   +6.91%  (p=0.000 n=30+23)
GoParse-4                1.34MB/s ± 5%  1.32MB/s ± 5%   -1.66%  (p=0.000 n=30+30)
RegexpMatchEasy0_32-4    30.2MB/s ± 0%  29.8MB/s ± 3%     ~     (p=0.099 n=23+30)
RegexpMatchEasy0_1K-4     185MB/s ± 0%   186MB/s ± 0%   +0.24%  (p=0.000 n=25+25)
RegexpMatchEasy1_32-4    31.4MB/s ± 3%  31.8MB/s ± 0%   +1.24%  (p=0.000 n=30+24)
RegexpMatchEasy1_1K-4     141MB/s ± 0%   140MB/s ± 0%   -0.94%  (p=0.000 n=23+26)
RegexpMatchMedium_32-4    541kB/s ± 6%   560kB/s ± 0%   +3.45%  (p=0.000 n=30+23)
RegexpMatchMedium_1K-4   1.85MB/s ± 0%  1.87MB/s ± 0%   +1.08%  (p=0.000 n=24+23)
RegexpMatchHard_32-4     1.04MB/s ± 1%  1.06MB/s ± 1%   +1.48%  (p=0.000 n=24+24)
RegexpMatchHard_1K-4     1.10MB/s ± 0%  1.10MB/s ± 5%   +0.15%  (p=0.004 n=23+30)
Revcomp-4                31.3MB/s ± 4%  40.2MB/s ± 1%  +28.52%  (p=0.000 n=30+23)
Template-4               2.16MB/s ± 6%  2.27MB/s ± 1%   +5.18%  (p=0.000 n=30+24)
[Geo mean]               7.57MB/s       7.79MB/s        +2.98%

fixes #24907

Change-Id: I94afd0e3f53d62a1cf5e452f3dd6daf61be21785
Reviewed-on: https://go-review.googlesource.com/107376
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
parent d5a52e70
...@@ -341,6 +341,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -341,6 +341,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
gc.AddAux(&p.From, v) gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg() p.To.Reg = v.Reg()
case ssa.OpARM64MOVBloadidx,
ssa.OpARM64MOVBUloadidx,
ssa.OpARM64MOVHloadidx,
ssa.OpARM64MOVHUloadidx,
ssa.OpARM64MOVWloadidx,
ssa.OpARM64MOVWUloadidx,
ssa.OpARM64MOVDloadidx:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Name = obj.NAME_NONE
p.From.Reg = v.Args[0].Reg()
p.From.Index = v.Args[1].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpARM64LDAR, case ssa.OpARM64LDAR,
ssa.OpARM64LDARW: ssa.OpARM64LDARW:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
...@@ -363,6 +377,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -363,6 +377,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_MEM p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg() p.To.Reg = v.Args[0].Reg()
gc.AddAux(&p.To, v) gc.AddAux(&p.To, v)
case ssa.OpARM64MOVBstoreidx,
ssa.OpARM64MOVHstoreidx,
ssa.OpARM64MOVWstoreidx,
ssa.OpARM64MOVDstoreidx:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[2].Reg()
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_NONE
p.To.Reg = v.Args[0].Reg()
p.To.Index = v.Args[1].Reg()
case ssa.OpARM64STP: case ssa.OpARM64STP:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REGREG p.From.Type = obj.TYPE_REGREG
...@@ -381,6 +406,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -381,6 +406,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_MEM p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg() p.To.Reg = v.Args[0].Reg()
gc.AddAux(&p.To, v) gc.AddAux(&p.To, v)
case ssa.OpARM64MOVBstorezeroidx,
ssa.OpARM64MOVHstorezeroidx,
ssa.OpARM64MOVWstorezeroidx,
ssa.OpARM64MOVDstorezeroidx:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = arm64.REGZERO
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_NONE
p.To.Reg = v.Args[0].Reg()
p.To.Index = v.Args[1].Reg()
case ssa.OpARM64MOVQstorezero: case ssa.OpARM64MOVQstorezero:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REGREG p.From.Type = obj.TYPE_REGREG
......
...@@ -144,6 +144,7 @@ func init() { ...@@ -144,6 +144,7 @@ func init() {
gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}} gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
gp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}}
gpstore0 = regInfo{inputs: []regMask{gpspsbg}} gpstore0 = regInfo{inputs: []regMask{gpspsbg}}
gpstore2 = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}} gpstore2 = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
...@@ -318,6 +319,15 @@ func init() { ...@@ -318,6 +319,15 @@ func init() {
{name: "FMOVSload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVS", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "FMOVSload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVS", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem.
{name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem.
// register indexed load
{name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD", faultOnNilArg0: true}, // load 64-bit dword from arg0 + arg1, arg2 = mem.
{name: "MOVWloadidx", argLength: 3, reg: gp2load, asm: "MOVW", faultOnNilArg0: true}, // load 32-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVWUloadidx", argLength: 3, reg: gp2load, asm: "MOVWU", faultOnNilArg0: true}, // load 32-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{name: "MOVHloadidx", argLength: 3, reg: gp2load, asm: "MOVH", faultOnNilArg0: true}, // load 16-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVHUloadidx", argLength: 3, reg: gp2load, asm: "MOVHU", faultOnNilArg0: true}, // load 16-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{name: "MOVBloadidx", argLength: 3, reg: gp2load, asm: "MOVB", faultOnNilArg0: true}, // load 8-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVBUloadidx", argLength: 3, reg: gp2load, asm: "MOVBU", faultOnNilArg0: true}, // load 8-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
...@@ -326,12 +336,24 @@ func init() { ...@@ -326,12 +336,24 @@ func init() {
{name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
// register indexed store
{name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem", faultOnNilArg0: true}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVHstoreidx", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVWstoreidx", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVDstoreidx", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVQstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVQstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of zero to arg0 + auxInt + aux. arg1=mem.
// register indexed store zero
{name: "MOVBstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVB", typ: "Mem", faultOnNilArg0: true}, // store 1 byte of zero to arg0 + arg1, arg2 = mem.
{name: "MOVHstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "MOVWstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "MOVDstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "FMOVDgpfp", argLength: 1, reg: gpfp, asm: "FMOVD"}, // move int64 to float64 (no conversion) {name: "FMOVDgpfp", argLength: 1, reg: gpfp, asm: "FMOVD"}, // move int64 to float64 (no conversion)
{name: "FMOVDfpgp", argLength: 1, reg: fpgp, asm: "FMOVD"}, // move float64 to int64 (no conversion) {name: "FMOVDfpgp", argLength: 1, reg: fpgp, asm: "FMOVD"}, // move float64 to int64 (no conversion)
......
...@@ -1152,6 +1152,13 @@ const ( ...@@ -1152,6 +1152,13 @@ const (
OpARM64MOVDload OpARM64MOVDload
OpARM64FMOVSload OpARM64FMOVSload
OpARM64FMOVDload OpARM64FMOVDload
OpARM64MOVDloadidx
OpARM64MOVWloadidx
OpARM64MOVWUloadidx
OpARM64MOVHloadidx
OpARM64MOVHUloadidx
OpARM64MOVBloadidx
OpARM64MOVBUloadidx
OpARM64MOVBstore OpARM64MOVBstore
OpARM64MOVHstore OpARM64MOVHstore
OpARM64MOVWstore OpARM64MOVWstore
...@@ -1159,11 +1166,19 @@ const ( ...@@ -1159,11 +1166,19 @@ const (
OpARM64STP OpARM64STP
OpARM64FMOVSstore OpARM64FMOVSstore
OpARM64FMOVDstore OpARM64FMOVDstore
OpARM64MOVBstoreidx
OpARM64MOVHstoreidx
OpARM64MOVWstoreidx
OpARM64MOVDstoreidx
OpARM64MOVBstorezero OpARM64MOVBstorezero
OpARM64MOVHstorezero OpARM64MOVHstorezero
OpARM64MOVWstorezero OpARM64MOVWstorezero
OpARM64MOVDstorezero OpARM64MOVDstorezero
OpARM64MOVQstorezero OpARM64MOVQstorezero
OpARM64MOVBstorezeroidx
OpARM64MOVHstorezeroidx
OpARM64MOVWstorezeroidx
OpARM64MOVDstorezeroidx
OpARM64FMOVDgpfp OpARM64FMOVDgpfp
OpARM64FMOVDfpgp OpARM64FMOVDfpgp
OpARM64MOVBreg OpARM64MOVBreg
...@@ -15070,6 +15085,111 @@ var opcodeTable = [...]opInfo{ ...@@ -15070,6 +15085,111 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "MOVDloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVD,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVWloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVW,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVWUloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVWU,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVHloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVH,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVHUloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVHU,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVBloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVB,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVBUloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVBU,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{ {
name: "MOVBstore", name: "MOVBstore",
auxType: auxSymOff, auxType: auxSymOff,
...@@ -15169,6 +15289,58 @@ var opcodeTable = [...]opInfo{ ...@@ -15169,6 +15289,58 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "MOVBstoreidx",
argLen: 4,
faultOnNilArg0: true,
asm: arm64.AMOVB,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVHstoreidx",
argLen: 4,
faultOnNilArg0: true,
asm: arm64.AMOVH,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVWstoreidx",
argLen: 4,
faultOnNilArg0: true,
asm: arm64.AMOVW,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVDstoreidx",
argLen: 4,
faultOnNilArg0: true,
asm: arm64.AMOVD,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{ {
name: "MOVBstorezero", name: "MOVBstorezero",
auxType: auxSymOff, auxType: auxSymOff,
...@@ -15234,6 +15406,54 @@ var opcodeTable = [...]opInfo{ ...@@ -15234,6 +15406,54 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "MOVBstorezeroidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVB,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVHstorezeroidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVH,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVWstorezeroidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVW,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVDstorezeroidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVD,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{ {
name: "FMOVDgpfp", name: "FMOVDgpfp",
argLen: 1, argLen: 1,
......
...@@ -22,7 +22,7 @@ var sink16 uint16 ...@@ -22,7 +22,7 @@ var sink16 uint16
func load_le64(b []byte) { func load_le64(b []byte) {
// amd64:`MOVQ\s\(.*\),` // amd64:`MOVQ\s\(.*\),`
// s390x:`MOVDBR\s\(.*\),` // s390x:`MOVDBR\s\(.*\),`
// arm64:`MOVD\s\(R[0-9]+\),` // arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`
// ppc64le:`MOVD\s`,-`MOV[BHW]Z` // ppc64le:`MOVD\s`,-`MOV[BHW]Z`
sink64 = binary.LittleEndian.Uint64(b) sink64 = binary.LittleEndian.Uint64(b)
} }
...@@ -30,7 +30,7 @@ func load_le64(b []byte) { ...@@ -30,7 +30,7 @@ func load_le64(b []byte) {
func load_le64_idx(b []byte, idx int) { func load_le64_idx(b []byte, idx int) {
// amd64:`MOVQ\s\(.*\)\(.*\*1\),` // amd64:`MOVQ\s\(.*\)\(.*\*1\),`
// s390x:`MOVDBR\s\(.*\)\(.*\*1\),` // s390x:`MOVDBR\s\(.*\)\(.*\*1\),`
// arm64:`MOVD\s\(R[0-9]+\),` // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]`
// ppc64le:`MOVD\s`,-`MOV[BHW]Z\s` // ppc64le:`MOVD\s`,-`MOV[BHW]Z\s`
sink64 = binary.LittleEndian.Uint64(b[idx:]) sink64 = binary.LittleEndian.Uint64(b[idx:])
} }
...@@ -38,7 +38,7 @@ func load_le64_idx(b []byte, idx int) { ...@@ -38,7 +38,7 @@ func load_le64_idx(b []byte, idx int) {
func load_le32(b []byte) { func load_le32(b []byte) {
// amd64:`MOVL\s\(.*\),` 386:`MOVL\s\(.*\),` // amd64:`MOVL\s\(.*\),` 386:`MOVL\s\(.*\),`
// s390x:`MOVWBR\s\(.*\),` // s390x:`MOVWBR\s\(.*\),`
// arm64:`MOVWU\s\(R[0-9]+\),` // arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`
// ppc64le:`MOVWZ\s` // ppc64le:`MOVWZ\s`
sink32 = binary.LittleEndian.Uint32(b) sink32 = binary.LittleEndian.Uint32(b)
} }
...@@ -46,7 +46,7 @@ func load_le32(b []byte) { ...@@ -46,7 +46,7 @@ func load_le32(b []byte) {
func load_le32_idx(b []byte, idx int) { func load_le32_idx(b []byte, idx int) {
// amd64:`MOVL\s\(.*\)\(.*\*1\),` 386:`MOVL\s\(.*\)\(.*\*1\),` // amd64:`MOVL\s\(.*\)\(.*\*1\),` 386:`MOVL\s\(.*\)\(.*\*1\),`
// s390x:`MOVWBR\s\(.*\)\(.*\*1\),` // s390x:`MOVWBR\s\(.*\)\(.*\*1\),`
// arm64:`MOVWU\s\(R[0-9]+\),` // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]`
// ppc64le:`MOVWZ\s` // ppc64le:`MOVWZ\s`
sink32 = binary.LittleEndian.Uint32(b[idx:]) sink32 = binary.LittleEndian.Uint32(b[idx:])
} }
...@@ -54,50 +54,54 @@ func load_le32_idx(b []byte, idx int) { ...@@ -54,50 +54,54 @@ func load_le32_idx(b []byte, idx int) {
func load_le16(b []byte) { func load_le16(b []byte) {
// amd64:`MOVWLZX\s\(.*\),` // amd64:`MOVWLZX\s\(.*\),`
// ppc64le:`MOVHZ\s` // ppc64le:`MOVHZ\s`
// arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB`
sink16 = binary.LittleEndian.Uint16(b) sink16 = binary.LittleEndian.Uint16(b)
} }
func load_le16_idx(b []byte, idx int) { func load_le16_idx(b []byte, idx int) {
// amd64:`MOVWLZX\s\(.*\),` // amd64:`MOVWLZX\s\(.*\),`
// ppc64le:`MOVHZ\s` // ppc64le:`MOVHZ\s`
// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
sink16 = binary.LittleEndian.Uint16(b[idx:]) sink16 = binary.LittleEndian.Uint16(b[idx:])
} }
func load_be64(b []byte) { func load_be64(b []byte) {
// amd64:`BSWAPQ` // amd64:`BSWAPQ`
// s390x:`MOVD\s\(.*\),` // s390x:`MOVD\s\(.*\),`
// arm64:`REV` // arm64:`REV`,`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`,-`REVW`,-`REV16W`
sink64 = binary.BigEndian.Uint64(b) sink64 = binary.BigEndian.Uint64(b)
} }
func load_be64_idx(b []byte, idx int) { func load_be64_idx(b []byte, idx int) {
// amd64:`BSWAPQ` // amd64:`BSWAPQ`
// s390x:`MOVD\s\(.*\)\(.*\*1\),` // s390x:`MOVD\s\(.*\)\(.*\*1\),`
// arm64:`REV` // arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W`
sink64 = binary.BigEndian.Uint64(b[idx:]) sink64 = binary.BigEndian.Uint64(b[idx:])
} }
func load_be32(b []byte) { func load_be32(b []byte) {
// amd64:`BSWAPL` // amd64:`BSWAPL`
// s390x:`MOVWZ\s\(.*\),` // s390x:`MOVWZ\s\(.*\),`
// arm64:`REVW` // arm64:`REVW`,`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`,-`REV16W`
sink32 = binary.BigEndian.Uint32(b) sink32 = binary.BigEndian.Uint32(b)
} }
func load_be32_idx(b []byte, idx int) { func load_be32_idx(b []byte, idx int) {
// amd64:`BSWAPL` // amd64:`BSWAPL`
// s390x:`MOVWZ\s\(.*\)\(.*\*1\),` // s390x:`MOVWZ\s\(.*\)\(.*\*1\),`
// arm64:`REVW` // arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W`
sink32 = binary.BigEndian.Uint32(b[idx:]) sink32 = binary.BigEndian.Uint32(b[idx:])
} }
func load_be16(b []byte) { func load_be16(b []byte) {
// amd64:`ROLW\s\$8` // amd64:`ROLW\s\$8`
// arm64: `REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB`
sink16 = binary.BigEndian.Uint16(b) sink16 = binary.BigEndian.Uint16(b)
} }
func load_be16_idx(b []byte, idx int) { func load_be16_idx(b []byte, idx int) {
// amd64:`ROLW\s\$8` // amd64:`ROLW\s\$8`
// arm64: `REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
sink16 = binary.BigEndian.Uint16(b[idx:]) sink16 = binary.BigEndian.Uint16(b[idx:])
} }
...@@ -162,7 +166,7 @@ func store_le64(b []byte) { ...@@ -162,7 +166,7 @@ func store_le64(b []byte) {
func store_le64_idx(b []byte, idx int) { func store_le64_idx(b []byte, idx int) {
// amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.` // amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.`
// arm64:`MOVD`,-`MOV[WBH]` // arm64:`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`
// ppc64le:`MOVD\s`,-`MOV[BHW]\s` // ppc64le:`MOVD\s`,-`MOV[BHW]\s`
binary.LittleEndian.PutUint64(b[idx:], sink64) binary.LittleEndian.PutUint64(b[idx:], sink64)
} }
...@@ -176,7 +180,7 @@ func store_le32(b []byte) { ...@@ -176,7 +180,7 @@ func store_le32(b []byte) {
func store_le32_idx(b []byte, idx int) { func store_le32_idx(b []byte, idx int) {
// amd64:`MOVL\s` // amd64:`MOVL\s`
// arm64:`MOVW`,-`MOV[BH]` // arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`
// ppc64le:`MOVW\s` // ppc64le:`MOVW\s`
binary.LittleEndian.PutUint32(b[idx:], sink32) binary.LittleEndian.PutUint32(b[idx:], sink32)
} }
...@@ -190,32 +194,32 @@ func store_le16(b []byte) { ...@@ -190,32 +194,32 @@ func store_le16(b []byte) {
func store_le16_idx(b []byte, idx int) { func store_le16_idx(b []byte, idx int) {
// amd64:`MOVW\s` // amd64:`MOVW\s`
// arm64:`MOVH`,-`MOVB` // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
// ppc64le(DISABLED):`MOVH\s` // ppc64le(DISABLED):`MOVH\s`
binary.LittleEndian.PutUint16(b[idx:], sink16) binary.LittleEndian.PutUint16(b[idx:], sink16)
} }
func store_be64(b []byte) { func store_be64(b []byte) {
// amd64:`BSWAPQ`,-`SHR.` // amd64:`BSWAPQ`,-`SHR.`
// arm64:`MOVD`,`REV`,-`MOV[WBH]` // arm64:`MOVD`,`REV`,-`MOV[WBH]`,-`REVW`,-`REV16W`
binary.BigEndian.PutUint64(b, sink64) binary.BigEndian.PutUint64(b, sink64)
} }
func store_be64_idx(b []byte, idx int) { func store_be64_idx(b []byte, idx int) {
// amd64:`BSWAPQ`,-`SHR.` // amd64:`BSWAPQ`,-`SHR.`
// arm64:`MOVD`,`REV`,-`MOV[WBH]` // arm64:`REV`,`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`,-`REV16W`,-`REVW`
binary.BigEndian.PutUint64(b[idx:], sink64) binary.BigEndian.PutUint64(b[idx:], sink64)
} }
func store_be32(b []byte) { func store_be32(b []byte) {
// amd64:`BSWAPL`,-`SHR.` // amd64:`BSWAPL`,-`SHR.`
// arm64:`MOVW`,`REVW`,-`MOV[BH]` // arm64:`MOVW`,`REVW`,-`MOV[BH]`,-`REV16W`
binary.BigEndian.PutUint32(b, sink32) binary.BigEndian.PutUint32(b, sink32)
} }
func store_be32_idx(b []byte, idx int) { func store_be32_idx(b []byte, idx int) {
// amd64:`BSWAPL`,-`SHR.` // amd64:`BSWAPL`,-`SHR.`
// arm64:`MOVW`,`REVW`,-`MOV[BH]` // arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`,-`REV16W`
binary.BigEndian.PutUint32(b[idx:], sink32) binary.BigEndian.PutUint32(b[idx:], sink32)
} }
...@@ -227,7 +231,7 @@ func store_be16(b []byte) { ...@@ -227,7 +231,7 @@ func store_be16(b []byte) {
func store_be16_idx(b []byte, idx int) { func store_be16_idx(b []byte, idx int) {
// amd64:`ROLW\s\$8`,-`SHR.` // amd64:`ROLW\s\$8`,-`SHR.`
// arm64:`MOVH`,`REV16W`,-`MOVB` // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB`
binary.BigEndian.PutUint16(b[idx:], sink16) binary.BigEndian.PutUint16(b[idx:], sink16)
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment