Commit aaf73c6d authored by Ben Shi's avatar Ben Shi Committed by Cherry Zhang

cmd/compile: optimize ARM64 with shifted register indexed load/store

ARM64 supports efficient instructions which combine shift, addition, load/store
together. Such as "MOVD (R0)(R1<<3), R2" and "MOVWU R6, (R4)(R1<<2)".

This CL optimizes the compiler to emit such efficient instuctions. And below
is some test data.

1. binary size before/after
binary                 size change
pkg/linux_arm64        +80.1KB
pkg/tool/linux_arm64   +121.9KB
go                     -4.3KB
gofmt                  -64KB

2. go1 benchmark
There is big improvement for the test case Fannkuch11, and slight
improvement for sme others, excluding noise.

name                     old time/op    new time/op    delta
BinaryTree17-4              43.9s ± 2%     44.0s ± 2%     ~     (p=0.820 n=30+30)
Fannkuch11-4                30.6s ± 2%     24.5s ± 3%  -19.93%  (p=0.000 n=25+30)
FmtFprintfEmpty-4           500ns ± 0%     499ns ± 0%   -0.11%  (p=0.000 n=23+25)
FmtFprintfString-4         1.03µs ± 0%    1.04µs ± 3%     ~     (p=0.065 n=29+30)
FmtFprintfInt-4            1.15µs ± 3%    1.15µs ± 4%   -0.56%  (p=0.000 n=30+30)
FmtFprintfIntInt-4         1.80µs ± 5%    1.82µs ± 0%     ~     (p=0.094 n=30+24)
FmtFprintfPrefixedInt-4    2.17µs ± 5%    2.20µs ± 0%     ~     (p=0.100 n=30+23)
FmtFprintfFloat-4          3.08µs ± 3%    3.09µs ± 4%     ~     (p=0.123 n=30+30)
FmtManyArgs-4              7.41µs ± 4%    7.17µs ± 1%   -3.26%  (p=0.000 n=30+23)
GobDecode-4                93.7ms ± 0%    94.7ms ± 4%     ~     (p=0.685 n=24+30)
GobEncode-4                78.7ms ± 7%    77.1ms ± 0%     ~     (p=0.729 n=30+23)
Gzip-4                      4.01s ± 0%     3.97s ± 5%   -1.11%  (p=0.037 n=24+30)
Gunzip-4                    389ms ± 4%     384ms ± 0%     ~     (p=0.155 n=30+23)
HTTPClientServer-4          536µs ± 1%     537µs ± 1%     ~     (p=0.236 n=30+30)
JSONEncode-4                179ms ± 1%     182ms ± 6%     ~     (p=0.763 n=24+30)
JSONDecode-4                843ms ± 0%     839ms ± 6%   -0.42%  (p=0.003 n=25+30)
Mandelbrot200-4            46.5ms ± 0%    46.5ms ± 0%   +0.02%  (p=0.000 n=26+26)
GoParse-4                  44.3ms ± 6%    43.3ms ± 0%     ~     (p=0.067 n=30+27)
RegexpMatchEasy0_32-4      1.07µs ± 7%    1.07µs ± 4%     ~     (p=0.835 n=30+30)
RegexpMatchEasy0_1K-4      5.51µs ± 0%    5.49µs ± 0%   -0.35%  (p=0.000 n=23+26)
RegexpMatchEasy1_32-4      1.01µs ± 0%    1.02µs ± 4%   +0.96%  (p=0.014 n=24+30)
RegexpMatchEasy1_1K-4      7.43µs ± 0%    7.18µs ± 0%   -3.41%  (p=0.000 n=23+24)
RegexpMatchMedium_32-4     1.78µs ± 0%    1.81µs ± 4%   +1.47%  (p=0.012 n=23+30)
RegexpMatchMedium_1K-4      547µs ± 1%     542µs ± 3%   -0.90%  (p=0.003 n=24+30)
RegexpMatchHard_32-4       30.4µs ± 0%    29.7µs ± 0%   -2.15%  (p=0.000 n=19+23)
RegexpMatchHard_1K-4        913µs ± 0%     915µs ± 6%   +0.25%  (p=0.012 n=24+30)
Revcomp-4                   6.32s ± 1%     6.42s ± 4%     ~     (p=0.342 n=25+30)
Template-4                  868ms ± 6%     878ms ± 6%   +1.15%  (p=0.000 n=30+30)
TimeParse-4                4.57µs ± 4%    4.59µs ± 3%   +0.65%  (p=0.010 n=29+30)
TimeFormat-4               4.51µs ± 0%    4.50µs ± 0%   -0.27%  (p=0.000 n=27+24)
[Geo mean]                  695µs          689µs        -0.92%

name                     old speed      new speed      delta
GobDecode-4              8.19MB/s ± 0%  8.12MB/s ± 4%     ~     (p=0.680 n=24+30)
GobEncode-4              9.76MB/s ± 7%  9.96MB/s ± 0%     ~     (p=0.616 n=30+23)
Gzip-4                   4.84MB/s ± 0%  4.89MB/s ± 4%   +1.16%  (p=0.030 n=24+30)
Gunzip-4                 49.9MB/s ± 4%  50.6MB/s ± 0%     ~     (p=0.162 n=30+23)
JSONEncode-4             10.9MB/s ± 1%  10.7MB/s ± 6%     ~     (p=0.575 n=24+30)
JSONDecode-4             2.30MB/s ± 0%  2.32MB/s ± 5%   +0.72%  (p=0.003 n=22+30)
GoParse-4                1.31MB/s ± 6%  1.34MB/s ± 0%   +2.26%  (p=0.002 n=30+27)
RegexpMatchEasy0_32-4    30.0MB/s ± 6%  30.0MB/s ± 4%     ~     (p=1.000 n=30+30)
RegexpMatchEasy0_1K-4     186MB/s ± 0%   187MB/s ± 0%   +0.35%  (p=0.000 n=23+26)
RegexpMatchEasy1_32-4    31.8MB/s ± 0%  31.5MB/s ± 4%   -0.92%  (p=0.012 n=25+30)
RegexpMatchEasy1_1K-4     138MB/s ± 0%   143MB/s ± 0%   +3.53%  (p=0.000 n=23+24)
RegexpMatchMedium_32-4    560kB/s ± 0%   553kB/s ± 4%   -1.19%  (p=0.005 n=23+30)
RegexpMatchMedium_1K-4   1.87MB/s ± 0%  1.89MB/s ± 3%   +1.04%  (p=0.002 n=24+30)
RegexpMatchHard_32-4     1.05MB/s ± 0%  1.08MB/s ± 0%   +2.40%  (p=0.000 n=19+23)
RegexpMatchHard_1K-4     1.12MB/s ± 0%  1.12MB/s ± 5%   +0.12%  (p=0.006 n=25+30)
Revcomp-4                40.2MB/s ± 1%  39.6MB/s ± 4%     ~     (p=0.242 n=25+30)
Template-4               2.24MB/s ± 6%  2.21MB/s ± 6%   -1.15%  (p=0.000 n=30+30)
[Geo mean]               7.87MB/s       7.91MB/s        +0.44%

Change-Id: If374cb7abf83537aa0a176f73c0f736f7800db03
Reviewed-on: https://go-review.googlesource.com/108735Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent ceda47d0
...@@ -92,6 +92,23 @@ func genshift(s *gc.SSAGenState, as obj.As, r0, r1, r int16, typ int64, n int64) ...@@ -92,6 +92,23 @@ func genshift(s *gc.SSAGenState, as obj.As, r0, r1, r int16, typ int64, n int64)
return p return p
} }
// generate the memory operand for the indexed load/store instructions
func genIndexedOperand(v *ssa.Value) obj.Addr {
// Reg: base register, Index: (shifted) index register
mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
switch v.Op {
case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8:
mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31
case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4:
mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31
case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31
default: // not shifted
mop.Index = v.Args[1].Reg()
}
return mop
}
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
switch v.Op { switch v.Op {
case ssa.OpCopy, ssa.OpARM64MOVDreg: case ssa.OpCopy, ssa.OpARM64MOVDreg:
...@@ -351,12 +368,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -351,12 +368,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
ssa.OpARM64MOVHUloadidx, ssa.OpARM64MOVHUloadidx,
ssa.OpARM64MOVWloadidx, ssa.OpARM64MOVWloadidx,
ssa.OpARM64MOVWUloadidx, ssa.OpARM64MOVWUloadidx,
ssa.OpARM64MOVDloadidx: ssa.OpARM64MOVDloadidx,
ssa.OpARM64MOVHloadidx2,
ssa.OpARM64MOVHUloadidx2,
ssa.OpARM64MOVWloadidx4,
ssa.OpARM64MOVWUloadidx4,
ssa.OpARM64MOVDloadidx8:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM p.From = genIndexedOperand(v)
p.From.Name = obj.NAME_NONE
p.From.Reg = v.Args[0].Reg()
p.From.Index = v.Args[1].Reg()
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg() p.To.Reg = v.Reg()
case ssa.OpARM64LDAR, case ssa.OpARM64LDAR,
...@@ -384,14 +403,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -384,14 +403,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARM64MOVBstoreidx, case ssa.OpARM64MOVBstoreidx,
ssa.OpARM64MOVHstoreidx, ssa.OpARM64MOVHstoreidx,
ssa.OpARM64MOVWstoreidx, ssa.OpARM64MOVWstoreidx,
ssa.OpARM64MOVDstoreidx: ssa.OpARM64MOVDstoreidx,
ssa.OpARM64MOVHstoreidx2,
ssa.OpARM64MOVWstoreidx4,
ssa.OpARM64MOVDstoreidx8:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.To = genIndexedOperand(v)
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[2].Reg() p.From.Reg = v.Args[2].Reg()
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_NONE
p.To.Reg = v.Args[0].Reg()
p.To.Index = v.Args[1].Reg()
case ssa.OpARM64STP: case ssa.OpARM64STP:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REGREG p.From.Type = obj.TYPE_REGREG
...@@ -413,14 +432,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -413,14 +432,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARM64MOVBstorezeroidx, case ssa.OpARM64MOVBstorezeroidx,
ssa.OpARM64MOVHstorezeroidx, ssa.OpARM64MOVHstorezeroidx,
ssa.OpARM64MOVWstorezeroidx, ssa.OpARM64MOVWstorezeroidx,
ssa.OpARM64MOVDstorezeroidx: ssa.OpARM64MOVDstorezeroidx,
ssa.OpARM64MOVHstorezeroidx2,
ssa.OpARM64MOVWstorezeroidx4,
ssa.OpARM64MOVDstorezeroidx8:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.To = genIndexedOperand(v)
p.From.Type = obj.TYPE_REG p.From.Type = obj.TYPE_REG
p.From.Reg = arm64.REGZERO p.From.Reg = arm64.REGZERO
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_NONE
p.To.Reg = v.Args[0].Reg()
p.To.Index = v.Args[1].Reg()
case ssa.OpARM64MOVQstorezero: case ssa.OpARM64MOVQstorezero:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REGREG p.From.Type = obj.TYPE_REGREG
......
...@@ -324,13 +324,20 @@ func init() { ...@@ -324,13 +324,20 @@ func init() {
{name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem.
// register indexed load // register indexed load
{name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD", faultOnNilArg0: true}, // load 64-bit dword from arg0 + arg1, arg2 = mem. {name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD"}, // load 64-bit dword from arg0 + arg1, arg2 = mem.
{name: "MOVWloadidx", argLength: 3, reg: gp2load, asm: "MOVW", faultOnNilArg0: true}, // load 32-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem. {name: "MOVWloadidx", argLength: 3, reg: gp2load, asm: "MOVW"}, // load 32-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVWUloadidx", argLength: 3, reg: gp2load, asm: "MOVWU", faultOnNilArg0: true}, // load 32-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem. {name: "MOVWUloadidx", argLength: 3, reg: gp2load, asm: "MOVWU"}, // load 32-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{name: "MOVHloadidx", argLength: 3, reg: gp2load, asm: "MOVH", faultOnNilArg0: true}, // load 16-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem. {name: "MOVHloadidx", argLength: 3, reg: gp2load, asm: "MOVH"}, // load 16-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVHUloadidx", argLength: 3, reg: gp2load, asm: "MOVHU", faultOnNilArg0: true}, // load 16-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem. {name: "MOVHUloadidx", argLength: 3, reg: gp2load, asm: "MOVHU"}, // load 16-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{name: "MOVBloadidx", argLength: 3, reg: gp2load, asm: "MOVB", faultOnNilArg0: true}, // load 8-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem. {name: "MOVBloadidx", argLength: 3, reg: gp2load, asm: "MOVB"}, // load 8-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVBUloadidx", argLength: 3, reg: gp2load, asm: "MOVBU", faultOnNilArg0: true}, // load 8-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem. {name: "MOVBUloadidx", argLength: 3, reg: gp2load, asm: "MOVBU"}, // load 8-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
// shifted register indexed load
{name: "MOVHloadidx2", argLength: 3, reg: gp2load, asm: "MOVH"}, // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem.
{name: "MOVHUloadidx2", argLength: 3, reg: gp2load, asm: "MOVHU"}, // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem.
{name: "MOVWloadidx4", argLength: 3, reg: gp2load, asm: "MOVW"}, // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem.
{name: "MOVWUloadidx4", argLength: 3, reg: gp2load, asm: "MOVWU"}, // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem.
{name: "MOVDloadidx8", argLength: 3, reg: gp2load, asm: "MOVD"}, // load 64-bit double-word from arg0 + arg1*8, arg2 = mem.
{name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
...@@ -341,10 +348,15 @@ func init() { ...@@ -341,10 +348,15 @@ func init() {
{name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
// register indexed store // register indexed store
{name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem", faultOnNilArg0: true}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem. {name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem"}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVHstoreidx", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes of arg2 to arg0 + arg1, arg3 = mem. {name: "MOVHstoreidx", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVWstoreidx", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes of arg2 to arg0 + arg1, arg3 = mem. {name: "MOVWstoreidx", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVDstoreidx", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes of arg2 to arg0 + arg1, arg3 = mem. {name: "MOVDstoreidx", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1, arg3 = mem.
// shifted register indexed store
{name: "MOVHstoreidx2", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem.
{name: "MOVWstoreidx4", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem.
{name: "MOVDstoreidx8", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem.
{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem.
...@@ -353,10 +365,15 @@ func init() { ...@@ -353,10 +365,15 @@ func init() {
{name: "MOVQstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVQstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of zero to arg0 + auxInt + aux. arg1=mem.
// register indexed store zero // register indexed store zero
{name: "MOVBstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVB", typ: "Mem", faultOnNilArg0: true}, // store 1 byte of zero to arg0 + arg1, arg2 = mem. {name: "MOVBstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVB", typ: "Mem"}, // store 1 byte of zero to arg0 + arg1, arg2 = mem.
{name: "MOVHstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes of zero to arg0 + arg1, arg2 = mem. {name: "MOVHstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "MOVWstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes of zero to arg0 + arg1, arg2 = mem. {name: "MOVWstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "MOVDstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes of zero to arg0 + arg1, arg2 = mem. {name: "MOVDstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + arg1, arg2 = mem.
// shifted register indexed store zero
{name: "MOVHstorezeroidx2", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + arg1*2, arg2 = mem.
{name: "MOVWstorezeroidx4", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + arg1*4, arg2 = mem.
{name: "MOVDstorezeroidx8", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + arg1*8, arg2 = mem.
{name: "FMOVDgpfp", argLength: 1, reg: gpfp, asm: "FMOVD"}, // move int64 to float64 (no conversion) {name: "FMOVDgpfp", argLength: 1, reg: gpfp, asm: "FMOVD"}, // move int64 to float64 (no conversion)
{name: "FMOVDfpgp", argLength: 1, reg: fpgp, asm: "FMOVD"}, // move float64 to int64 (no conversion) {name: "FMOVDfpgp", argLength: 1, reg: fpgp, asm: "FMOVD"}, // move float64 to int64 (no conversion)
......
This diff is collapsed.
...@@ -110,6 +110,21 @@ func load_byte2_uint16(s []byte) uint16 { ...@@ -110,6 +110,21 @@ func load_byte2_uint16(s []byte) uint16 {
return uint16(s[0]) | uint16(s[1])<<8 return uint16(s[0]) | uint16(s[1])<<8
} }
func load_byte2_uint16_idx(s []byte, idx int) uint16 {
// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
return uint16(s[idx<<1]) | uint16(s[(idx<<1)+1])<<8
}
func load_byte4_uint32_idx(s []byte, idx int) uint32 {
// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
return uint32(s[idx<<2]) | uint32(s[(idx<<2)+1])<<8 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+3])<<24
}
func load_byte8_uint64_idx(s []byte, idx int) uint64 {
// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
return uint64(s[idx<<3]) | uint64(s[(idx<<3)+1])<<8 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+7])<<56
}
// Check load combining across function calls. // Check load combining across function calls.
func fcall_byte(a, b byte) (byte, byte) { func fcall_byte(a, b byte) (byte, byte) {
...@@ -268,6 +283,32 @@ func zero_byte_16(b []byte) { ...@@ -268,6 +283,32 @@ func zero_byte_16(b []byte) {
b[12], b[13], b[14], b[15] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW" b[12], b[13], b[14], b[15] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
} }
func zero_byte_2_idx(b []byte, idx int) {
// arm64: `MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
b[(idx<<1)+0] = 0
b[(idx<<1)+1] = 0
}
func zero_byte_4_idx(b []byte, idx int) {
// arm64: `MOVW\sZR,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOV[BH]`
b[(idx<<2)+0] = 0
b[(idx<<2)+1] = 0
b[(idx<<2)+2] = 0
b[(idx<<2)+3] = 0
}
func zero_byte_8_idx(b []byte, idx int) {
// arm64: `MOVD\sZR,\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`MOV[BHW]`
b[(idx<<3)+0] = 0
b[(idx<<3)+1] = 0
b[(idx<<3)+2] = 0
b[(idx<<3)+3] = 0
b[(idx<<3)+4] = 0
b[(idx<<3)+5] = 0
b[(idx<<3)+6] = 0
b[(idx<<3)+7] = 0
}
func zero_byte_30(a *[30]byte) { func zero_byte_30(a *[30]byte) {
*a = [30]byte{} // arm64:"STP",-"MOVB",-"MOVH",-"MOVW" *a = [30]byte{} // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment