Commit aaf73c6d authored by Ben Shi's avatar Ben Shi Committed by Cherry Zhang

cmd/compile: optimize ARM64 with shifted register indexed load/store

ARM64 supports efficient instructions which combine shift, addition, load/store
together. Such as "MOVD (R0)(R1<<3), R2" and "MOVWU R6, (R4)(R1<<2)".

This CL optimizes the compiler to emit such efficient instuctions. And below
is some test data.

1. binary size before/after
binary                 size change
pkg/linux_arm64        +80.1KB
pkg/tool/linux_arm64   +121.9KB
go                     -4.3KB
gofmt                  -64KB

2. go1 benchmark
There is big improvement for the test case Fannkuch11, and slight
improvement for sme others, excluding noise.

name                     old time/op    new time/op    delta
BinaryTree17-4              43.9s ± 2%     44.0s ± 2%     ~     (p=0.820 n=30+30)
Fannkuch11-4                30.6s ± 2%     24.5s ± 3%  -19.93%  (p=0.000 n=25+30)
FmtFprintfEmpty-4           500ns ± 0%     499ns ± 0%   -0.11%  (p=0.000 n=23+25)
FmtFprintfString-4         1.03µs ± 0%    1.04µs ± 3%     ~     (p=0.065 n=29+30)
FmtFprintfInt-4            1.15µs ± 3%    1.15µs ± 4%   -0.56%  (p=0.000 n=30+30)
FmtFprintfIntInt-4         1.80µs ± 5%    1.82µs ± 0%     ~     (p=0.094 n=30+24)
FmtFprintfPrefixedInt-4    2.17µs ± 5%    2.20µs ± 0%     ~     (p=0.100 n=30+23)
FmtFprintfFloat-4          3.08µs ± 3%    3.09µs ± 4%     ~     (p=0.123 n=30+30)
FmtManyArgs-4              7.41µs ± 4%    7.17µs ± 1%   -3.26%  (p=0.000 n=30+23)
GobDecode-4                93.7ms ± 0%    94.7ms ± 4%     ~     (p=0.685 n=24+30)
GobEncode-4                78.7ms ± 7%    77.1ms ± 0%     ~     (p=0.729 n=30+23)
Gzip-4                      4.01s ± 0%     3.97s ± 5%   -1.11%  (p=0.037 n=24+30)
Gunzip-4                    389ms ± 4%     384ms ± 0%     ~     (p=0.155 n=30+23)
HTTPClientServer-4          536µs ± 1%     537µs ± 1%     ~     (p=0.236 n=30+30)
JSONEncode-4                179ms ± 1%     182ms ± 6%     ~     (p=0.763 n=24+30)
JSONDecode-4                843ms ± 0%     839ms ± 6%   -0.42%  (p=0.003 n=25+30)
Mandelbrot200-4            46.5ms ± 0%    46.5ms ± 0%   +0.02%  (p=0.000 n=26+26)
GoParse-4                  44.3ms ± 6%    43.3ms ± 0%     ~     (p=0.067 n=30+27)
RegexpMatchEasy0_32-4      1.07µs ± 7%    1.07µs ± 4%     ~     (p=0.835 n=30+30)
RegexpMatchEasy0_1K-4      5.51µs ± 0%    5.49µs ± 0%   -0.35%  (p=0.000 n=23+26)
RegexpMatchEasy1_32-4      1.01µs ± 0%    1.02µs ± 4%   +0.96%  (p=0.014 n=24+30)
RegexpMatchEasy1_1K-4      7.43µs ± 0%    7.18µs ± 0%   -3.41%  (p=0.000 n=23+24)
RegexpMatchMedium_32-4     1.78µs ± 0%    1.81µs ± 4%   +1.47%  (p=0.012 n=23+30)
RegexpMatchMedium_1K-4      547µs ± 1%     542µs ± 3%   -0.90%  (p=0.003 n=24+30)
RegexpMatchHard_32-4       30.4µs ± 0%    29.7µs ± 0%   -2.15%  (p=0.000 n=19+23)
RegexpMatchHard_1K-4        913µs ± 0%     915µs ± 6%   +0.25%  (p=0.012 n=24+30)
Revcomp-4                   6.32s ± 1%     6.42s ± 4%     ~     (p=0.342 n=25+30)
Template-4                  868ms ± 6%     878ms ± 6%   +1.15%  (p=0.000 n=30+30)
TimeParse-4                4.57µs ± 4%    4.59µs ± 3%   +0.65%  (p=0.010 n=29+30)
TimeFormat-4               4.51µs ± 0%    4.50µs ± 0%   -0.27%  (p=0.000 n=27+24)
[Geo mean]                  695µs          689µs        -0.92%

name                     old speed      new speed      delta
GobDecode-4              8.19MB/s ± 0%  8.12MB/s ± 4%     ~     (p=0.680 n=24+30)
GobEncode-4              9.76MB/s ± 7%  9.96MB/s ± 0%     ~     (p=0.616 n=30+23)
Gzip-4                   4.84MB/s ± 0%  4.89MB/s ± 4%   +1.16%  (p=0.030 n=24+30)
Gunzip-4                 49.9MB/s ± 4%  50.6MB/s ± 0%     ~     (p=0.162 n=30+23)
JSONEncode-4             10.9MB/s ± 1%  10.7MB/s ± 6%     ~     (p=0.575 n=24+30)
JSONDecode-4             2.30MB/s ± 0%  2.32MB/s ± 5%   +0.72%  (p=0.003 n=22+30)
GoParse-4                1.31MB/s ± 6%  1.34MB/s ± 0%   +2.26%  (p=0.002 n=30+27)
RegexpMatchEasy0_32-4    30.0MB/s ± 6%  30.0MB/s ± 4%     ~     (p=1.000 n=30+30)
RegexpMatchEasy0_1K-4     186MB/s ± 0%   187MB/s ± 0%   +0.35%  (p=0.000 n=23+26)
RegexpMatchEasy1_32-4    31.8MB/s ± 0%  31.5MB/s ± 4%   -0.92%  (p=0.012 n=25+30)
RegexpMatchEasy1_1K-4     138MB/s ± 0%   143MB/s ± 0%   +3.53%  (p=0.000 n=23+24)
RegexpMatchMedium_32-4    560kB/s ± 0%   553kB/s ± 4%   -1.19%  (p=0.005 n=23+30)
RegexpMatchMedium_1K-4   1.87MB/s ± 0%  1.89MB/s ± 3%   +1.04%  (p=0.002 n=24+30)
RegexpMatchHard_32-4     1.05MB/s ± 0%  1.08MB/s ± 0%   +2.40%  (p=0.000 n=19+23)
RegexpMatchHard_1K-4     1.12MB/s ± 0%  1.12MB/s ± 5%   +0.12%  (p=0.006 n=25+30)
Revcomp-4                40.2MB/s ± 1%  39.6MB/s ± 4%     ~     (p=0.242 n=25+30)
Template-4               2.24MB/s ± 6%  2.21MB/s ± 6%   -1.15%  (p=0.000 n=30+30)
[Geo mean]               7.87MB/s       7.91MB/s        +0.44%

Change-Id: If374cb7abf83537aa0a176f73c0f736f7800db03
Reviewed-on: https://go-review.googlesource.com/108735Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent ceda47d0
......@@ -92,6 +92,23 @@ func genshift(s *gc.SSAGenState, as obj.As, r0, r1, r int16, typ int64, n int64)
return p
}
// generate the memory operand for the indexed load/store instructions
func genIndexedOperand(v *ssa.Value) obj.Addr {
// Reg: base register, Index: (shifted) index register
mop := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
switch v.Op {
case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8, ssa.OpARM64MOVDstorezeroidx8:
mop.Index = arm64.REG_LSL | 3<<5 | v.Args[1].Reg()&31
case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4, ssa.OpARM64MOVWstorezeroidx4:
mop.Index = arm64.REG_LSL | 2<<5 | v.Args[1].Reg()&31
case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2, ssa.OpARM64MOVHstorezeroidx2:
mop.Index = arm64.REG_LSL | 1<<5 | v.Args[1].Reg()&31
default: // not shifted
mop.Index = v.Args[1].Reg()
}
return mop
}
func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
switch v.Op {
case ssa.OpCopy, ssa.OpARM64MOVDreg:
......@@ -351,12 +368,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
ssa.OpARM64MOVHUloadidx,
ssa.OpARM64MOVWloadidx,
ssa.OpARM64MOVWUloadidx,
ssa.OpARM64MOVDloadidx:
ssa.OpARM64MOVDloadidx,
ssa.OpARM64MOVHloadidx2,
ssa.OpARM64MOVHUloadidx2,
ssa.OpARM64MOVWloadidx4,
ssa.OpARM64MOVWUloadidx4,
ssa.OpARM64MOVDloadidx8:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Name = obj.NAME_NONE
p.From.Reg = v.Args[0].Reg()
p.From.Index = v.Args[1].Reg()
p.From = genIndexedOperand(v)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpARM64LDAR,
......@@ -384,14 +403,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARM64MOVBstoreidx,
ssa.OpARM64MOVHstoreidx,
ssa.OpARM64MOVWstoreidx,
ssa.OpARM64MOVDstoreidx:
ssa.OpARM64MOVDstoreidx,
ssa.OpARM64MOVHstoreidx2,
ssa.OpARM64MOVWstoreidx4,
ssa.OpARM64MOVDstoreidx8:
p := s.Prog(v.Op.Asm())
p.To = genIndexedOperand(v)
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[2].Reg()
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_NONE
p.To.Reg = v.Args[0].Reg()
p.To.Index = v.Args[1].Reg()
case ssa.OpARM64STP:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REGREG
......@@ -413,14 +432,14 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARM64MOVBstorezeroidx,
ssa.OpARM64MOVHstorezeroidx,
ssa.OpARM64MOVWstorezeroidx,
ssa.OpARM64MOVDstorezeroidx:
ssa.OpARM64MOVDstorezeroidx,
ssa.OpARM64MOVHstorezeroidx2,
ssa.OpARM64MOVWstorezeroidx4,
ssa.OpARM64MOVDstorezeroidx8:
p := s.Prog(v.Op.Asm())
p.To = genIndexedOperand(v)
p.From.Type = obj.TYPE_REG
p.From.Reg = arm64.REGZERO
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_NONE
p.To.Reg = v.Args[0].Reg()
p.To.Index = v.Args[1].Reg()
case ssa.OpARM64MOVQstorezero:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REGREG
......
......@@ -659,6 +659,30 @@
(MOVBloadidx ptr (MOVDconst [c]) mem) -> (MOVBload [c] ptr mem)
(MOVBloadidx (MOVDconst [c]) ptr mem) -> (MOVBload [c] ptr mem)
// shifted register indexed load
(MOVDload [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil -> (MOVDloadidx8 ptr idx mem)
(MOVWUload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil -> (MOVWUloadidx4 ptr idx mem)
(MOVWload [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil -> (MOVWloadidx4 ptr idx mem)
(MOVHUload [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil -> (MOVHUloadidx2 ptr idx mem)
(MOVHload [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil -> (MOVHloadidx2 ptr idx mem)
(MOVDloadidx ptr (SLLconst [3] idx) mem) -> (MOVDloadidx8 ptr idx mem)
(MOVWloadidx ptr (SLLconst [2] idx) mem) -> (MOVWloadidx4 ptr idx mem)
(MOVWUloadidx ptr (SLLconst [2] idx) mem) -> (MOVWUloadidx4 ptr idx mem)
(MOVHloadidx ptr (SLLconst [1] idx) mem) -> (MOVHloadidx2 ptr idx mem)
(MOVHUloadidx ptr (SLLconst [1] idx) mem) -> (MOVHUloadidx2 ptr idx mem)
(MOVHloadidx ptr (ADD idx idx) mem) -> (MOVHloadidx2 ptr idx mem)
(MOVHUloadidx ptr (ADD idx idx) mem) -> (MOVHUloadidx2 ptr idx mem)
(MOVDloadidx (SLLconst [3] idx) ptr mem) -> (MOVDloadidx8 ptr idx mem)
(MOVWloadidx (SLLconst [2] idx) ptr mem) -> (MOVWloadidx4 ptr idx mem)
(MOVWUloadidx (SLLconst [2] idx) ptr mem) -> (MOVWUloadidx4 ptr idx mem)
(MOVHloadidx (ADD idx idx) ptr mem) -> (MOVHloadidx2 ptr idx mem)
(MOVHUloadidx (ADD idx idx) ptr mem) -> (MOVHUloadidx2 ptr idx mem)
(MOVDloadidx8 ptr (MOVDconst [c]) mem) -> (MOVDload [c<<3] ptr mem)
(MOVWUloadidx4 ptr (MOVDconst [c]) mem) -> (MOVWUload [c<<2] ptr mem)
(MOVWloadidx4 ptr (MOVDconst [c]) mem) -> (MOVWload [c<<2] ptr mem)
(MOVHUloadidx2 ptr (MOVDconst [c]) mem) -> (MOVHUload [c<<1] ptr mem)
(MOVHloadidx2 ptr (MOVDconst [c]) mem) -> (MOVHload [c<<1] ptr mem)
(MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
(MOVBstore [off1+off2] {sym} ptr val mem)
......@@ -710,6 +734,22 @@
(MOVBstoreidx ptr (MOVDconst [c]) val mem) -> (MOVBstore [c] ptr val mem)
(MOVBstoreidx (MOVDconst [c]) idx val mem) -> (MOVBstore [c] idx val mem)
// shifted register indexed store
(MOVDstore [off] {sym} (ADDshiftLL [3] ptr idx) val mem) && off == 0 && sym == nil -> (MOVDstoreidx8 ptr idx val mem)
(MOVWstore [off] {sym} (ADDshiftLL [2] ptr idx) val mem) && off == 0 && sym == nil -> (MOVWstoreidx4 ptr idx val mem)
(MOVHstore [off] {sym} (ADDshiftLL [1] ptr idx) val mem) && off == 0 && sym == nil -> (MOVHstoreidx2 ptr idx val mem)
(MOVDstoreidx ptr (SLLconst [3] idx) val mem) -> (MOVDstoreidx8 ptr idx val mem)
(MOVWstoreidx ptr (SLLconst [2] idx) val mem) -> (MOVWstoreidx4 ptr idx val mem)
(MOVHstoreidx ptr (SLLconst [1] idx) val mem) -> (MOVHstoreidx2 ptr idx val mem)
(MOVHstoreidx ptr (ADD idx idx) val mem) -> (MOVHstoreidx2 ptr idx val mem)
(MOVDstoreidx (SLLconst [3] idx) ptr val mem) -> (MOVDstoreidx8 ptr idx val mem)
(MOVWstoreidx (SLLconst [2] idx) ptr val mem) -> (MOVWstoreidx4 ptr idx val mem)
(MOVHstoreidx (SLLconst [1] idx) ptr val mem) -> (MOVHstoreidx2 ptr idx val mem)
(MOVHstoreidx (ADD idx idx) ptr val mem) -> (MOVHstoreidx2 ptr idx val mem)
(MOVDstoreidx8 ptr (MOVDconst [c]) val mem) -> (MOVDstore [c<<3] ptr val mem)
(MOVWstoreidx4 ptr (MOVDconst [c]) val mem) -> (MOVWstore [c<<2] ptr val mem)
(MOVHstoreidx2 ptr (MOVDconst [c]) val mem) -> (MOVHstore [c<<1] ptr val mem)
(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(off1+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
......@@ -821,6 +861,25 @@
(MOVBstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVBstorezero [c] ptr mem)
(MOVBstorezeroidx (MOVDconst [c]) idx mem) -> (MOVBstorezero [c] idx mem)
// shifted register indexed store zero
(MOVDstorezero [off] {sym} (ADDshiftLL [3] ptr idx) mem) && off == 0 && sym == nil -> (MOVDstorezeroidx8 ptr idx mem)
(MOVWstorezero [off] {sym} (ADDshiftLL [2] ptr idx) mem) && off == 0 && sym == nil -> (MOVWstorezeroidx4 ptr idx mem)
(MOVHstorezero [off] {sym} (ADDshiftLL [1] ptr idx) mem) && off == 0 && sym == nil -> (MOVHstorezeroidx2 ptr idx mem)
(MOVDstorezeroidx ptr (SLLconst [3] idx) mem) -> (MOVDstorezeroidx8 ptr idx mem)
(MOVWstorezeroidx ptr (SLLconst [2] idx) mem) -> (MOVWstorezeroidx4 ptr idx mem)
(MOVHstorezeroidx ptr (SLLconst [1] idx) mem) -> (MOVHstorezeroidx2 ptr idx mem)
(MOVHstorezeroidx ptr (ADD idx idx) mem) -> (MOVHstorezeroidx2 ptr idx mem)
(MOVDstorezeroidx (SLLconst [3] idx) ptr mem) -> (MOVDstorezeroidx8 ptr idx mem)
(MOVWstorezeroidx (SLLconst [2] idx) ptr mem) -> (MOVWstorezeroidx4 ptr idx mem)
(MOVHstorezeroidx (SLLconst [1] idx) ptr mem) -> (MOVHstorezeroidx2 ptr idx mem)
(MOVHstorezeroidx (ADD idx idx) ptr mem) -> (MOVHstorezeroidx2 ptr idx mem)
(MOVDstoreidx8 ptr idx (MOVDconst [0]) mem) -> (MOVDstorezeroidx8 ptr idx mem)
(MOVWstoreidx4 ptr idx (MOVDconst [0]) mem) -> (MOVWstorezeroidx4 ptr idx mem)
(MOVHstoreidx2 ptr idx (MOVDconst [0]) mem) -> (MOVHstorezeroidx2 ptr idx mem)
(MOVDstorezeroidx8 ptr (MOVDconst [c]) mem) -> (MOVDstorezero [c<<3] ptr mem)
(MOVWstorezeroidx4 ptr (MOVDconst [c]) mem) -> (MOVWstorezero [c<<2] ptr mem)
(MOVHstorezeroidx2 ptr (MOVDconst [c]) mem) -> (MOVHstorezero [c<<1] ptr mem)
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
// these seem to have bad interaction with other rules, resulting in slower code
//(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x)
......@@ -856,6 +915,12 @@
(MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
(MOVHloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
(MOVHUloadidx2 ptr idx (MOVHstorezeroidx2 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
(MOVWloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
(MOVWUloadidx4 ptr idx (MOVWstorezeroidx4 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
(MOVDloadidx8 ptr idx (MOVDstorezeroidx8 ptr2 idx2 _)) && isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) -> (MOVDconst [0])
// don't extend after proper load
(MOVBreg x:(MOVBload _ _)) -> (MOVDreg x)
(MOVBUreg x:(MOVBUload _ _)) -> (MOVDreg x)
......@@ -887,6 +952,13 @@
(MOVWUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
(MOVWUreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x)
(MOVWUreg x:(MOVWUloadidx _ _ _)) -> (MOVDreg x)
(MOVHreg x:(MOVHloadidx2 _ _ _)) -> (MOVDreg x)
(MOVHUreg x:(MOVHUloadidx2 _ _ _)) -> (MOVDreg x)
(MOVWreg x:(MOVHloadidx2 _ _ _)) -> (MOVDreg x)
(MOVWreg x:(MOVHUloadidx2 _ _ _)) -> (MOVDreg x)
(MOVWreg x:(MOVWloadidx4 _ _ _)) -> (MOVDreg x)
(MOVWUreg x:(MOVHUloadidx2 _ _ _)) -> (MOVDreg x)
(MOVWUreg x:(MOVWUloadidx4 _ _ _)) -> (MOVDreg x)
// fold double extensions
(MOVBreg x:(MOVBreg _)) -> (MOVDreg x)
......@@ -930,6 +1002,12 @@
(MOVHstoreidx ptr idx (MOVWUreg x) mem) -> (MOVHstoreidx ptr idx x mem)
(MOVWstoreidx ptr idx (MOVWreg x) mem) -> (MOVWstoreidx ptr idx x mem)
(MOVWstoreidx ptr idx (MOVWUreg x) mem) -> (MOVWstoreidx ptr idx x mem)
(MOVHstoreidx2 ptr idx (MOVHreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
(MOVHstoreidx2 ptr idx (MOVHUreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
(MOVHstoreidx2 ptr idx (MOVWreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
(MOVHstoreidx2 ptr idx (MOVWUreg x) mem) -> (MOVHstoreidx2 ptr idx x mem)
(MOVWstoreidx4 ptr idx (MOVWreg x) mem) -> (MOVWstoreidx4 ptr idx x mem)
(MOVWstoreidx4 ptr idx (MOVWUreg x) mem) -> (MOVWstoreidx4 ptr idx x mem)
// if a register move has only 1 use, just use the same register without emitting instruction
// MOVDnop doesn't emit instruction, only for ensuring the type.
......@@ -1567,6 +1645,15 @@
&& clobber(x0) && clobber(x1)
&& clobber(y0) && clobber(y1)
-> @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
(ORshiftLL <t> [8]
y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem))
y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
&& x0.Uses == 1 && x1.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0) && clobber(x1)
&& clobber(y0) && clobber(y1)
-> @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr idx mem)
// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
......@@ -1598,6 +1685,33 @@
&& clobber(y1) && clobber(y2)
&& clobber(o0)
-> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
x0:(MOVHUloadidx ptr idx mem)
y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
&& y1.Uses == 1 && y2.Uses == 1
&& o0.Uses == 1
&& mergePoint(b,x0,x1,x2) != nil
&& clobber(x0) && clobber(x1) && clobber(x2)
&& clobber(y1) && clobber(y2)
&& clobber(o0)
-> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr idx mem)
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
x0:(MOVHUloadidx2 ptr0 idx0 mem)
y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADDshiftLL [1] ptr1 idx1) mem)))
y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
&& s == nil
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
&& y1.Uses == 1 && y2.Uses == 1
&& o0.Uses == 1
&& mergePoint(b,x0,x1,x2) != nil
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
&& isSamePtr(p1, p)
&& clobber(x0) && clobber(x1) && clobber(x2)
&& clobber(y1) && clobber(y2)
&& clobber(o0)
-> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 (SLLconst <idx0.Type> [1] idx0) mem)
// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
......@@ -1635,6 +1749,37 @@
&& clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
&& clobber(o0) && clobber(o1) && clobber(o2)
-> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
x0:(MOVWUloadidx4 ptr0 idx0 mem)
y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADDshiftLL [2] ptr1 idx1) mem)))
y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
&& s == nil
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
&& mergePoint(b,x0,x1,x2,x3,x4) != nil
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
&& isSamePtr(p1, p)
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
&& clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
&& clobber(o0) && clobber(o1) && clobber(o2)
-> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 (SLLconst <idx0.Type> [2] idx0) mem)
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
x0:(MOVWUloadidx ptr idx mem)
y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
&& mergePoint(b,x0,x1,x2,x3,x4) != nil
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
&& clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
&& clobber(o0) && clobber(o1) && clobber(o2)
-> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr idx mem)
// b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 32-bit
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
......@@ -1669,6 +1814,19 @@
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
&& clobber(o0) && clobber(o1) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
y3:(MOVDnop x3:(MOVBUloadidx ptr idx mem)))
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
&& mergePoint(b,x0,x1,x2,x3) != nil
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
&& clobber(o0) && clobber(o1) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr idx mem)
// b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 64-bit
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
......@@ -1727,6 +1885,29 @@
&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
&& clobber(o4) && clobber(o5) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
y7:(MOVDnop x7:(MOVBUloadidx ptr idx mem)))
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
&& clobber(o4) && clobber(o5) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr idx mem)
// big endian loads
// b[1] | b[0]<<8 -> load 16-bit, reverse
......@@ -1751,6 +1932,15 @@
&& clobber(x0) && clobber(x1)
&& clobber(y0) && clobber(y1)
-> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
(ORshiftLL <t> [8]
y0:(MOVDnop x0:(MOVBUloadidx ptr (ADDconst [1] idx) mem))
y1:(MOVDnop x1:(MOVBUloadidx ptr idx mem)))
&& x0.Uses == 1 && x1.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& clobber(x0) && clobber(x1)
&& clobber(y0) && clobber(y1)
-> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr idx mem))
// b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit, reverse
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
......@@ -1782,6 +1972,18 @@
&& clobber(y0) && clobber(y1) && clobber(y2)
&& clobber(o0)
-> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
y0:(REV16W x0:(MOVHUloadidx ptr (ADDconst [2] idx) mem))
y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
y2:(MOVDnop x2:(MOVBUloadidx ptr idx mem)))
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
&& o0.Uses == 1
&& mergePoint(b,x0,x1,x2) != nil
&& clobber(x0) && clobber(x1) && clobber(x2)
&& clobber(y0) && clobber(y1) && clobber(y2)
&& clobber(o0)
-> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
// b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit, reverse
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
......@@ -1819,6 +2021,20 @@
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
&& clobber(o0) && clobber(o1) && clobber(o2)
-> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
y0:(REVW x0:(MOVWUloadidx ptr (ADDconst [4] idx) mem))
y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
y4:(MOVDnop x4:(MOVBUloadidx ptr idx mem)))
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
&& mergePoint(b,x0,x1,x2,x3,x4) != nil
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
&& clobber(o0) && clobber(o1) && clobber(o2)
-> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr idx mem))
// b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] -> load 32-bit, reverse
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
......@@ -1853,6 +2069,19 @@
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
&& clobber(o0) && clobber(o1) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
&& mergePoint(b,x0,x1,x2,x3) != nil
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
&& clobber(o0) && clobber(o1) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr idx mem))
// b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] -> load 64-bit, reverse
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
......@@ -1911,6 +2140,29 @@
&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
&& clobber(o4) && clobber(o5) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
y0:(MOVDnop x0:(MOVBUloadidx ptr idx mem)))
y1:(MOVDnop x1:(MOVBUloadidx ptr (ADDconst [1] idx) mem)))
y2:(MOVDnop x2:(MOVBUloadidx ptr (ADDconst [2] idx) mem)))
y3:(MOVDnop x3:(MOVBUloadidx ptr (ADDconst [3] idx) mem)))
y4:(MOVDnop x4:(MOVBUloadidx ptr (ADDconst [4] idx) mem)))
y5:(MOVDnop x5:(MOVBUloadidx ptr (ADDconst [5] idx) mem)))
y6:(MOVDnop x6:(MOVBUloadidx ptr (ADDconst [6] idx) mem)))
y7:(MOVDnop x7:(MOVBUloadidx ptr (ADDconst [7] idx) mem)))
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
&& clobber(o4) && clobber(o5) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr idx mem))
// Combine zero stores into larger (unaligned) stores.
(MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
......@@ -1926,6 +2178,10 @@
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstorezeroidx ptr1 idx1 mem)
(MOVBstorezeroidx ptr (ADDconst [1] idx) x:(MOVBstorezeroidx ptr idx mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVHstorezeroidx ptr idx mem)
(MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
&& x.Uses == 1
&& areAdjacentOffsets(i,j,2)
......@@ -1939,6 +2195,16 @@
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVWstorezeroidx ptr1 idx1 mem)
(MOVHstorezeroidx ptr (ADDconst [2] idx) x:(MOVHstorezeroidx ptr idx mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVWstorezeroidx ptr idx mem)
(MOVHstorezero [2] {s} (ADDshiftLL [1] ptr0 idx0) x:(MOVHstorezeroidx2 ptr1 idx1 mem))
&& x.Uses == 1
&& s == nil
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
&& clobber(x)
-> (MOVWstorezeroidx ptr1 (SLLconst <idx1.Type> [1] idx1) mem)
(MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
&& x.Uses == 1
&& areAdjacentOffsets(i,j,4)
......@@ -1952,6 +2218,16 @@
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVDstorezeroidx ptr1 idx1 mem)
(MOVWstorezeroidx ptr (ADDconst [4] idx) x:(MOVWstorezeroidx ptr idx mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVDstorezeroidx ptr idx mem)
(MOVWstorezero [4] {s} (ADDshiftLL [2] ptr0 idx0) x:(MOVWstorezeroidx4 ptr1 idx1 mem))
&& x.Uses == 1
&& s == nil
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
&& clobber(x)
-> (MOVDstorezeroidx ptr1 (SLLconst <idx1.Type> [2] idx1) mem)
(MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
&& x.Uses == 1
&& areAdjacentOffsets(i,j,8)
......@@ -1965,6 +2241,12 @@
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVQstorezero [0] {s} p0 mem)
(MOVDstorezero [8] {s} p0:(ADDshiftLL [3] ptr0 idx0) x:(MOVDstorezeroidx8 ptr1 idx1 mem))
&& x.Uses == 1
&& s == nil
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
&& clobber(x)
-> (MOVQstorezero [0] {s} p0 mem)
// Combine stores into larger (unaligned) stores.
(MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
......@@ -1978,6 +2260,10 @@
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr1 idx1 w mem)
(MOVBstoreidx ptr (ADDconst [1] idx) (SRLconst [8] w) x:(MOVBstoreidx ptr idx w mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVHstoreidx ptr idx w mem)
(MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
......@@ -2061,6 +2347,16 @@
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVWstoreidx ptr1 idx1 w mem)
(MOVHstoreidx ptr (ADDconst [2] idx) (SRLconst [16] w) x:(MOVHstoreidx ptr idx w mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVWstoreidx ptr idx w mem)
(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
&& clobber(x)
-> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
(MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
......@@ -2072,6 +2368,12 @@
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVWstoreidx ptr1 idx1 w mem)
(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
&& clobber(x)
-> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
(MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
......@@ -2083,6 +2385,12 @@
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVWstoreidx ptr1 idx1 w mem)
(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx2 ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
&& clobber(x)
-> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
(MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
......@@ -2094,6 +2402,12 @@
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVWstoreidx ptr1 idx1 w0 mem)
(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx2 ptr1 idx1 w0:(SRLconst [j-16] w) mem))
&& x.Uses == 1
&& s == nil
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
&& clobber(x)
-> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w0 mem)
(MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
......@@ -2105,6 +2419,16 @@
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVDstoreidx ptr1 idx1 w mem)
(MOVWstoreidx ptr (ADDconst [4] idx) (SRLconst [32] w) x:(MOVWstoreidx ptr idx w mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVDstoreidx ptr idx w mem)
(MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx4 ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
&& clobber(x)
-> (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w mem)
(MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
......@@ -2116,6 +2440,12 @@
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVDstoreidx ptr1 idx1 w0 mem)
(MOVWstore [4] {s} (ADDshiftLL [2] ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx4 ptr1 idx1 w0:(SRLconst [j-32] w) mem))
&& x.Uses == 1
&& s == nil
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
&& clobber(x)
-> (MOVDstoreidx ptr1 (SLLconst <idx1.Type> [2] idx1) w0 mem)
(MOVBstore [i] {s} ptr w
x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
......@@ -2190,6 +2520,28 @@
&& clobber(x1)
&& clobber(x2)
-> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
(MOVBstoreidx ptr (ADDconst [3] idx) w
x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [arm64BFAuxInt(8, 24)] w)
x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(16, 16)] w)
x2:(MOVBstoreidx ptr idx (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
-> (MOVWstoreidx ptr idx (REVW <w.Type> w) mem)
(MOVBstoreidx ptr idx w
x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(8, 24)] w)
x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [arm64BFAuxInt(16, 16)] w)
x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
-> (MOVWstoreidx ptr idx w mem)
(MOVBstore [i] {s} ptr w
x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w))
x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w))
......@@ -2260,6 +2612,14 @@
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
(MOVBstoreidx ptr (ADDconst [1] idx) w x:(MOVBstoreidx ptr idx (UBFX [arm64BFAuxInt(8, 8)] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVHstoreidx ptr idx (REV16W <w.Type> w) mem)
(MOVBstoreidx ptr idx w x:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(8, 8)] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVHstoreidx ptr idx w mem)
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
&& x.Uses == 1
&& clobber(x)
......
......@@ -324,13 +324,20 @@ func init() {
{name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem.
// register indexed load
{name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD", faultOnNilArg0: true}, // load 64-bit dword from arg0 + arg1, arg2 = mem.
{name: "MOVWloadidx", argLength: 3, reg: gp2load, asm: "MOVW", faultOnNilArg0: true}, // load 32-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVWUloadidx", argLength: 3, reg: gp2load, asm: "MOVWU", faultOnNilArg0: true}, // load 32-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{name: "MOVHloadidx", argLength: 3, reg: gp2load, asm: "MOVH", faultOnNilArg0: true}, // load 16-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVHUloadidx", argLength: 3, reg: gp2load, asm: "MOVHU", faultOnNilArg0: true}, // load 16-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{name: "MOVBloadidx", argLength: 3, reg: gp2load, asm: "MOVB", faultOnNilArg0: true}, // load 8-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVBUloadidx", argLength: 3, reg: gp2load, asm: "MOVBU", faultOnNilArg0: true}, // load 8-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD"}, // load 64-bit dword from arg0 + arg1, arg2 = mem.
{name: "MOVWloadidx", argLength: 3, reg: gp2load, asm: "MOVW"}, // load 32-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVWUloadidx", argLength: 3, reg: gp2load, asm: "MOVWU"}, // load 32-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{name: "MOVHloadidx", argLength: 3, reg: gp2load, asm: "MOVH"}, // load 16-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVHUloadidx", argLength: 3, reg: gp2load, asm: "MOVHU"}, // load 16-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{name: "MOVBloadidx", argLength: 3, reg: gp2load, asm: "MOVB"}, // load 8-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVBUloadidx", argLength: 3, reg: gp2load, asm: "MOVBU"}, // load 8-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
// shifted register indexed load
{name: "MOVHloadidx2", argLength: 3, reg: gp2load, asm: "MOVH"}, // load 16-bit half-word from arg0 + arg1*2, sign-extended to 64-bit, arg2=mem.
{name: "MOVHUloadidx2", argLength: 3, reg: gp2load, asm: "MOVHU"}, // load 16-bit half-word from arg0 + arg1*2, zero-extended to 64-bit, arg2=mem.
{name: "MOVWloadidx4", argLength: 3, reg: gp2load, asm: "MOVW"}, // load 32-bit word from arg0 + arg1*4, sign-extended to 64-bit, arg2=mem.
{name: "MOVWUloadidx4", argLength: 3, reg: gp2load, asm: "MOVWU"}, // load 32-bit word from arg0 + arg1*4, zero-extended to 64-bit, arg2=mem.
{name: "MOVDloadidx8", argLength: 3, reg: gp2load, asm: "MOVD"}, // load 64-bit double-word from arg0 + arg1*8, arg2 = mem.
{name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
......@@ -341,10 +348,15 @@ func init() {
{name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
// register indexed store
{name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem", faultOnNilArg0: true}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVHstoreidx", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVWstoreidx", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVDstoreidx", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem"}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVHstoreidx", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVWstoreidx", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVDstoreidx", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1, arg3 = mem.
// shifted register indexed store
{name: "MOVHstoreidx2", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem"}, // store 2 bytes of arg2 to arg0 + arg1*2, arg3 = mem.
{name: "MOVWstoreidx4", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem"}, // store 4 bytes of arg2 to arg0 + arg1*4, arg3 = mem.
{name: "MOVDstoreidx8", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem"}, // store 8 bytes of arg2 to arg0 + arg1*8, arg3 = mem.
{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem.
......@@ -353,10 +365,15 @@ func init() {
{name: "MOVQstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of zero to arg0 + auxInt + aux. arg1=mem.
// register indexed store zero
{name: "MOVBstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVB", typ: "Mem", faultOnNilArg0: true}, // store 1 byte of zero to arg0 + arg1, arg2 = mem.
{name: "MOVHstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "MOVWstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "MOVDstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "MOVBstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVB", typ: "Mem"}, // store 1 byte of zero to arg0 + arg1, arg2 = mem.
{name: "MOVHstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "MOVWstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "MOVDstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + arg1, arg2 = mem.
// shifted register indexed store zero
{name: "MOVHstorezeroidx2", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem"}, // store 2 bytes of zero to arg0 + arg1*2, arg2 = mem.
{name: "MOVWstorezeroidx4", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem"}, // store 4 bytes of zero to arg0 + arg1*4, arg2 = mem.
{name: "MOVDstorezeroidx8", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem"}, // store 8 bytes of zero to arg0 + arg1*8, arg2 = mem.
{name: "FMOVDgpfp", argLength: 1, reg: gpfp, asm: "FMOVD"}, // move int64 to float64 (no conversion)
{name: "FMOVDfpgp", argLength: 1, reg: fpgp, asm: "FMOVD"}, // move float64 to int64 (no conversion)
......
......@@ -1168,6 +1168,11 @@ const (
OpARM64MOVHUloadidx
OpARM64MOVBloadidx
OpARM64MOVBUloadidx
OpARM64MOVHloadidx2
OpARM64MOVHUloadidx2
OpARM64MOVWloadidx4
OpARM64MOVWUloadidx4
OpARM64MOVDloadidx8
OpARM64MOVBstore
OpARM64MOVHstore
OpARM64MOVWstore
......@@ -1179,6 +1184,9 @@ const (
OpARM64MOVHstoreidx
OpARM64MOVWstoreidx
OpARM64MOVDstoreidx
OpARM64MOVHstoreidx2
OpARM64MOVWstoreidx4
OpARM64MOVDstoreidx8
OpARM64MOVBstorezero
OpARM64MOVHstorezero
OpARM64MOVWstorezero
......@@ -1188,6 +1196,9 @@ const (
OpARM64MOVHstorezeroidx
OpARM64MOVWstorezeroidx
OpARM64MOVDstorezeroidx
OpARM64MOVHstorezeroidx2
OpARM64MOVWstorezeroidx4
OpARM64MOVDstorezeroidx8
OpARM64FMOVDgpfp
OpARM64FMOVDfpgp
OpARM64MOVBreg
......@@ -15233,10 +15244,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVDloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVD,
name: "MOVDloadidx",
argLen: 3,
asm: arm64.AMOVD,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......@@ -15248,10 +15258,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVWloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVW,
name: "MOVWloadidx",
argLen: 3,
asm: arm64.AMOVW,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......@@ -15263,10 +15272,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVWUloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVWU,
name: "MOVWUloadidx",
argLen: 3,
asm: arm64.AMOVWU,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......@@ -15278,10 +15286,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVHloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVH,
name: "MOVHloadidx",
argLen: 3,
asm: arm64.AMOVH,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......@@ -15293,10 +15300,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVHUloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVHU,
name: "MOVHUloadidx",
argLen: 3,
asm: arm64.AMOVHU,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......@@ -15308,10 +15314,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVBloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVB,
name: "MOVBloadidx",
argLen: 3,
asm: arm64.AMOVB,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......@@ -15323,10 +15328,79 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVBUloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVBU,
name: "MOVBUloadidx",
argLen: 3,
asm: arm64.AMOVBU,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVHloadidx2",
argLen: 3,
asm: arm64.AMOVH,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVHUloadidx2",
argLen: 3,
asm: arm64.AMOVHU,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVWloadidx4",
argLen: 3,
asm: arm64.AMOVW,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVWUloadidx4",
argLen: 3,
asm: arm64.AMOVWU,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVDloadidx8",
argLen: 3,
asm: arm64.AMOVD,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......@@ -15437,10 +15511,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVBstoreidx",
argLen: 4,
faultOnNilArg0: true,
asm: arm64.AMOVB,
name: "MOVBstoreidx",
argLen: 4,
asm: arm64.AMOVB,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......@@ -15450,10 +15523,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVHstoreidx",
argLen: 4,
faultOnNilArg0: true,
asm: arm64.AMOVH,
name: "MOVHstoreidx",
argLen: 4,
asm: arm64.AMOVH,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......@@ -15463,10 +15535,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVWstoreidx",
argLen: 4,
faultOnNilArg0: true,
asm: arm64.AMOVW,
name: "MOVWstoreidx",
argLen: 4,
asm: arm64.AMOVW,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......@@ -15476,10 +15547,45 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVDstoreidx",
argLen: 4,
faultOnNilArg0: true,
asm: arm64.AMOVD,
name: "MOVDstoreidx",
argLen: 4,
asm: arm64.AMOVD,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVHstoreidx2",
argLen: 4,
asm: arm64.AMOVH,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVWstoreidx4",
argLen: 4,
asm: arm64.AMOVW,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVDstoreidx8",
argLen: 4,
asm: arm64.AMOVD,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......@@ -15554,10 +15660,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVBstorezeroidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVB,
name: "MOVBstorezeroidx",
argLen: 3,
asm: arm64.AMOVB,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......@@ -15566,10 +15671,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVHstorezeroidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVH,
name: "MOVHstorezeroidx",
argLen: 3,
asm: arm64.AMOVH,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......@@ -15578,10 +15682,9 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVWstorezeroidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVW,
name: "MOVWstorezeroidx",
argLen: 3,
asm: arm64.AMOVW,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......@@ -15590,10 +15693,42 @@ var opcodeTable = [...]opInfo{
},
},
{
name: "MOVDstorezeroidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVD,
name: "MOVDstorezeroidx",
argLen: 3,
asm: arm64.AMOVD,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVHstorezeroidx2",
argLen: 3,
asm: arm64.AMOVH,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVWstorezeroidx4",
argLen: 3,
asm: arm64.AMOVW,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVDstorezeroidx8",
argLen: 3,
asm: arm64.AMOVD,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -110,6 +110,21 @@ func load_byte2_uint16(s []byte) uint16 {
return uint16(s[0]) | uint16(s[1])<<8
}
func load_byte2_uint16_idx(s []byte, idx int) uint16 {
// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`ORR`,-`MOVB`
return uint16(s[idx<<1]) | uint16(s[(idx<<1)+1])<<8
}
func load_byte4_uint32_idx(s []byte, idx int) uint32 {
// arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`ORR`,-`MOV[BH]`
return uint32(s[idx<<2]) | uint32(s[(idx<<2)+1])<<8 | uint32(s[(idx<<2)+2])<<16 | uint32(s[(idx<<2)+3])<<24
}
func load_byte8_uint64_idx(s []byte, idx int) uint64 {
// arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`ORR`,-`MOV[BHW]`
return uint64(s[idx<<3]) | uint64(s[(idx<<3)+1])<<8 | uint64(s[(idx<<3)+2])<<16 | uint64(s[(idx<<3)+3])<<24 | uint64(s[(idx<<3)+4])<<32 | uint64(s[(idx<<3)+5])<<40 | uint64(s[(idx<<3)+6])<<48 | uint64(s[(idx<<3)+7])<<56
}
// Check load combining across function calls.
func fcall_byte(a, b byte) (byte, byte) {
......@@ -268,6 +283,32 @@ func zero_byte_16(b []byte) {
b[12], b[13], b[14], b[15] = 0, 0, 0, 0 // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
}
func zero_byte_2_idx(b []byte, idx int) {
// arm64: `MOVH\sZR,\s\(R[0-9]+\)\(R[0-9]+<<1\)`,-`MOVB`
b[(idx<<1)+0] = 0
b[(idx<<1)+1] = 0
}
func zero_byte_4_idx(b []byte, idx int) {
// arm64: `MOVW\sZR,\s\(R[0-9]+\)\(R[0-9]+<<2\)`,-`MOV[BH]`
b[(idx<<2)+0] = 0
b[(idx<<2)+1] = 0
b[(idx<<2)+2] = 0
b[(idx<<2)+3] = 0
}
func zero_byte_8_idx(b []byte, idx int) {
// arm64: `MOVD\sZR,\s\(R[0-9]+\)\(R[0-9]+<<3\)`,-`MOV[BHW]`
b[(idx<<3)+0] = 0
b[(idx<<3)+1] = 0
b[(idx<<3)+2] = 0
b[(idx<<3)+3] = 0
b[(idx<<3)+4] = 0
b[(idx<<3)+5] = 0
b[(idx<<3)+6] = 0
b[(idx<<3)+7] = 0
}
func zero_byte_30(a *[30]byte) {
*a = [30]byte{} // arm64:"STP",-"MOVB",-"MOVH",-"MOVW"
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment