Commit 34f5f8a5 authored by Ben Shi's avatar Ben Shi Committed by Cherry Zhang

cmd/compile: optimize ARM64 with register indexed load/store

ARM64 supports load/store instructions with a memory operand that
the address is calculated by base register + index register.

In this CL,
1. Some rules are added to the compile's ARM64 backend to emit
such efficient instructions.
2. A wrong rule of load combination is fixed.

The go1 benchmark does show improvement.

name                     old time/op    new time/op    delta
BinaryTree17-4              44.5s ± 2%     44.1s ± 1%   -0.81%  (p=0.000 n=28+29)
Fannkuch11-4                32.7s ± 3%     30.5s ± 0%   -6.79%  (p=0.000 n=30+26)
FmtFprintfEmpty-4           499ns ± 0%     506ns ± 5%   +1.39%  (p=0.003 n=25+30)
FmtFprintfString-4         1.07µs ± 0%    1.04µs ± 4%   -3.17%  (p=0.000 n=23+30)
FmtFprintfInt-4            1.15µs ± 4%    1.13µs ± 0%   -1.55%  (p=0.000 n=30+23)
FmtFprintfIntInt-4         1.77µs ± 4%    1.74µs ± 0%   -1.71%  (p=0.000 n=30+24)
FmtFprintfPrefixedInt-4    2.37µs ± 5%    2.12µs ± 0%  -10.56%  (p=0.000 n=30+23)
FmtFprintfFloat-4          3.03µs ± 1%    3.03µs ± 4%   -0.13%  (p=0.003 n=25+30)
FmtManyArgs-4              7.38µs ± 1%    7.43µs ± 4%   +0.59%  (p=0.003 n=25+30)
GobDecode-4                 101ms ± 6%      95ms ± 5%   -5.55%  (p=0.000 n=30+30)
GobEncode-4                78.0ms ± 4%    78.8ms ± 6%   +1.05%  (p=0.000 n=30+30)
Gzip-4                      4.25s ± 0%     4.27s ± 4%   +0.45%  (p=0.003 n=24+30)
Gunzip-4                    428ms ± 1%     420ms ± 0%   -1.88%  (p=0.000 n=23+23)
HTTPClientServer-4          549µs ± 1%     541µs ± 1%   -1.56%  (p=0.000 n=29+29)
JSONEncode-4                194ms ± 0%     188ms ± 4%     ~     (p=0.417 n=23+30)
JSONDecode-4                890ms ± 5%     831ms ± 0%   -6.55%  (p=0.000 n=30+23)
Mandelbrot200-4            47.3ms ± 2%    46.5ms ± 0%     ~     (p=0.980 n=30+26)
GoParse-4                  43.1ms ± 6%    43.8ms ± 6%   +1.65%  (p=0.000 n=30+30)
RegexpMatchEasy0_32-4      1.06µs ± 0%    1.07µs ± 3%     ~     (p=0.092 n=23+30)
RegexpMatchEasy0_1K-4      5.53µs ± 0%    5.51µs ± 0%   -0.24%  (p=0.000 n=25+25)
RegexpMatchEasy1_32-4      1.02µs ± 3%    1.01µs ± 0%   -1.27%  (p=0.000 n=30+24)
RegexpMatchEasy1_1K-4      7.26µs ± 0%    7.33µs ± 0%   +0.95%  (p=0.000 n=23+26)
RegexpMatchMedium_32-4     1.84µs ± 7%    1.79µs ± 1%     ~     (p=0.333 n=30+23)
RegexpMatchMedium_1K-4      553µs ± 0%     547µs ± 0%   -1.14%  (p=0.000 n=24+22)
RegexpMatchHard_32-4       30.8µs ± 1%    30.3µs ± 0%   -1.40%  (p=0.000 n=24+24)
RegexpMatchHard_1K-4        928µs ± 0%     929µs ± 5%   +0.12%  (p=0.013 n=23+30)
Revcomp-4                   8.13s ± 4%     6.32s ± 1%  -22.23%  (p=0.000 n=30+23)
Template-4                  899ms ± 6%     854ms ± 1%   -5.01%  (p=0.000 n=30+24)
TimeParse-4                4.66µs ± 4%    4.59µs ± 1%   -1.57%  (p=0.000 n=30+23)
TimeFormat-4               4.58µs ± 0%    4.61µs ± 0%   +0.57%  (p=0.000 n=26+24)
[Geo mean]                  717µs          698µs        -2.55%

name                     old speed      new speed      delta
GobDecode-4              7.63MB/s ± 6%  8.08MB/s ± 5%   +5.88%  (p=0.000 n=30+30)
GobEncode-4              9.85MB/s ± 4%  9.75MB/s ± 6%   -1.04%  (p=0.000 n=30+30)
Gzip-4                   4.56MB/s ± 0%  4.55MB/s ± 4%   -0.36%  (p=0.003 n=24+30)
Gunzip-4                 45.3MB/s ± 1%  46.2MB/s ± 0%   +1.92%  (p=0.000 n=23+23)
JSONEncode-4             10.0MB/s ± 0%  10.4MB/s ± 4%     ~     (p=0.403 n=23+30)
JSONDecode-4             2.18MB/s ± 5%  2.33MB/s ± 0%   +6.91%  (p=0.000 n=30+23)
GoParse-4                1.34MB/s ± 5%  1.32MB/s ± 5%   -1.66%  (p=0.000 n=30+30)
RegexpMatchEasy0_32-4    30.2MB/s ± 0%  29.8MB/s ± 3%     ~     (p=0.099 n=23+30)
RegexpMatchEasy0_1K-4     185MB/s ± 0%   186MB/s ± 0%   +0.24%  (p=0.000 n=25+25)
RegexpMatchEasy1_32-4    31.4MB/s ± 3%  31.8MB/s ± 0%   +1.24%  (p=0.000 n=30+24)
RegexpMatchEasy1_1K-4     141MB/s ± 0%   140MB/s ± 0%   -0.94%  (p=0.000 n=23+26)
RegexpMatchMedium_32-4    541kB/s ± 6%   560kB/s ± 0%   +3.45%  (p=0.000 n=30+23)
RegexpMatchMedium_1K-4   1.85MB/s ± 0%  1.87MB/s ± 0%   +1.08%  (p=0.000 n=24+23)
RegexpMatchHard_32-4     1.04MB/s ± 1%  1.06MB/s ± 1%   +1.48%  (p=0.000 n=24+24)
RegexpMatchHard_1K-4     1.10MB/s ± 0%  1.10MB/s ± 5%   +0.15%  (p=0.004 n=23+30)
Revcomp-4                31.3MB/s ± 4%  40.2MB/s ± 1%  +28.52%  (p=0.000 n=30+23)
Template-4               2.16MB/s ± 6%  2.27MB/s ± 1%   +5.18%  (p=0.000 n=30+24)
[Geo mean]               7.57MB/s       7.79MB/s        +2.98%

fixes #24907

Change-Id: I94afd0e3f53d62a1cf5e452f3dd6daf61be21785
Reviewed-on: https://go-review.googlesource.com/107376
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
parent d5a52e70
...@@ -341,6 +341,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -341,6 +341,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
gc.AddAux(&p.From, v) gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg() p.To.Reg = v.Reg()
case ssa.OpARM64MOVBloadidx,
ssa.OpARM64MOVBUloadidx,
ssa.OpARM64MOVHloadidx,
ssa.OpARM64MOVHUloadidx,
ssa.OpARM64MOVWloadidx,
ssa.OpARM64MOVWUloadidx,
ssa.OpARM64MOVDloadidx:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Name = obj.NAME_NONE
p.From.Reg = v.Args[0].Reg()
p.From.Index = v.Args[1].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpARM64LDAR, case ssa.OpARM64LDAR,
ssa.OpARM64LDARW: ssa.OpARM64LDARW:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
...@@ -363,6 +377,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -363,6 +377,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_MEM p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg() p.To.Reg = v.Args[0].Reg()
gc.AddAux(&p.To, v) gc.AddAux(&p.To, v)
case ssa.OpARM64MOVBstoreidx,
ssa.OpARM64MOVHstoreidx,
ssa.OpARM64MOVWstoreidx,
ssa.OpARM64MOVDstoreidx:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[2].Reg()
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_NONE
p.To.Reg = v.Args[0].Reg()
p.To.Index = v.Args[1].Reg()
case ssa.OpARM64STP: case ssa.OpARM64STP:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REGREG p.From.Type = obj.TYPE_REGREG
...@@ -381,6 +406,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -381,6 +406,17 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_MEM p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg() p.To.Reg = v.Args[0].Reg()
gc.AddAux(&p.To, v) gc.AddAux(&p.To, v)
case ssa.OpARM64MOVBstorezeroidx,
ssa.OpARM64MOVHstorezeroidx,
ssa.OpARM64MOVWstorezeroidx,
ssa.OpARM64MOVDstorezeroidx:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REG
p.From.Reg = arm64.REGZERO
p.To.Type = obj.TYPE_MEM
p.To.Name = obj.NAME_NONE
p.To.Reg = v.Args[0].Reg()
p.To.Index = v.Args[1].Reg()
case ssa.OpARM64MOVQstorezero: case ssa.OpARM64MOVQstorezero:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_REGREG p.From.Type = obj.TYPE_REGREG
......
...@@ -606,6 +606,29 @@ ...@@ -606,6 +606,29 @@
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) -> && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
(FMOVDload [off1+off2] {sym} ptr mem) (FMOVDload [off1+off2] {sym} ptr mem)
// register indexed load
(MOVDload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVDloadidx ptr idx mem)
(MOVWUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWUloadidx ptr idx mem)
(MOVWload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWloadidx ptr idx mem)
(MOVHUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHUloadidx ptr idx mem)
(MOVHload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHloadidx ptr idx mem)
(MOVBUload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBUloadidx ptr idx mem)
(MOVBload [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBloadidx ptr idx mem)
(MOVDloadidx ptr (MOVDconst [c]) mem) -> (MOVDload [c] ptr mem)
(MOVDloadidx (MOVDconst [c]) ptr mem) -> (MOVDload [c] ptr mem)
(MOVWUloadidx ptr (MOVDconst [c]) mem) -> (MOVWUload [c] ptr mem)
(MOVWUloadidx (MOVDconst [c]) ptr mem) -> (MOVWUload [c] ptr mem)
(MOVWloadidx ptr (MOVDconst [c]) mem) -> (MOVWload [c] ptr mem)
(MOVWloadidx (MOVDconst [c]) ptr mem) -> (MOVWload [c] ptr mem)
(MOVHUloadidx ptr (MOVDconst [c]) mem) -> (MOVHUload [c] ptr mem)
(MOVHUloadidx (MOVDconst [c]) ptr mem) -> (MOVHUload [c] ptr mem)
(MOVHloadidx ptr (MOVDconst [c]) mem) -> (MOVHload [c] ptr mem)
(MOVHloadidx (MOVDconst [c]) ptr mem) -> (MOVHload [c] ptr mem)
(MOVBUloadidx ptr (MOVDconst [c]) mem) -> (MOVBUload [c] ptr mem)
(MOVBUloadidx (MOVDconst [c]) ptr mem) -> (MOVBUload [c] ptr mem)
(MOVBloadidx ptr (MOVDconst [c]) mem) -> (MOVBload [c] ptr mem)
(MOVBloadidx (MOVDconst [c]) ptr mem) -> (MOVBload [c] ptr mem)
(MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2) (MOVBstore [off1] {sym} (ADDconst [off2] ptr) val mem) && is32Bit(off1+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) -> && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
(MOVBstore [off1+off2] {sym} ptr val mem) (MOVBstore [off1+off2] {sym} ptr val mem)
...@@ -643,6 +666,20 @@ ...@@ -643,6 +666,20 @@
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) -> && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
(MOVQstorezero [off1+off2] {sym} ptr mem) (MOVQstorezero [off1+off2] {sym} ptr mem)
// register indexed store
(MOVDstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVDstoreidx ptr idx val mem)
(MOVWstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVWstoreidx ptr idx val mem)
(MOVHstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVHstoreidx ptr idx val mem)
(MOVBstore [off] {sym} (ADD ptr idx) val mem) && off == 0 && sym == nil -> (MOVBstoreidx ptr idx val mem)
(MOVDstoreidx ptr (MOVDconst [c]) val mem) -> (MOVDstore [c] ptr val mem)
(MOVDstoreidx (MOVDconst [c]) idx val mem) -> (MOVDstore [c] idx val mem)
(MOVWstoreidx ptr (MOVDconst [c]) val mem) -> (MOVWstore [c] ptr val mem)
(MOVWstoreidx (MOVDconst [c]) idx val mem) -> (MOVWstore [c] idx val mem)
(MOVHstoreidx ptr (MOVDconst [c]) val mem) -> (MOVHstore [c] ptr val mem)
(MOVHstoreidx (MOVDconst [c]) idx val mem) -> (MOVHstore [c] idx val mem)
(MOVBstoreidx ptr (MOVDconst [c]) val mem) -> (MOVBstore [c] ptr val mem)
(MOVBstoreidx (MOVDconst [c]) idx val mem) -> (MOVBstore [c] idx val mem)
(MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem) (MOVBload [off1] {sym1} (MOVDaddr [off2] {sym2} ptr) mem)
&& canMergeSym(sym1,sym2) && is32Bit(off1+off2) && canMergeSym(sym1,sym2) && is32Bit(off1+off2)
&& (ptr.Op != OpSB || !config.ctxt.Flag_shared) -> && (ptr.Op != OpSB || !config.ctxt.Flag_shared) ->
...@@ -736,6 +773,24 @@ ...@@ -736,6 +773,24 @@
(MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVDstorezero [off] {sym} ptr mem) (MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVDstorezero [off] {sym} ptr mem)
(STP [off] {sym} ptr (MOVDconst [0]) (MOVDconst [0]) mem) -> (MOVQstorezero [off] {sym} ptr mem) (STP [off] {sym} ptr (MOVDconst [0]) (MOVDconst [0]) mem) -> (MOVQstorezero [off] {sym} ptr mem)
// register indexed store zero
(MOVDstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVDstorezeroidx ptr idx mem)
(MOVWstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVWstorezeroidx ptr idx mem)
(MOVHstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVHstorezeroidx ptr idx mem)
(MOVBstorezero [off] {sym} (ADD ptr idx) mem) && off == 0 && sym == nil -> (MOVBstorezeroidx ptr idx mem)
(MOVDstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVDstorezeroidx ptr idx mem)
(MOVWstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVWstorezeroidx ptr idx mem)
(MOVHstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVHstorezeroidx ptr idx mem)
(MOVBstoreidx ptr idx (MOVDconst [0]) mem) -> (MOVBstorezeroidx ptr idx mem)
(MOVDstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVDstorezero [c] ptr mem)
(MOVDstorezeroidx (MOVDconst [c]) idx mem) -> (MOVDstorezero [c] idx mem)
(MOVWstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVWstorezero [c] ptr mem)
(MOVWstorezeroidx (MOVDconst [c]) idx mem) -> (MOVWstorezero [c] idx mem)
(MOVHstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVHstorezero [c] ptr mem)
(MOVHstorezeroidx (MOVDconst [c]) idx mem) -> (MOVHstorezero [c] idx mem)
(MOVBstorezeroidx ptr (MOVDconst [c]) mem) -> (MOVBstorezero [c] ptr mem)
(MOVBstorezeroidx (MOVDconst [c]) idx mem) -> (MOVBstorezero [c] idx mem)
// replace load from same location as preceding store with zero/sign extension (or copy in case of full width) // replace load from same location as preceding store with zero/sign extension (or copy in case of full width)
// these seem to have bad interaction with other rules, resulting in slower code // these seem to have bad interaction with other rules, resulting in slower code
//(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x) //(MOVBload [off] {sym} ptr (MOVBstore [off2] {sym2} ptr2 x _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVBreg x)
...@@ -756,6 +811,21 @@ ...@@ -756,6 +811,21 @@
(MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0]) (MOVWUload [off] {sym} ptr (MOVWstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
(MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0]) (MOVDload [off] {sym} ptr (MOVDstorezero [off2] {sym2} ptr2 _)) && sym == sym2 && off == off2 && isSamePtr(ptr, ptr2) -> (MOVDconst [0])
(MOVBloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
(MOVBUloadidx ptr idx (MOVBstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
(MOVHloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
(MOVHUloadidx ptr idx (MOVHstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
(MOVWloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
(MOVWUloadidx ptr idx (MOVWstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
(MOVDloadidx ptr idx (MOVDstorezeroidx ptr2 idx2 _))
&& (isSamePtr(ptr, ptr2) && isSamePtr(idx, idx2) || isSamePtr(ptr, idx2) && isSamePtr(idx, ptr2)) -> (MOVDconst [0])
// don't extend after proper load // don't extend after proper load
(MOVBreg x:(MOVBload _ _)) -> (MOVDreg x) (MOVBreg x:(MOVBload _ _)) -> (MOVDreg x)
(MOVBUreg x:(MOVBUload _ _)) -> (MOVDreg x) (MOVBUreg x:(MOVBUload _ _)) -> (MOVDreg x)
...@@ -772,6 +842,21 @@ ...@@ -772,6 +842,21 @@
(MOVWUreg x:(MOVBUload _ _)) -> (MOVDreg x) (MOVWUreg x:(MOVBUload _ _)) -> (MOVDreg x)
(MOVWUreg x:(MOVHUload _ _)) -> (MOVDreg x) (MOVWUreg x:(MOVHUload _ _)) -> (MOVDreg x)
(MOVWUreg x:(MOVWUload _ _)) -> (MOVDreg x) (MOVWUreg x:(MOVWUload _ _)) -> (MOVDreg x)
(MOVBreg x:(MOVBloadidx _ _ _)) -> (MOVDreg x)
(MOVBUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
(MOVHreg x:(MOVBloadidx _ _ _)) -> (MOVDreg x)
(MOVHreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
(MOVHreg x:(MOVHloadidx _ _ _)) -> (MOVDreg x)
(MOVHUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
(MOVHUreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x)
(MOVWreg x:(MOVBloadidx _ _ _)) -> (MOVDreg x)
(MOVWreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
(MOVWreg x:(MOVHloadidx _ _ _)) -> (MOVDreg x)
(MOVWreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x)
(MOVWreg x:(MOVWloadidx _ _ _)) -> (MOVDreg x)
(MOVWUreg x:(MOVBUloadidx _ _ _)) -> (MOVDreg x)
(MOVWUreg x:(MOVHUloadidx _ _ _)) -> (MOVDreg x)
(MOVWUreg x:(MOVWUloadidx _ _ _)) -> (MOVDreg x)
// fold double extensions // fold double extensions
(MOVBreg x:(MOVBreg _)) -> (MOVDreg x) (MOVBreg x:(MOVBreg _)) -> (MOVDreg x)
...@@ -803,6 +888,18 @@ ...@@ -803,6 +888,18 @@
(MOVHstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVHstore [off] {sym} ptr x mem) (MOVHstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVHstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
(MOVWstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVWstore [off] {sym} ptr x mem) (MOVWstore [off] {sym} ptr (MOVWUreg x) mem) -> (MOVWstore [off] {sym} ptr x mem)
(MOVBstoreidx ptr idx (MOVBreg x) mem) -> (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVBUreg x) mem) -> (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVHreg x) mem) -> (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVHUreg x) mem) -> (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVWreg x) mem) -> (MOVBstoreidx ptr idx x mem)
(MOVBstoreidx ptr idx (MOVWUreg x) mem) -> (MOVBstoreidx ptr idx x mem)
(MOVHstoreidx ptr idx (MOVHreg x) mem) -> (MOVHstoreidx ptr idx x mem)
(MOVHstoreidx ptr idx (MOVHUreg x) mem) -> (MOVHstoreidx ptr idx x mem)
(MOVHstoreidx ptr idx (MOVWreg x) mem) -> (MOVHstoreidx ptr idx x mem)
(MOVHstoreidx ptr idx (MOVWUreg x) mem) -> (MOVHstoreidx ptr idx x mem)
(MOVWstoreidx ptr idx (MOVWreg x) mem) -> (MOVWstoreidx ptr idx x mem)
(MOVWstoreidx ptr idx (MOVWUreg x) mem) -> (MOVWstoreidx ptr idx x mem)
// if a register move has only 1 use, just use the same register without emitting instruction // if a register move has only 1 use, just use the same register without emitting instruction
// MOVDnop doesn't emit instruction, only for ensuring the type. // MOVDnop doesn't emit instruction, only for ensuring the type.
...@@ -1410,6 +1507,17 @@ ...@@ -1410,6 +1507,17 @@
&& clobber(x0) && clobber(x1) && clobber(x0) && clobber(x1)
&& clobber(y0) && clobber(y1) && clobber(y0) && clobber(y1)
-> @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i0] p) mem) -> @mergePoint(b,x0,x1) (MOVHUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
(ORshiftLL <t> [8]
y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem))
y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
&& s == nil
&& x0.Uses == 1 && x1.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x0) && clobber(x1)
&& clobber(y0) && clobber(y1)
-> @mergePoint(b,x0,x1) (MOVHUloadidx <t> ptr0 idx0 mem)
// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 -> load 32-bit
(ORshiftLL <t> [24] o0:(ORshiftLL [16] (ORshiftLL <t> [24] o0:(ORshiftLL [16]
...@@ -1426,6 +1534,21 @@ ...@@ -1426,6 +1534,21 @@
&& clobber(y1) && clobber(y2) && clobber(y1) && clobber(y2)
&& clobber(o0) && clobber(o0)
-> @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem) -> @mergePoint(b,x0,x1,x2) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
x0:(MOVHUloadidx ptr0 idx0 mem)
y1:(MOVDnop x1:(MOVBUload [2] {s} p1:(ADD ptr1 idx1) mem)))
y2:(MOVDnop x2:(MOVBUload [3] {s} p mem)))
&& s == nil
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
&& y1.Uses == 1 && y2.Uses == 1
&& o0.Uses == 1
&& mergePoint(b,x0,x1,x2) != nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& isSamePtr(p1, p)
&& clobber(x0) && clobber(x1) && clobber(x2)
&& clobber(y1) && clobber(y2)
&& clobber(o0)
-> @mergePoint(b,x0,x1,x2) (MOVWUloadidx <t> ptr0 idx0 mem)
// b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit // b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24 | b[4]<<32 | b[5]<<40 | b[6]<<48 | b[7]<<56 -> load 64-bit
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
...@@ -1446,6 +1569,23 @@ ...@@ -1446,6 +1569,23 @@
&& clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o0) && clobber(o1) && clobber(o2)
-> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem) -> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
x0:(MOVWUloadidx ptr0 idx0 mem)
y1:(MOVDnop x1:(MOVBUload [4] {s} p1:(ADD ptr1 idx1) mem)))
y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
y3:(MOVDnop x3:(MOVBUload [6] {s} p mem)))
y4:(MOVDnop x4:(MOVBUload [7] {s} p mem)))
&& s == nil
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
&& y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
&& mergePoint(b,x0,x1,x2,x3,x4) != nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& isSamePtr(p1, p)
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
&& clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
&& clobber(o0) && clobber(o1) && clobber(o2)
-> @mergePoint(b,x0,x1,x2,x3,x4) (MOVDloadidx <t> ptr0 idx0 mem)
// b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 32-bit // b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 32-bit
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
...@@ -1464,8 +1604,24 @@ ...@@ -1464,8 +1604,24 @@
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
&& clobber(o0) && clobber(o1) && clobber(s0) && clobber(o0) && clobber(o1) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem) -> @mergePoint(b,x0,x1,x2,x3) (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
y0:(MOVDnop x0:(MOVBUload [3] {s} p mem)))
y1:(MOVDnop x1:(MOVBUload [2] {s} p mem)))
y2:(MOVDnop x2:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
y3:(MOVDnop x3:(MOVBUloadidx ptr0 idx0 mem)))
&& s == nil
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
&& mergePoint(b,x0,x1,x2,x3) != nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& isSamePtr(p1, p)
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
&& clobber(o0) && clobber(o1) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3) (MOVWUloadidx <t> ptr0 idx0 mem)
// b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 64-bit, reverse // b[7]<<56 | b[6]<<48 | b[5]<<40 | b[4]<<32 | b[3]<<24 | b[2]<<16 | b[1]<<8 | b[0] -> load 64-bit
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem))) y0:(MOVDnop x0:(MOVBUload [i7] {s} p mem)))
y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem))) y1:(MOVDnop x1:(MOVBUload [i6] {s} p mem)))
...@@ -1495,7 +1651,33 @@ ...@@ -1495,7 +1651,33 @@
&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7) && clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
&& clobber(o4) && clobber(o5) && clobber(s0) && clobber(o4) && clobber(o5) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)) -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
y0:(MOVDnop x0:(MOVBUload [7] {s} p mem)))
y1:(MOVDnop x1:(MOVBUload [6] {s} p mem)))
y2:(MOVDnop x2:(MOVBUload [5] {s} p mem)))
y3:(MOVDnop x3:(MOVBUload [4] {s} p mem)))
y4:(MOVDnop x4:(MOVBUload [3] {s} p mem)))
y5:(MOVDnop x5:(MOVBUload [2] {s} p mem)))
y6:(MOVDnop x6:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
y7:(MOVDnop x7:(MOVBUloadidx ptr0 idx0 mem)))
&& s == nil
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& isSamePtr(p1, p)
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
&& clobber(o4) && clobber(o5) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (MOVDloadidx <t> ptr0 idx0 mem)
// big endian loads // big endian loads
// b[1] | b[0]<<8 -> load 16-bit, reverse // b[1] | b[0]<<8 -> load 16-bit, reverse
...@@ -1509,6 +1691,17 @@ ...@@ -1509,6 +1691,17 @@
&& clobber(x0) && clobber(x1) && clobber(x0) && clobber(x1)
&& clobber(y0) && clobber(y1) && clobber(y0) && clobber(y1)
-> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem)) -> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUload <t> [i0] {s} p mem))
(ORshiftLL <t> [8]
y0:(MOVDnop x0:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem))
y1:(MOVDnop x1:(MOVBUloadidx ptr0 idx0 mem)))
&& s == nil
&& x0.Uses == 1 && x1.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1
&& mergePoint(b,x0,x1) != nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x0) && clobber(x1)
&& clobber(y0) && clobber(y1)
-> @mergePoint(b,x0,x1) (REV16W <t> (MOVHUloadidx <t> ptr0 idx0 mem))
// b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit, reverse // b[3] | b[2]<<8 | b[1]<<16 | b[0]<<24 -> load 32-bit, reverse
(ORshiftLL <t> [24] o0:(ORshiftLL [16] (ORshiftLL <t> [24] o0:(ORshiftLL [16]
...@@ -1525,6 +1718,21 @@ ...@@ -1525,6 +1718,21 @@
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y0) && clobber(y1) && clobber(y2)
&& clobber(o0) && clobber(o0)
-> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)) -> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
(ORshiftLL <t> [24] o0:(ORshiftLL [16]
y0:(REV16W x0:(MOVHUload [2] {s} p mem))
y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
y2:(MOVDnop x2:(MOVBUloadidx ptr0 idx0 mem)))
&& s == nil
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1
&& o0.Uses == 1
&& mergePoint(b,x0,x1,x2) != nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& isSamePtr(p1, p)
&& clobber(x0) && clobber(x1) && clobber(x2)
&& clobber(y0) && clobber(y1) && clobber(y2)
&& clobber(o0)
-> @mergePoint(b,x0,x1,x2) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
// b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit, reverse // b[7] | b[6]<<8 | b[5]<<16 | b[4]<<24 | b[3]<<32 | b[2]<<40 | b[1]<<48 | b[0]<<56 -> load 64-bit, reverse
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32] (ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
...@@ -1545,6 +1753,23 @@ ...@@ -1545,6 +1753,23 @@
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o0) && clobber(o1) && clobber(o2)
-> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)) -> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
(ORshiftLL <t> [56] o0:(ORshiftLL [48] o1:(ORshiftLL [40] o2:(ORshiftLL [32]
y0:(REVW x0:(MOVWUload [4] {s} p mem))
y1:(MOVDnop x1:(MOVBUload [3] {s} p mem)))
y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
y3:(MOVDnop x3:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
y4:(MOVDnop x4:(MOVBUloadidx ptr0 idx0 mem)))
&& s == nil
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1 && x4.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1 && y4.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1
&& mergePoint(b,x0,x1,x2,x3,x4) != nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& isSamePtr(p1, p)
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3) && clobber(x4)
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y4)
&& clobber(o0) && clobber(o1) && clobber(o2)
-> @mergePoint(b,x0,x1,x2,x3,x4) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
// b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] -> load 32-bit, reverse // b[0]<<24 | b[1]<<16 | b[2]<<8 | b[3] -> load 32-bit, reverse
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24] (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
...@@ -1563,6 +1788,22 @@ ...@@ -1563,6 +1788,22 @@
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3) && clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
&& clobber(o0) && clobber(o1) && clobber(s0) && clobber(o0) && clobber(o1) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem)) -> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUload <t> {s} (OffPtr <p.Type> [i0] p) mem))
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] s0:(SLLconst [24]
y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
&& s == nil
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && s0.Uses == 1
&& mergePoint(b,x0,x1,x2,x3) != nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& isSamePtr(p1, p)
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
&& clobber(o0) && clobber(o1) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3) (REVW <t> (MOVWUloadidx <t> ptr0 idx0 mem))
// b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] -> load 64-bit, reverse // b[0]<<56 | b[1]<<48 | b[2]<<40 | b[3]<<32 | b[4]<<24 | b[5]<<16 | b[6]<<8 | b[7] -> load 64-bit, reverse
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56] (OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
...@@ -1595,6 +1836,32 @@ ...@@ -1595,6 +1836,32 @@
&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3) && clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
&& clobber(o4) && clobber(o5) && clobber(s0) && clobber(o4) && clobber(o5) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem)) -> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDload <t> {s} (OffPtr <p.Type> [i0] p) mem))
(OR <t> o0:(ORshiftLL [8] o1:(ORshiftLL [16] o2:(ORshiftLL [24] o3:(ORshiftLL [32] o4:(ORshiftLL [40] o5:(ORshiftLL [48] s0:(SLLconst [56]
y0:(MOVDnop x0:(MOVBUloadidx ptr0 idx0 mem)))
y1:(MOVDnop x1:(MOVBUload [1] {s} p1:(ADD ptr1 idx1) mem)))
y2:(MOVDnop x2:(MOVBUload [2] {s} p mem)))
y3:(MOVDnop x3:(MOVBUload [3] {s} p mem)))
y4:(MOVDnop x4:(MOVBUload [4] {s} p mem)))
y5:(MOVDnop x5:(MOVBUload [5] {s} p mem)))
y6:(MOVDnop x6:(MOVBUload [6] {s} p mem)))
y7:(MOVDnop x7:(MOVBUload [7] {s} p mem)))
&& s == nil
&& x0.Uses == 1 && x1.Uses == 1 && x2.Uses == 1 && x3.Uses == 1
&& x4.Uses == 1 && x5.Uses == 1 && x6.Uses == 1 && x7.Uses == 1
&& y0.Uses == 1 && y1.Uses == 1 && y2.Uses == 1 && y3.Uses == 1
&& y4.Uses == 1 && y5.Uses == 1 && y6.Uses == 1 && y7.Uses == 1
&& o0.Uses == 1 && o1.Uses == 1 && o2.Uses == 1 && o3.Uses == 1
&& o4.Uses == 1 && o5.Uses == 1 && s0.Uses == 1
&& mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) != nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& isSamePtr(p1, p)
&& clobber(x0) && clobber(x1) && clobber(x2) && clobber(x3)
&& clobber(x4) && clobber(x5) && clobber(x6) && clobber(x7)
&& clobber(y0) && clobber(y1) && clobber(y2) && clobber(y3)
&& clobber(y4) && clobber(y5) && clobber(y6) && clobber(y7)
&& clobber(o0) && clobber(o1) && clobber(o2) && clobber(o3)
&& clobber(o4) && clobber(o5) && clobber(s0)
-> @mergePoint(b,x0,x1,x2,x3,x4,x5,x6,x7) (REV <t> (MOVDloadidx <t> ptr0 idx0 mem))
// Combine zero stores into larger (unaligned) stores. // Combine zero stores into larger (unaligned) stores.
(MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem)) (MOVBstorezero [i] {s} ptr0 x:(MOVBstorezero [j] {s} ptr1 mem))
...@@ -1604,6 +1871,12 @@ ...@@ -1604,6 +1871,12 @@
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVHstorezero [min(i,j)] {s} ptr0 mem) -> (MOVHstorezero [min(i,j)] {s} ptr0 mem)
(MOVBstorezero [1] {s} (ADD ptr0 idx0) x:(MOVBstorezeroidx ptr1 idx1 mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstorezeroidx ptr1 idx1 mem)
(MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem)) (MOVHstorezero [i] {s} ptr0 x:(MOVHstorezero [j] {s} ptr1 mem))
&& x.Uses == 1 && x.Uses == 1
&& areAdjacentOffsets(i,j,2) && areAdjacentOffsets(i,j,2)
...@@ -1611,6 +1884,12 @@ ...@@ -1611,6 +1884,12 @@
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVWstorezero [min(i,j)] {s} ptr0 mem) -> (MOVWstorezero [min(i,j)] {s} ptr0 mem)
(MOVHstorezero [2] {s} (ADD ptr0 idx0) x:(MOVHstorezeroidx ptr1 idx1 mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVWstorezeroidx ptr1 idx1 mem)
(MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem)) (MOVWstorezero [i] {s} ptr0 x:(MOVWstorezero [j] {s} ptr1 mem))
&& x.Uses == 1 && x.Uses == 1
&& areAdjacentOffsets(i,j,4) && areAdjacentOffsets(i,j,4)
...@@ -1618,6 +1897,12 @@ ...@@ -1618,6 +1897,12 @@
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVDstorezero [min(i,j)] {s} ptr0 mem) -> (MOVDstorezero [min(i,j)] {s} ptr0 mem)
(MOVWstorezero [4] {s} (ADD ptr0 idx0) x:(MOVWstorezeroidx ptr1 idx1 mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVDstorezeroidx ptr1 idx1 mem)
(MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem)) (MOVDstorezero [i] {s} ptr0 x:(MOVDstorezero [j] {s} ptr1 mem))
&& x.Uses == 1 && x.Uses == 1
&& areAdjacentOffsets(i,j,8) && areAdjacentOffsets(i,j,8)
...@@ -1625,6 +1910,12 @@ ...@@ -1625,6 +1910,12 @@
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVQstorezero [min(i,j)] {s} ptr0 mem) -> (MOVQstorezero [min(i,j)] {s} ptr0 mem)
(MOVDstorezero [8] {s} p0:(ADD ptr0 idx0) x:(MOVDstorezeroidx ptr1 idx1 mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVQstorezero [0] {s} p0 mem)
// Combine stores into larger (unaligned) stores. // Combine stores into larger (unaligned) stores.
(MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem)) (MOVBstore [i] {s} ptr0 (SRLconst [8] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
...@@ -1632,69 +1923,150 @@ ...@@ -1632,69 +1923,150 @@
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVHstore [i-1] {s} ptr0 w mem) -> (MOVHstore [i-1] {s} ptr0 w mem)
(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] w) x:(MOVBstoreidx ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr1 idx1 w mem)
(MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem)) (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
&& x.Uses == 1 && x.Uses == 1
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVHstore [i-1] {s} ptr0 w mem) -> (MOVHstore [i-1] {s} ptr0 w mem)
(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr1 idx1 w mem)
(MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem)) (MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
&& x.Uses == 1 && x.Uses == 1
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVHstore [i-1] {s} ptr0 w mem) -> (MOVHstore [i-1] {s} ptr0 w mem)
(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr1 idx1 w mem)
(MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem)) (MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
&& x.Uses == 1 && x.Uses == 1
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVHstore [i-1] {s} ptr0 w mem) -> (MOVHstore [i-1] {s} ptr0 w mem)
(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [8] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr1 idx1 w mem)
(MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem)) (MOVBstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] w) mem))
&& x.Uses == 1 && x.Uses == 1
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVHstore [i-1] {s} ptr0 w0 mem) -> (MOVHstore [i-1] {s} ptr0 w0 mem)
(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] w) mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr1 idx1 w0 mem)
(MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem)) (MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem))
&& x.Uses == 1 && x.Uses == 1
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc) && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc)
&& getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2) && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2)
&& getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8 && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8
&& clobber(x) && clobber(x)
-> (MOVHstore [i-1] {s} ptr0 w0 mem) -> (MOVHstore [i-1] {s} ptr0 w0 mem)
(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [bfc] w) x:(MOVBstoreidx ptr1 idx1 w0:(UBFX [bfc2] w) mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc)
&& getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2)
&& getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8
&& clobber(x)
-> (MOVHstoreidx ptr1 idx1 w0 mem)
(MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem)) (MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
&& x.Uses == 1 && x.Uses == 1
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVHstore [i-1] {s} ptr0 w0 mem) -> (MOVHstore [i-1] {s} ptr0 w0 mem)
(MOVBstore [1] {s} (ADD ptr0 idx0) (SRLconst [j] (MOVDreg w)) x:(MOVBstoreidx ptr1 idx1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr1 idx1 w0 mem)
(MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem)) (MOVHstore [i] {s} ptr0 (SRLconst [16] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
&& x.Uses == 1 && x.Uses == 1
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVWstore [i-2] {s} ptr0 w mem) -> (MOVWstore [i-2] {s} ptr0 w mem)
(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] w) x:(MOVHstoreidx ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVWstoreidx ptr1 idx1 w mem)
(MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem)) (MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
&& x.Uses == 1 && x.Uses == 1
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVWstore [i-2] {s} ptr0 w mem) -> (MOVWstore [i-2] {s} ptr0 w mem)
(MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVWstoreidx ptr1 idx1 w mem)
(MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem)) (MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVDreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
&& x.Uses == 1 && x.Uses == 1
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVWstore [i-2] {s} ptr0 w mem) -> (MOVWstore [i-2] {s} ptr0 w mem)
(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [16] (MOVDreg w)) x:(MOVHstoreidx ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVWstoreidx ptr1 idx1 w mem)
(MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem)) (MOVHstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVHstore [i-2] {s} ptr1 w0:(SRLconst [j-16] w) mem))
&& x.Uses == 1 && x.Uses == 1
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVWstore [i-2] {s} ptr0 w0 mem) -> (MOVWstore [i-2] {s} ptr0 w0 mem)
(MOVHstore [2] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVHstoreidx ptr1 idx1 w0:(SRLconst [j-16] w) mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVWstoreidx ptr1 idx1 w0 mem)
(MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem)) (MOVWstore [i] {s} ptr0 (SRLconst [32] w) x:(MOVWstore [i-4] {s} ptr1 w mem))
&& x.Uses == 1 && x.Uses == 1
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVDstore [i-4] {s} ptr0 w mem) -> (MOVDstore [i-4] {s} ptr0 w mem)
(MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [32] w) x:(MOVWstoreidx ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVDstoreidx ptr1 idx1 w mem)
(MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem)) (MOVWstore [i] {s} ptr0 (SRLconst [j] w) x:(MOVWstore [i-4] {s} ptr1 w0:(SRLconst [j-32] w) mem))
&& x.Uses == 1 && x.Uses == 1
&& isSamePtr(ptr0, ptr1) && isSamePtr(ptr0, ptr1)
&& clobber(x) && clobber(x)
-> (MOVDstore [i-4] {s} ptr0 w0 mem) -> (MOVDstore [i-4] {s} ptr0 w0 mem)
(MOVWstore [4] {s} (ADD ptr0 idx0) (SRLconst [j] w) x:(MOVWstoreidx ptr1 idx1 w0:(SRLconst [j-32] w) mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVDstoreidx ptr1 idx1 w0 mem)
(MOVBstore [i] {s} ptr w (MOVBstore [i] {s} ptr w
x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
...@@ -1718,6 +2090,32 @@ ...@@ -1718,6 +2090,32 @@
&& clobber(x5) && clobber(x5)
&& clobber(x6) && clobber(x6)
-> (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem) -> (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem)
(MOVBstore [7] {s} p w
x0:(MOVBstore [6] {s} p (SRLconst [8] w)
x1:(MOVBstore [5] {s} p (SRLconst [16] w)
x2:(MOVBstore [4] {s} p (SRLconst [24] w)
x3:(MOVBstore [3] {s} p (SRLconst [32] w)
x4:(MOVBstore [2] {s} p (SRLconst [40] w)
x5:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [48] w)
x6:(MOVBstoreidx ptr0 idx0 (SRLconst [56] w) mem))))))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& x3.Uses == 1
&& x4.Uses == 1
&& x5.Uses == 1
&& x6.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& isSamePtr(p1, p)
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
&& clobber(x3)
&& clobber(x4)
&& clobber(x5)
&& clobber(x6)
-> (MOVDstoreidx ptr0 idx0 (REV <w.Type> w) mem)
(MOVBstore [i] {s} ptr w (MOVBstore [i] {s} ptr w
x0:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) x0:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w)
x1:(MOVBstore [i-2] {s} ptr (UBFX [arm64BFAuxInt(16, 16)] w) x1:(MOVBstore [i-2] {s} ptr (UBFX [arm64BFAuxInt(16, 16)] w)
...@@ -1729,6 +2127,20 @@ ...@@ -1729,6 +2127,20 @@
&& clobber(x1) && clobber(x1)
&& clobber(x2) && clobber(x2)
-> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem) -> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
(MOVBstore [3] {s} p w
x0:(MOVBstore [2] {s} p (UBFX [arm64BFAuxInt(8, 24)] w)
x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [arm64BFAuxInt(16, 16)] w)
x2:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& isSamePtr(p1, p)
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
-> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
(MOVBstore [i] {s} ptr w (MOVBstore [i] {s} ptr w
x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w))
x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w)) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVDreg w))
...@@ -1740,6 +2152,20 @@ ...@@ -1740,6 +2152,20 @@
&& clobber(x1) && clobber(x1)
&& clobber(x2) && clobber(x2)
-> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem) -> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
(MOVBstore [3] {s} p w
x0:(MOVBstore [2] {s} p (SRLconst [8] (MOVDreg w))
x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] (MOVDreg w))
x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] (MOVDreg w)) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& isSamePtr(p1, p)
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
-> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
(MOVBstore [i] {s} ptr w (MOVBstore [i] {s} ptr w
x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] w)
x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w) x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] w)
...@@ -1751,26 +2177,70 @@ ...@@ -1751,26 +2177,70 @@
&& clobber(x1) && clobber(x1)
&& clobber(x2) && clobber(x2)
-> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem) -> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
(MOVBstore [3] {s} p w
x0:(MOVBstore [2] {s} p (SRLconst [8] w)
x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (SRLconst [16] w)
x2:(MOVBstoreidx ptr0 idx0 (SRLconst [24] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& isSamePtr(p1, p)
&& clobber(x0)
&& clobber(x1)
&& clobber(x2)
-> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem)) (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] w) mem))
&& x.Uses == 1 && x.Uses == 1
&& clobber(x) && clobber(x)
-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem) -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] w) mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 8)] w) mem)) (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 8)] w) mem))
&& x.Uses == 1 && x.Uses == 1
&& clobber(x) && clobber(x)
-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem) -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 8)] w) mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem)) (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
&& x.Uses == 1 && x.Uses == 1
&& clobber(x) && clobber(x)
-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem) -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) mem)) (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) mem))
&& x.Uses == 1 && x.Uses == 1
&& clobber(x) && clobber(x)
-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem) -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 24)] w) mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem)) (MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVDreg w)) mem))
&& x.Uses == 1 && x.Uses == 1
&& clobber(x) && clobber(x)
-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem) -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (SRLconst [8] (MOVDreg w)) mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
// FP simplification // FP simplification
(FNEGS (FMULS x y)) -> (FNMULS x y) (FNEGS (FMULS x y)) -> (FNMULS x y)
......
...@@ -144,6 +144,7 @@ func init() { ...@@ -144,6 +144,7 @@ func init() {
gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}} gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}} gp22 = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp, gp}}
gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}} gpload = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
gp2load = regInfo{inputs: []regMask{gpspsbg, gpg}, outputs: []regMask{gp}}
gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}} gpstore = regInfo{inputs: []regMask{gpspsbg, gpg}}
gpstore0 = regInfo{inputs: []regMask{gpspsbg}} gpstore0 = regInfo{inputs: []regMask{gpspsbg}}
gpstore2 = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}} gpstore2 = regInfo{inputs: []regMask{gpspsbg, gpg, gpg}}
...@@ -318,6 +319,15 @@ func init() { ...@@ -318,6 +319,15 @@ func init() {
{name: "FMOVSload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVS", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "FMOVSload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVS", typ: "Float32", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem.
{name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem. {name: "FMOVDload", argLength: 2, reg: fpload, aux: "SymOff", asm: "FMOVD", typ: "Float64", faultOnNilArg0: true, symEffect: "Read"}, // load from arg0 + auxInt + aux. arg1=mem.
// register indexed load
{name: "MOVDloadidx", argLength: 3, reg: gp2load, asm: "MOVD", faultOnNilArg0: true}, // load 64-bit dword from arg0 + arg1, arg2 = mem.
{name: "MOVWloadidx", argLength: 3, reg: gp2load, asm: "MOVW", faultOnNilArg0: true}, // load 32-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVWUloadidx", argLength: 3, reg: gp2load, asm: "MOVWU", faultOnNilArg0: true}, // load 32-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{name: "MOVHloadidx", argLength: 3, reg: gp2load, asm: "MOVH", faultOnNilArg0: true}, // load 16-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVHUloadidx", argLength: 3, reg: gp2load, asm: "MOVHU", faultOnNilArg0: true}, // load 16-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{name: "MOVBloadidx", argLength: 3, reg: gp2load, asm: "MOVB", faultOnNilArg0: true}, // load 8-bit word from arg0 + arg1, sign-extended to 64-bit, arg2=mem.
{name: "MOVBUloadidx", argLength: 3, reg: gp2load, asm: "MOVBU", faultOnNilArg0: true}, // load 8-bit word from arg0 + arg1, zero-extended to 64-bit, arg2=mem.
{name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVBstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVHstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "MOVWstore", argLength: 3, reg: gpstore, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
...@@ -326,12 +336,24 @@ func init() { ...@@ -326,12 +336,24 @@ func init() {
{name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "FMOVSstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVS", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
{name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem. {name: "FMOVDstore", argLength: 3, reg: fpstore, aux: "SymOff", asm: "FMOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of arg1 to arg0 + auxInt + aux. arg2=mem.
// register indexed store
{name: "MOVBstoreidx", argLength: 4, reg: gpstore2, asm: "MOVB", typ: "Mem", faultOnNilArg0: true}, // store 1 byte of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVHstoreidx", argLength: 4, reg: gpstore2, asm: "MOVH", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVWstoreidx", argLength: 4, reg: gpstore2, asm: "MOVW", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVDstoreidx", argLength: 4, reg: gpstore2, asm: "MOVD", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes of arg2 to arg0 + arg1, arg3 = mem.
{name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVBstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVB", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 1 byte of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVHstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVH", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 2 bytes of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 4 bytes of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVD", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 8 bytes of zero to arg0 + auxInt + aux. arg1=mem.
{name: "MOVQstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of zero to arg0 + auxInt + aux. arg1=mem. {name: "MOVQstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "STP", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // store 16 bytes of zero to arg0 + auxInt + aux. arg1=mem.
// register indexed store zero
{name: "MOVBstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVB", typ: "Mem", faultOnNilArg0: true}, // store 1 byte of zero to arg0 + arg1, arg2 = mem.
{name: "MOVHstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVH", typ: "Mem", faultOnNilArg0: true}, // store 2 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "MOVWstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVW", typ: "Mem", faultOnNilArg0: true}, // store 4 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "MOVDstorezeroidx", argLength: 3, reg: gpstore, asm: "MOVD", typ: "Mem", faultOnNilArg0: true}, // store 8 bytes of zero to arg0 + arg1, arg2 = mem.
{name: "FMOVDgpfp", argLength: 1, reg: gpfp, asm: "FMOVD"}, // move int64 to float64 (no conversion) {name: "FMOVDgpfp", argLength: 1, reg: gpfp, asm: "FMOVD"}, // move int64 to float64 (no conversion)
{name: "FMOVDfpgp", argLength: 1, reg: fpgp, asm: "FMOVD"}, // move float64 to int64 (no conversion) {name: "FMOVDfpgp", argLength: 1, reg: fpgp, asm: "FMOVD"}, // move float64 to int64 (no conversion)
......
...@@ -1152,6 +1152,13 @@ const ( ...@@ -1152,6 +1152,13 @@ const (
OpARM64MOVDload OpARM64MOVDload
OpARM64FMOVSload OpARM64FMOVSload
OpARM64FMOVDload OpARM64FMOVDload
OpARM64MOVDloadidx
OpARM64MOVWloadidx
OpARM64MOVWUloadidx
OpARM64MOVHloadidx
OpARM64MOVHUloadidx
OpARM64MOVBloadidx
OpARM64MOVBUloadidx
OpARM64MOVBstore OpARM64MOVBstore
OpARM64MOVHstore OpARM64MOVHstore
OpARM64MOVWstore OpARM64MOVWstore
...@@ -1159,11 +1166,19 @@ const ( ...@@ -1159,11 +1166,19 @@ const (
OpARM64STP OpARM64STP
OpARM64FMOVSstore OpARM64FMOVSstore
OpARM64FMOVDstore OpARM64FMOVDstore
OpARM64MOVBstoreidx
OpARM64MOVHstoreidx
OpARM64MOVWstoreidx
OpARM64MOVDstoreidx
OpARM64MOVBstorezero OpARM64MOVBstorezero
OpARM64MOVHstorezero OpARM64MOVHstorezero
OpARM64MOVWstorezero OpARM64MOVWstorezero
OpARM64MOVDstorezero OpARM64MOVDstorezero
OpARM64MOVQstorezero OpARM64MOVQstorezero
OpARM64MOVBstorezeroidx
OpARM64MOVHstorezeroidx
OpARM64MOVWstorezeroidx
OpARM64MOVDstorezeroidx
OpARM64FMOVDgpfp OpARM64FMOVDgpfp
OpARM64FMOVDfpgp OpARM64FMOVDfpgp
OpARM64MOVBreg OpARM64MOVBreg
...@@ -15070,6 +15085,111 @@ var opcodeTable = [...]opInfo{ ...@@ -15070,6 +15085,111 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "MOVDloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVD,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVWloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVW,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVWUloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVWU,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVHloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVH,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVHUloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVHU,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVBloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVB,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{
name: "MOVBUloadidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVBU,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
outputs: []outputInfo{
{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
},
},
},
{ {
name: "MOVBstore", name: "MOVBstore",
auxType: auxSymOff, auxType: auxSymOff,
...@@ -15169,6 +15289,58 @@ var opcodeTable = [...]opInfo{ ...@@ -15169,6 +15289,58 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "MOVBstoreidx",
argLen: 4,
faultOnNilArg0: true,
asm: arm64.AMOVB,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVHstoreidx",
argLen: 4,
faultOnNilArg0: true,
asm: arm64.AMOVH,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVWstoreidx",
argLen: 4,
faultOnNilArg0: true,
asm: arm64.AMOVW,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVDstoreidx",
argLen: 4,
faultOnNilArg0: true,
asm: arm64.AMOVD,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{ {
name: "MOVBstorezero", name: "MOVBstorezero",
auxType: auxSymOff, auxType: auxSymOff,
...@@ -15234,6 +15406,54 @@ var opcodeTable = [...]opInfo{ ...@@ -15234,6 +15406,54 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "MOVBstorezeroidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVB,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVHstorezeroidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVH,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVWstorezeroidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVW,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{
name: "MOVDstorezeroidx",
argLen: 3,
faultOnNilArg0: true,
asm: arm64.AMOVD,
reg: regInfo{
inputs: []inputInfo{
{1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
},
},
},
{ {
name: "FMOVDgpfp", name: "FMOVDgpfp",
argLen: 1, argLen: 1,
......
This source diff could not be displayed because it is too large. You can view the blob instead.
...@@ -22,7 +22,7 @@ var sink16 uint16 ...@@ -22,7 +22,7 @@ var sink16 uint16
func load_le64(b []byte) { func load_le64(b []byte) {
// amd64:`MOVQ\s\(.*\),` // amd64:`MOVQ\s\(.*\),`
// s390x:`MOVDBR\s\(.*\),` // s390x:`MOVDBR\s\(.*\),`
// arm64:`MOVD\s\(R[0-9]+\),` // arm64:`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`
// ppc64le:`MOVD\s`,-`MOV[BHW]Z` // ppc64le:`MOVD\s`,-`MOV[BHW]Z`
sink64 = binary.LittleEndian.Uint64(b) sink64 = binary.LittleEndian.Uint64(b)
} }
...@@ -30,7 +30,7 @@ func load_le64(b []byte) { ...@@ -30,7 +30,7 @@ func load_le64(b []byte) {
func load_le64_idx(b []byte, idx int) { func load_le64_idx(b []byte, idx int) {
// amd64:`MOVQ\s\(.*\)\(.*\*1\),` // amd64:`MOVQ\s\(.*\)\(.*\*1\),`
// s390x:`MOVDBR\s\(.*\)\(.*\*1\),` // s390x:`MOVDBR\s\(.*\)\(.*\*1\),`
// arm64:`MOVD\s\(R[0-9]+\),` // arm64:`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BHW]`
// ppc64le:`MOVD\s`,-`MOV[BHW]Z\s` // ppc64le:`MOVD\s`,-`MOV[BHW]Z\s`
sink64 = binary.LittleEndian.Uint64(b[idx:]) sink64 = binary.LittleEndian.Uint64(b[idx:])
} }
...@@ -38,7 +38,7 @@ func load_le64_idx(b []byte, idx int) { ...@@ -38,7 +38,7 @@ func load_le64_idx(b []byte, idx int) {
func load_le32(b []byte) { func load_le32(b []byte) {
// amd64:`MOVL\s\(.*\),` 386:`MOVL\s\(.*\),` // amd64:`MOVL\s\(.*\),` 386:`MOVL\s\(.*\),`
// s390x:`MOVWBR\s\(.*\),` // s390x:`MOVWBR\s\(.*\),`
// arm64:`MOVWU\s\(R[0-9]+\),` // arm64:`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`
// ppc64le:`MOVWZ\s` // ppc64le:`MOVWZ\s`
sink32 = binary.LittleEndian.Uint32(b) sink32 = binary.LittleEndian.Uint32(b)
} }
...@@ -46,7 +46,7 @@ func load_le32(b []byte) { ...@@ -46,7 +46,7 @@ func load_le32(b []byte) {
func load_le32_idx(b []byte, idx int) { func load_le32_idx(b []byte, idx int) {
// amd64:`MOVL\s\(.*\)\(.*\*1\),` 386:`MOVL\s\(.*\)\(.*\*1\),` // amd64:`MOVL\s\(.*\)\(.*\*1\),` 386:`MOVL\s\(.*\)\(.*\*1\),`
// s390x:`MOVWBR\s\(.*\)\(.*\*1\),` // s390x:`MOVWBR\s\(.*\)\(.*\*1\),`
// arm64:`MOVWU\s\(R[0-9]+\),` // arm64:`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[BH]`
// ppc64le:`MOVWZ\s` // ppc64le:`MOVWZ\s`
sink32 = binary.LittleEndian.Uint32(b[idx:]) sink32 = binary.LittleEndian.Uint32(b[idx:])
} }
...@@ -54,50 +54,54 @@ func load_le32_idx(b []byte, idx int) { ...@@ -54,50 +54,54 @@ func load_le32_idx(b []byte, idx int) {
func load_le16(b []byte) { func load_le16(b []byte) {
// amd64:`MOVWLZX\s\(.*\),` // amd64:`MOVWLZX\s\(.*\),`
// ppc64le:`MOVHZ\s` // ppc64le:`MOVHZ\s`
// arm64:`MOVHU\s\(R[0-9]+\),`,-`MOVB`
sink16 = binary.LittleEndian.Uint16(b) sink16 = binary.LittleEndian.Uint16(b)
} }
func load_le16_idx(b []byte, idx int) { func load_le16_idx(b []byte, idx int) {
// amd64:`MOVWLZX\s\(.*\),` // amd64:`MOVWLZX\s\(.*\),`
// ppc64le:`MOVHZ\s` // ppc64le:`MOVHZ\s`
// arm64:`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
sink16 = binary.LittleEndian.Uint16(b[idx:]) sink16 = binary.LittleEndian.Uint16(b[idx:])
} }
func load_be64(b []byte) { func load_be64(b []byte) {
// amd64:`BSWAPQ` // amd64:`BSWAPQ`
// s390x:`MOVD\s\(.*\),` // s390x:`MOVD\s\(.*\),`
// arm64:`REV` // arm64:`REV`,`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`,-`REVW`,-`REV16W`
sink64 = binary.BigEndian.Uint64(b) sink64 = binary.BigEndian.Uint64(b)
} }
func load_be64_idx(b []byte, idx int) { func load_be64_idx(b []byte, idx int) {
// amd64:`BSWAPQ` // amd64:`BSWAPQ`
// s390x:`MOVD\s\(.*\)\(.*\*1\),` // s390x:`MOVD\s\(.*\)\(.*\*1\),`
// arm64:`REV` // arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W`
sink64 = binary.BigEndian.Uint64(b[idx:]) sink64 = binary.BigEndian.Uint64(b[idx:])
} }
func load_be32(b []byte) { func load_be32(b []byte) {
// amd64:`BSWAPL` // amd64:`BSWAPL`
// s390x:`MOVWZ\s\(.*\),` // s390x:`MOVWZ\s\(.*\),`
// arm64:`REVW` // arm64:`REVW`,`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`,-`REV16W`
sink32 = binary.BigEndian.Uint32(b) sink32 = binary.BigEndian.Uint32(b)
} }
func load_be32_idx(b []byte, idx int) { func load_be32_idx(b []byte, idx int) {
// amd64:`BSWAPL` // amd64:`BSWAPL`
// s390x:`MOVWZ\s\(.*\)\(.*\*1\),` // s390x:`MOVWZ\s\(.*\)\(.*\*1\),`
// arm64:`REVW` // arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W`
sink32 = binary.BigEndian.Uint32(b[idx:]) sink32 = binary.BigEndian.Uint32(b[idx:])
} }
func load_be16(b []byte) { func load_be16(b []byte) {
// amd64:`ROLW\s\$8` // amd64:`ROLW\s\$8`
// arm64: `REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB`
sink16 = binary.BigEndian.Uint16(b) sink16 = binary.BigEndian.Uint16(b)
} }
func load_be16_idx(b []byte, idx int) { func load_be16_idx(b []byte, idx int) {
// amd64:`ROLW\s\$8` // amd64:`ROLW\s\$8`
// arm64: `REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
sink16 = binary.BigEndian.Uint16(b[idx:]) sink16 = binary.BigEndian.Uint16(b[idx:])
} }
...@@ -162,7 +166,7 @@ func store_le64(b []byte) { ...@@ -162,7 +166,7 @@ func store_le64(b []byte) {
func store_le64_idx(b []byte, idx int) { func store_le64_idx(b []byte, idx int) {
// amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.` // amd64:`MOVQ\s.*\(.*\)\(.*\*1\)$`,-`SHR.`
// arm64:`MOVD`,-`MOV[WBH]` // arm64:`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`
// ppc64le:`MOVD\s`,-`MOV[BHW]\s` // ppc64le:`MOVD\s`,-`MOV[BHW]\s`
binary.LittleEndian.PutUint64(b[idx:], sink64) binary.LittleEndian.PutUint64(b[idx:], sink64)
} }
...@@ -176,7 +180,7 @@ func store_le32(b []byte) { ...@@ -176,7 +180,7 @@ func store_le32(b []byte) {
func store_le32_idx(b []byte, idx int) { func store_le32_idx(b []byte, idx int) {
// amd64:`MOVL\s` // amd64:`MOVL\s`
// arm64:`MOVW`,-`MOV[BH]` // arm64:`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`
// ppc64le:`MOVW\s` // ppc64le:`MOVW\s`
binary.LittleEndian.PutUint32(b[idx:], sink32) binary.LittleEndian.PutUint32(b[idx:], sink32)
} }
...@@ -190,32 +194,32 @@ func store_le16(b []byte) { ...@@ -190,32 +194,32 @@ func store_le16(b []byte) {
func store_le16_idx(b []byte, idx int) { func store_le16_idx(b []byte, idx int) {
// amd64:`MOVW\s` // amd64:`MOVW\s`
// arm64:`MOVH`,-`MOVB` // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
// ppc64le(DISABLED):`MOVH\s` // ppc64le(DISABLED):`MOVH\s`
binary.LittleEndian.PutUint16(b[idx:], sink16) binary.LittleEndian.PutUint16(b[idx:], sink16)
} }
func store_be64(b []byte) { func store_be64(b []byte) {
// amd64:`BSWAPQ`,-`SHR.` // amd64:`BSWAPQ`,-`SHR.`
// arm64:`MOVD`,`REV`,-`MOV[WBH]` // arm64:`MOVD`,`REV`,-`MOV[WBH]`,-`REVW`,-`REV16W`
binary.BigEndian.PutUint64(b, sink64) binary.BigEndian.PutUint64(b, sink64)
} }
func store_be64_idx(b []byte, idx int) { func store_be64_idx(b []byte, idx int) {
// amd64:`BSWAPQ`,-`SHR.` // amd64:`BSWAPQ`,-`SHR.`
// arm64:`MOVD`,`REV`,-`MOV[WBH]` // arm64:`REV`,`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`,-`REV16W`,-`REVW`
binary.BigEndian.PutUint64(b[idx:], sink64) binary.BigEndian.PutUint64(b[idx:], sink64)
} }
func store_be32(b []byte) { func store_be32(b []byte) {
// amd64:`BSWAPL`,-`SHR.` // amd64:`BSWAPL`,-`SHR.`
// arm64:`MOVW`,`REVW`,-`MOV[BH]` // arm64:`MOVW`,`REVW`,-`MOV[BH]`,-`REV16W`
binary.BigEndian.PutUint32(b, sink32) binary.BigEndian.PutUint32(b, sink32)
} }
func store_be32_idx(b []byte, idx int) { func store_be32_idx(b []byte, idx int) {
// amd64:`BSWAPL`,-`SHR.` // amd64:`BSWAPL`,-`SHR.`
// arm64:`MOVW`,`REVW`,-`MOV[BH]` // arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`,-`REV16W`
binary.BigEndian.PutUint32(b[idx:], sink32) binary.BigEndian.PutUint32(b[idx:], sink32)
} }
...@@ -227,7 +231,7 @@ func store_be16(b []byte) { ...@@ -227,7 +231,7 @@ func store_be16(b []byte) {
func store_be16_idx(b []byte, idx int) { func store_be16_idx(b []byte, idx int) {
// amd64:`ROLW\s\$8`,-`SHR.` // amd64:`ROLW\s\$8`,-`SHR.`
// arm64:`MOVH`,`REV16W`,-`MOVB` // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB`
binary.BigEndian.PutUint16(b[idx:], sink16) binary.BigEndian.PutUint16(b[idx:], sink16)
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment