Commit 39333025 authored by Ben Shi's avatar Ben Shi

cmd/compile: add indexed form for several 386 instructions

This CL implements indexed memory operands for the following instructions.
(ADD|SUB|MUL|AND|OR|XOR)Lload -> (ADD|SUB|MUL|AND|OR|XOR)Lloadidx4
(ADD|SUB|AND|OR|XOR)Lmodify -> (ADD|SUB|AND|OR|XOR)Lmodifyidx4
(ADD|AND|OR|XOR)Lconstmodify -> (ADD|AND|OR|XOR)Lconstmodifyidx4

1. The total size of pkg/linux_386/ decreases about 2.5KB, excluding
cmd/compile/ .

2. There is little regression in the go1 benchmark test, excluding noise.
name                     old time/op    new time/op    delta
BinaryTree17-4              3.25s ± 3%     3.25s ± 3%    ~     (p=0.218 n=40+40)
Fannkuch11-4                3.53s ± 1%     3.53s ± 1%    ~     (p=0.303 n=40+40)
FmtFprintfEmpty-4          44.9ns ± 3%    45.6ns ± 3%  +1.48%  (p=0.030 n=40+36)
FmtFprintfString-4         78.7ns ± 5%    80.1ns ± 7%    ~     (p=0.217 n=36+40)
FmtFprintfInt-4            90.2ns ± 6%    89.8ns ± 5%    ~     (p=0.659 n=40+38)
FmtFprintfIntInt-4          140ns ± 5%     141ns ± 5%  +1.00%  (p=0.027 n=40+40)
FmtFprintfPrefixedInt-4     185ns ± 3%     183ns ± 3%    ~     (p=0.104 n=40+40)
FmtFprintfFloat-4           411ns ± 4%     406ns ± 3%  -1.37%  (p=0.005 n=40+40)
FmtManyArgs-4               590ns ± 4%     598ns ± 4%  +1.35%  (p=0.008 n=40+40)
GobDecode-4                7.16ms ± 5%    7.10ms ± 5%    ~     (p=0.335 n=40+40)
GobEncode-4                6.85ms ± 7%    6.74ms ± 9%    ~     (p=0.058 n=38+40)
Gzip-4                      400ms ± 4%     399ms ± 2%  -0.34%  (p=0.003 n=40+33)
Gunzip-4                   41.4ms ± 3%    41.4ms ± 4%  -0.12%  (p=0.020 n=40+40)
HTTPClientServer-4         64.1µs ± 4%    63.5µs ± 2%  -1.07%  (p=0.000 n=39+37)
JSONEncode-4               15.9ms ± 2%    15.9ms ± 3%    ~     (p=0.103 n=40+40)
JSONDecode-4               62.2ms ± 4%    61.6ms ± 3%  -0.98%  (p=0.006 n=39+40)
Mandelbrot200-4            5.18ms ± 3%    5.14ms ± 4%    ~     (p=0.125 n=40+40)
GoParse-4                  3.29ms ± 2%    3.27ms ± 2%  -0.66%  (p=0.006 n=40+40)
RegexpMatchEasy0_32-4       103ns ± 4%     103ns ± 4%    ~     (p=0.632 n=40+40)
RegexpMatchEasy0_1K-4       830ns ± 3%     828ns ± 3%    ~     (p=0.563 n=40+40)
RegexpMatchEasy1_32-4       113ns ± 4%     113ns ± 4%    ~     (p=0.494 n=40+40)
RegexpMatchEasy1_1K-4      1.03µs ± 4%    1.03µs ± 4%    ~     (p=0.665 n=40+40)
RegexpMatchMedium_32-4      130ns ± 4%     129ns ± 3%    ~     (p=0.458 n=40+40)
RegexpMatchMedium_1K-4     39.4µs ± 3%    39.7µs ± 3%    ~     (p=0.825 n=40+40)
RegexpMatchHard_32-4       2.16µs ± 4%    2.15µs ± 4%    ~     (p=0.137 n=40+40)
RegexpMatchHard_1K-4       65.2µs ± 3%    65.4µs ± 4%    ~     (p=0.160 n=40+40)
Revcomp-4                   1.87s ± 2%     1.87s ± 1%  +0.17%  (p=0.019 n=33+33)
Template-4                 69.4ms ± 3%    69.8ms ± 3%  +0.60%  (p=0.009 n=40+40)
TimeParse-4                 437ns ± 4%     438ns ± 4%    ~     (p=0.234 n=40+40)
TimeFormat-4                408ns ± 3%     408ns ± 3%    ~     (p=0.904 n=40+40)
[Geo mean]                 65.7µs         65.6µs       -0.08%

name                     old speed      new speed      delta
GobDecode-4               107MB/s ± 5%   108MB/s ± 5%    ~     (p=0.336 n=40+40)
GobEncode-4               112MB/s ± 6%   114MB/s ± 9%  +1.95%  (p=0.036 n=37+40)
Gzip-4                   48.5MB/s ± 4%  48.6MB/s ± 2%  +0.28%  (p=0.003 n=40+33)
Gunzip-4                  469MB/s ± 4%   469MB/s ± 4%  +0.11%  (p=0.021 n=40+40)
JSONEncode-4              122MB/s ± 2%   122MB/s ± 3%    ~     (p=0.105 n=40+40)
JSONDecode-4             31.2MB/s ± 4%  31.5MB/s ± 4%  +0.99%  (p=0.007 n=39+40)
GoParse-4                17.6MB/s ± 2%  17.7MB/s ± 2%  +0.66%  (p=0.007 n=40+40)
RegexpMatchEasy0_32-4     310MB/s ± 4%   310MB/s ± 4%    ~     (p=0.384 n=40+40)
RegexpMatchEasy0_1K-4    1.23GB/s ± 3%  1.24GB/s ± 3%    ~     (p=0.186 n=40+40)
RegexpMatchEasy1_32-4     283MB/s ± 3%   281MB/s ± 4%    ~     (p=0.855 n=40+40)
RegexpMatchEasy1_1K-4    1.00GB/s ± 4%  1.00GB/s ± 4%    ~     (p=0.665 n=40+40)
RegexpMatchMedium_32-4   7.68MB/s ± 4%  7.73MB/s ± 3%    ~     (p=0.359 n=40+40)
RegexpMatchMedium_1K-4   26.0MB/s ± 3%  25.8MB/s ± 3%    ~     (p=0.825 n=40+40)
RegexpMatchHard_32-4     14.8MB/s ± 3%  14.9MB/s ± 4%    ~     (p=0.136 n=40+40)
RegexpMatchHard_1K-4     15.7MB/s ± 3%  15.7MB/s ± 4%    ~     (p=0.150 n=40+40)
Revcomp-4                 136MB/s ± 1%   136MB/s ± 1%  -0.09%  (p=0.028 n=32+33)
Template-4               28.0MB/s ± 3%  27.8MB/s ± 3%  -0.59%  (p=0.010 n=40+40)
[Geo mean]               82.1MB/s       82.3MB/s       +0.25%

Change-Id: Ifa387a251056678326d3508aa02753b70bf7e5d0
Reviewed-on: https://go-review.googlesource.com/c/140303
Run-TryBot: Ben Shi <powerman1st@163.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
parent 2afdd17e
......@@ -627,14 +627,26 @@
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val (ADDLconst [off2] base) idx mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {sym} val base idx mem)
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym} val base (ADDLconst [off2] idx) mem) && is32Bit(off1+off2*4) ->
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2*4] {sym} val base idx mem)
((ADD|SUB|MUL|DIV)SSload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL|DIV)SSload [off1+off2] {sym} val base mem)
((ADD|SUB|MUL|DIV)SDload [off1] {sym} val (ADDLconst [off2] base) mem) && is32Bit(off1+off2) ->
((ADD|SUB|MUL|DIV)SDload [off1+off2] {sym} val base mem)
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym} (ADDLconst [off2] base) val mem) && is32Bit(off1+off2) ->
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {sym} base val mem)
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} (ADDLconst [off2] base) idx val mem) && is32Bit(off1+off2) ->
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {sym} base idx val mem)
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym} base (ADDLconst [off2] idx) val mem) && is32Bit(off1+off2*4) ->
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2*4] {sym} base idx val mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym} (ADDLconst [off2] base) mem) && ValAndOff(valoff1).canAdd(off2) ->
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {sym} base mem)
((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} (ADDLconst [off2] base) idx mem) && ValAndOff(valoff1).canAdd(off2) ->
((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {sym} base idx mem)
((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym} base (ADDLconst [off2] idx) mem) && ValAndOff(valoff1).canAdd(off2*4) ->
((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2*4)] {sym} base idx mem)
// Fold constants into stores.
(MOVLstore [off] {sym} ptr (MOVLconst [c]) mem) && validOff(off) ->
......@@ -690,6 +702,9 @@
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1] {sym1} val (LEAL [off2] {sym2} base) idx mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val base idx mem)
((ADD|SUB|MUL|DIV)SSload [off1] {sym1} val (LEAL [off2] {sym2} base) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|SUB|MUL|DIV)SSload [off1+off2] {mergeSym(sym1,sym2)} val base mem)
......@@ -699,9 +714,15 @@
((ADD|SUB|AND|OR|XOR)Lmodify [off1] {sym1} (LEAL [off2] {sym2} base) val mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|SUB|AND|OR|XOR)Lmodify [off1+off2] {mergeSym(sym1,sym2)} base val mem)
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1] {sym1} (LEAL [off2] {sym2} base) idx val mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off1+off2] {mergeSym(sym1,sym2)} base idx val mem)
((ADD|AND|OR|XOR)Lconstmodify [valoff1] {sym1} (LEAL [off2] {sym2} base) mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|AND|OR|XOR)Lconstmodify [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base mem)
((ADD|AND|OR|XOR)Lconstmodifyidx4 [valoff1] {sym1} (LEAL [off2] {sym2} base) idx mem)
&& ValAndOff(valoff1).canAdd(off2) && canMergeSym(sym1, sym2) && (base.Op != OpSB || !config.ctxt.Flag_shared) ->
((ADD|AND|OR|XOR)Lconstmodifyidx4 [ValAndOff(valoff1).add(off2)] {mergeSym(sym1,sym2)} base idx mem)
(MOV(B|W|L|SS|SD)load [off] {sym} (ADDL ptr idx) mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)loadidx1 [off] {sym} ptr idx mem)
(MOV(B|W|L|SS|SD)store [off] {sym} (ADDL ptr idx) val mem) && ptr.Op != OpSB -> (MOV(B|W|L|SS|SD)storeidx1 [off] {sym} ptr idx val mem)
......@@ -746,14 +767,30 @@
// Merge load/store to op
((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && clobber(l) -> ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem)
((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLloadidx4 [off] {sym} ptr idx mem)) && canMergeLoad(v, l, x) && clobber(l) ->
((ADD|AND|OR|XOR|SUB|MUL)Lloadidx4 x [off] {sym} ptr idx mem)
((ADD|SUB|MUL|AND|OR|XOR)Lload [off1] {sym1} val (LEAL4 [off2] {sym2} ptr idx) mem)
&& is32Bit(off1+off2) && canMergeSym(sym1, sym2) ->
((ADD|SUB|MUL|AND|OR|XOR)Lloadidx4 [off1+off2] {mergeSym(sym1,sym2)} val ptr idx mem)
((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoad(v, l, x) && !config.use387 && clobber(l) -> ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) -> ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lloadidx4 x [off] {sym} ptr idx mem) mem) && y.Uses==1 && clobber(y) ->
((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem)
(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|SUB|AND|OR|XOR)L l:(MOVLloadidx4 [off] {sym} ptr idx mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) ->
((ADD|SUB|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx x mem)
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLload [off] {sym} ptr mem)) mem)
&& y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) && validValAndOff(c,off) ->
((ADD|AND|OR|XOR)Lconstmodify [makeValAndOff(c,off)] {sym} ptr mem)
(MOVLstoreidx4 {sym} [off] ptr idx y:((ADD|AND|OR|XOR)Lconst [c] l:(MOVLloadidx4 [off] {sym} ptr idx mem)) mem)
&& y.Uses==1 && l.Uses==1 && clobber(y) && clobber(l) && validValAndOff(c,off) ->
((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
((ADD|AND|OR|XOR)Lmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(c,off) ->
((ADD|AND|OR|XOR)Lconstmodifyidx4 [makeValAndOff(c,off)] {sym} ptr idx mem)
(SUBLmodifyidx4 [off] {sym} ptr idx (MOVLconst [c]) mem) && validValAndOff(-c,off) ->
(ADDLconstmodifyidx4 [makeValAndOff(-c,off)] {sym} ptr idx mem)
(MOV(B|W|L)storeconstidx1 [x] {sym} (ADDLconst [c] ptr) idx mem) ->
(MOV(B|W|L)storeconstidx1 [ValAndOff(x).add(c)] {sym} ptr idx mem)
......
......@@ -126,9 +126,10 @@ func init() {
readflags = regInfo{inputs: nil, outputs: gponly}
flagsgpax = regInfo{inputs: nil, clobbers: ax, outputs: []regMask{gp &^ ax}}
gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
gp21load = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
gpload = regInfo{inputs: []regMask{gpspsb, 0}, outputs: gponly}
gp21load = regInfo{inputs: []regMask{gp, gpspsb, 0}, outputs: gponly}
gploadidx = regInfo{inputs: []regMask{gpspsb, gpsp, 0}, outputs: gponly}
gp21loadidx = regInfo{inputs: []regMask{gp, gpspsb, gpsp, 0}, outputs: gponly}
gpstore = regInfo{inputs: []regMask{gpspsb, gpsp, 0}}
gpstoreconst = regInfo{inputs: []regMask{gpspsb, 0}}
......@@ -281,6 +282,7 @@ func init() {
{name: "ROLWconst", argLength: 1, reg: gp11, asm: "ROLW", aux: "Int16", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-15
{name: "ROLBconst", argLength: 1, reg: gp11, asm: "ROLB", aux: "Int8", resultInArg0: true, clobberFlags: true}, // arg0 rotate left auxint, rotate amount 0-7
// binary-op with a memory source operand
{name: "ADDLload", argLength: 3, reg: gp21load, asm: "ADDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
{name: "SUBLload", argLength: 3, reg: gp21load, asm: "SUBL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
{name: "MULLload", argLength: 3, reg: gp21load, asm: "IMULL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
......@@ -288,6 +290,14 @@ func init() {
{name: "ORLload", argLength: 3, reg: gp21load, asm: "ORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
{name: "XORLload", argLength: 3, reg: gp21load, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+auxint+aux, arg2 = mem
// binary-op with an indexed memory source operand
{name: "ADDLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ADDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 + tmp, tmp loaded from arg1+arg2*4+auxint+aux, arg3 = mem
{name: "SUBLloadidx4", argLength: 4, reg: gp21loadidx, asm: "SUBL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 - tmp, tmp loaded from arg1+arg2*4+auxint+aux, arg3 = mem
{name: "MULLloadidx4", argLength: 4, reg: gp21loadidx, asm: "IMULL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 * tmp, tmp loaded from arg1+arg2*4+auxint+aux, arg3 = mem
{name: "ANDLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ANDL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 & tmp, tmp loaded from arg1+arg2*4+auxint+aux, arg3 = mem
{name: "ORLloadidx4", argLength: 4, reg: gp21loadidx, asm: "ORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 | tmp, tmp loaded from arg1+arg2*4+auxint+aux, arg3 = mem
{name: "XORLloadidx4", argLength: 4, reg: gp21loadidx, asm: "XORL", aux: "SymOff", resultInArg0: true, clobberFlags: true, faultOnNilArg1: true, symEffect: "Read"}, // arg0 ^ tmp, tmp loaded from arg1+arg2*4+auxint+aux, arg3 = mem
// unary ops
{name: "NEGL", argLength: 1, reg: gp11, asm: "NEGL", resultInArg0: true, clobberFlags: true}, // -arg0
......@@ -367,12 +377,25 @@ func init() {
{name: "ORLmodify", argLength: 3, reg: gpstore, asm: "ORL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) |= arg1, arg2=mem
{name: "XORLmodify", argLength: 3, reg: gpstore, asm: "XORL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+auxint+aux) ^= arg1, arg2=mem
// direct binary-op on indexed memory (read-modify-write)
{name: "ADDLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "ADDL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+arg1*4+auxint+aux) += arg2, arg3=mem
{name: "SUBLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "SUBL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+arg1*4+auxint+aux) -= arg2, arg3=mem
{name: "ANDLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "ANDL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+arg1*4+auxint+aux) &= arg2, arg3=mem
{name: "ORLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "ORL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+arg1*4+auxint+aux) |= arg2, arg3=mem
{name: "XORLmodifyidx4", argLength: 4, reg: gpstoreidx, asm: "XORL", aux: "SymOff", typ: "Mem", faultOnNilArg0: true, clobberFlags: true, symEffect: "Read,Write"}, // *(arg0+arg1*4+auxint+aux) ^= arg2, arg3=mem
// direct binary-op on memory with a constant (read-modify-write)
{name: "ADDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ADDL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // add ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "ANDLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ANDL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // and ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "ORLconstmodify", argLength: 2, reg: gpstoreconst, asm: "ORL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // or ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
{name: "XORLconstmodify", argLength: 2, reg: gpstoreconst, asm: "XORL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // xor ValAndOff(AuxInt).Val() to arg0+ValAndOff(AuxInt).Off()+aux, arg1=mem
// direct binary-op on indexed memory with a constant (read-modify-write)
{name: "ADDLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "ADDL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // add ValAndOff(AuxInt).Val() to arg0+arg1*4+ValAndOff(AuxInt).Off()+aux, arg2=mem
{name: "ANDLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "ANDL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // and ValAndOff(AuxInt).Val() to arg0+arg1*4+ValAndOff(AuxInt).Off()+aux, arg2=mem
{name: "ORLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "ORL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // or ValAndOff(AuxInt).Val() to arg0+arg1*4+ValAndOff(AuxInt).Off()+aux, arg2=mem
{name: "XORLconstmodifyidx4", argLength: 3, reg: gpstoreconstidx, asm: "XORL", aux: "SymValAndOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // xor ValAndOff(AuxInt).Val() to arg0+arg1*4+ValAndOff(AuxInt).Off()+aux, arg2=mem
// indexed loads/stores
{name: "MOVBloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVBLZX", aux: "SymOff", symEffect: "Read"}, // load a byte from arg0+arg1+auxint+aux. arg2=mem
{name: "MOVWloadidx1", argLength: 3, reg: gploadidx, commutative: true, asm: "MOVWLZX", aux: "SymOff", symEffect: "Read"}, // load 2 bytes from arg0+arg1+auxint+aux. arg2=mem
......
......@@ -339,6 +339,12 @@ const (
Op386ANDLload
Op386ORLload
Op386XORLload
Op386ADDLloadidx4
Op386SUBLloadidx4
Op386MULLloadidx4
Op386ANDLloadidx4
Op386ORLloadidx4
Op386XORLloadidx4
Op386NEGL
Op386NOTL
Op386BSFL
......@@ -394,10 +400,19 @@ const (
Op386ANDLmodify
Op386ORLmodify
Op386XORLmodify
Op386ADDLmodifyidx4
Op386SUBLmodifyidx4
Op386ANDLmodifyidx4
Op386ORLmodifyidx4
Op386XORLmodifyidx4
Op386ADDLconstmodify
Op386ANDLconstmodify
Op386ORLconstmodify
Op386XORLconstmodify
Op386ADDLconstmodifyidx4
Op386ANDLconstmodifyidx4
Op386ORLconstmodifyidx4
Op386XORLconstmodifyidx4
Op386MOVBloadidx1
Op386MOVWloadidx1
Op386MOVWloadidx2
......@@ -4019,6 +4034,126 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "ADDLloadidx4",
auxType: auxSymOff,
argLen: 4,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
symEffect: SymRead,
asm: x86.AADDL,
reg: regInfo{
inputs: []inputInfo{
{0, 239}, // AX CX DX BX BP SI DI
{2, 255}, // AX CX DX BX SP BP SI DI
{1, 65791}, // AX CX DX BX SP BP SI DI SB
},
outputs: []outputInfo{
{0, 239}, // AX CX DX BX BP SI DI
},
},
},
{
name: "SUBLloadidx4",
auxType: auxSymOff,
argLen: 4,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
symEffect: SymRead,
asm: x86.ASUBL,
reg: regInfo{
inputs: []inputInfo{
{0, 239}, // AX CX DX BX BP SI DI
{2, 255}, // AX CX DX BX SP BP SI DI
{1, 65791}, // AX CX DX BX SP BP SI DI SB
},
outputs: []outputInfo{
{0, 239}, // AX CX DX BX BP SI DI
},
},
},
{
name: "MULLloadidx4",
auxType: auxSymOff,
argLen: 4,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
symEffect: SymRead,
asm: x86.AIMULL,
reg: regInfo{
inputs: []inputInfo{
{0, 239}, // AX CX DX BX BP SI DI
{2, 255}, // AX CX DX BX SP BP SI DI
{1, 65791}, // AX CX DX BX SP BP SI DI SB
},
outputs: []outputInfo{
{0, 239}, // AX CX DX BX BP SI DI
},
},
},
{
name: "ANDLloadidx4",
auxType: auxSymOff,
argLen: 4,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
symEffect: SymRead,
asm: x86.AANDL,
reg: regInfo{
inputs: []inputInfo{
{0, 239}, // AX CX DX BX BP SI DI
{2, 255}, // AX CX DX BX SP BP SI DI
{1, 65791}, // AX CX DX BX SP BP SI DI SB
},
outputs: []outputInfo{
{0, 239}, // AX CX DX BX BP SI DI
},
},
},
{
name: "ORLloadidx4",
auxType: auxSymOff,
argLen: 4,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
symEffect: SymRead,
asm: x86.AORL,
reg: regInfo{
inputs: []inputInfo{
{0, 239}, // AX CX DX BX BP SI DI
{2, 255}, // AX CX DX BX SP BP SI DI
{1, 65791}, // AX CX DX BX SP BP SI DI SB
},
outputs: []outputInfo{
{0, 239}, // AX CX DX BX BP SI DI
},
},
},
{
name: "XORLloadidx4",
auxType: auxSymOff,
argLen: 4,
resultInArg0: true,
clobberFlags: true,
faultOnNilArg1: true,
symEffect: SymRead,
asm: x86.AXORL,
reg: regInfo{
inputs: []inputInfo{
{0, 239}, // AX CX DX BX BP SI DI
{2, 255}, // AX CX DX BX SP BP SI DI
{1, 65791}, // AX CX DX BX SP BP SI DI SB
},
outputs: []outputInfo{
{0, 239}, // AX CX DX BX BP SI DI
},
},
},
{
name: "NEGL",
argLen: 1,
......@@ -4743,6 +4878,86 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "ADDLmodifyidx4",
auxType: auxSymOff,
argLen: 4,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AADDL,
reg: regInfo{
inputs: []inputInfo{
{1, 255}, // AX CX DX BX SP BP SI DI
{2, 255}, // AX CX DX BX SP BP SI DI
{0, 65791}, // AX CX DX BX SP BP SI DI SB
},
},
},
{
name: "SUBLmodifyidx4",
auxType: auxSymOff,
argLen: 4,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.ASUBL,
reg: regInfo{
inputs: []inputInfo{
{1, 255}, // AX CX DX BX SP BP SI DI
{2, 255}, // AX CX DX BX SP BP SI DI
{0, 65791}, // AX CX DX BX SP BP SI DI SB
},
},
},
{
name: "ANDLmodifyidx4",
auxType: auxSymOff,
argLen: 4,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AANDL,
reg: regInfo{
inputs: []inputInfo{
{1, 255}, // AX CX DX BX SP BP SI DI
{2, 255}, // AX CX DX BX SP BP SI DI
{0, 65791}, // AX CX DX BX SP BP SI DI SB
},
},
},
{
name: "ORLmodifyidx4",
auxType: auxSymOff,
argLen: 4,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AORL,
reg: regInfo{
inputs: []inputInfo{
{1, 255}, // AX CX DX BX SP BP SI DI
{2, 255}, // AX CX DX BX SP BP SI DI
{0, 65791}, // AX CX DX BX SP BP SI DI SB
},
},
},
{
name: "XORLmodifyidx4",
auxType: auxSymOff,
argLen: 4,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AXORL,
reg: regInfo{
inputs: []inputInfo{
{1, 255}, // AX CX DX BX SP BP SI DI
{2, 255}, // AX CX DX BX SP BP SI DI
{0, 65791}, // AX CX DX BX SP BP SI DI SB
},
},
},
{
name: "ADDLconstmodify",
auxType: auxSymValAndOff,
......@@ -4799,6 +5014,66 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "ADDLconstmodifyidx4",
auxType: auxSymValAndOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AADDL,
reg: regInfo{
inputs: []inputInfo{
{1, 255}, // AX CX DX BX SP BP SI DI
{0, 65791}, // AX CX DX BX SP BP SI DI SB
},
},
},
{
name: "ANDLconstmodifyidx4",
auxType: auxSymValAndOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AANDL,
reg: regInfo{
inputs: []inputInfo{
{1, 255}, // AX CX DX BX SP BP SI DI
{0, 65791}, // AX CX DX BX SP BP SI DI SB
},
},
},
{
name: "ORLconstmodifyidx4",
auxType: auxSymValAndOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AORL,
reg: regInfo{
inputs: []inputInfo{
{1, 255}, // AX CX DX BX SP BP SI DI
{0, 65791}, // AX CX DX BX SP BP SI DI SB
},
},
},
{
name: "XORLconstmodifyidx4",
auxType: auxSymValAndOff,
argLen: 3,
clobberFlags: true,
faultOnNilArg0: true,
symEffect: SymRead | SymWrite,
asm: x86.AXORL,
reg: regInfo{
inputs: []inputInfo{
{1, 255}, // AX CX DX BX SP BP SI DI
{0, 65791}, // AX CX DX BX SP BP SI DI SB
},
},
},
{
name: "MOVBloadidx1",
auxType: auxSymOff,
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -508,6 +508,19 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.Op386ADDLloadidx4, ssa.Op386SUBLloadidx4, ssa.Op386MULLloadidx4,
ssa.Op386ANDLloadidx4, ssa.Op386ORLloadidx4, ssa.Op386XORLloadidx4:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[1].Reg()
p.From.Index = v.Args[2].Reg()
p.From.Scale = 4
gc.AddAux(&p.From, v)
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
if v.Reg() != v.Args[0].Reg() {
v.Fatalf("input[0] and output not in same register %s", v.LongString())
}
case ssa.Op386ADDLload, ssa.Op386SUBLload, ssa.Op386MULLload,
ssa.Op386ANDLload, ssa.Op386ORLload, ssa.Op386XORLload,
ssa.Op386ADDSDload, ssa.Op386ADDSSload, ssa.Op386SUBSDload, ssa.Op386SUBSSload,
......@@ -557,7 +570,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = v.Args[0].Reg()
gc.AddAux2(&p.To, v, off)
case ssa.Op386MOVBstoreidx1, ssa.Op386MOVWstoreidx1, ssa.Op386MOVLstoreidx1, ssa.Op386MOVSSstoreidx1, ssa.Op386MOVSDstoreidx1,
ssa.Op386MOVSDstoreidx8, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4, ssa.Op386MOVWstoreidx2:
ssa.Op386MOVSDstoreidx8, ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4, ssa.Op386MOVWstoreidx2,
ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
r := v.Args[0].Reg()
i := v.Args[1].Reg()
p := s.Prog(v.Op.Asm())
......@@ -572,7 +586,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Scale = 1
case ssa.Op386MOVSDstoreidx8:
p.To.Scale = 8
case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4:
case ssa.Op386MOVSSstoreidx4, ssa.Op386MOVLstoreidx4,
ssa.Op386ADDLmodifyidx4, ssa.Op386SUBLmodifyidx4, ssa.Op386ANDLmodifyidx4, ssa.Op386ORLmodifyidx4, ssa.Op386XORLmodifyidx4:
p.To.Scale = 4
case ssa.Op386MOVWstoreidx2:
p.To.Scale = 2
......@@ -588,7 +603,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
gc.AddAux2(&p.To, v, sc.Off())
case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1:
case ssa.Op386MOVLstoreconstidx1, ssa.Op386MOVLstoreconstidx4, ssa.Op386MOVWstoreconstidx1, ssa.Op386MOVWstoreconstidx2, ssa.Op386MOVBstoreconstidx1,
ssa.Op386ADDLconstmodifyidx4, ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_CONST
sc := v.AuxValAndOff()
......@@ -603,7 +619,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
}
case ssa.Op386MOVWstoreconstidx2:
p.To.Scale = 2
case ssa.Op386MOVLstoreconstidx4:
case ssa.Op386MOVLstoreconstidx4,
ssa.Op386ADDLconstmodifyidx4, ssa.Op386ANDLconstmodifyidx4, ssa.Op386ORLconstmodifyidx4, ssa.Op386XORLconstmodifyidx4:
p.To.Scale = 4
}
p.To.Type = obj.TYPE_MEM
......
......@@ -14,7 +14,8 @@ package codegen
// Subtraction //
// ----------------- //
func SubMem(arr []int, b int) int {
var ef int
func SubMem(arr []int, b, c, d int) int {
// 386:`SUBL\s[A-Z]+,\s8\([A-Z]+\)`
// amd64:`SUBQ\s[A-Z]+,\s16\([A-Z]+\)`
arr[2] -= b
......@@ -25,6 +26,12 @@ func SubMem(arr []int, b int) int {
arr[4]--
// 386:`ADDL\s[$]-20,\s20\([A-Z]+\)`
arr[5] -= 20
// 386:`SUBL\s\([A-Z]+\)\([A-Z]+\*4\),\s[A-Z]+`
ef -= arr[b]
// 386:`SUBL\s[A-Z]+,\s\([A-Z]+\)\([A-Z]+\*4\)`
arr[c] -= b
// 386:`ADDL\s[$]-15,\s\([A-Z]+\)\([A-Z]+\*4\)`
arr[d] -= 15
// 386:"SUBL\t4"
// amd64:"SUBQ\t8"
return arr[0] - arr[1]
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment