Commit 2bf7a925 authored by Meng Zhuo's avatar Meng Zhuo Committed by Cherry Zhang

cmd/asm: add VLD[1-4]R vector instructions on arm64

This change adds VLD1R, VLD2R, VLD3R, VLD4R

Change-Id: Ie19e9ae02fdfc94b9344acde8c9938849efb0bf0
Reviewed-on: https://go-review.googlesource.com/c/go/+/181697
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
parent 5fe3b49a
...@@ -352,6 +352,18 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8 ...@@ -352,6 +352,18 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VLD4 (R15), [V10.H4, V11.H4, V12.H4, V13.H4] // ea05400c VLD4 (R15), [V10.H4, V11.H4, V12.H4, V13.H4] // ea05400c
VLD4.P 32(R24), [V31.B8, V0.B8, V1.B8, V2.B8] // 1f03df0c VLD4.P 32(R24), [V31.B8, V0.B8, V1.B8, V2.B8] // 1f03df0c
VLD4.P (R13)(R9), [V14.S2, V15.S2, V16.S2, V17.S2] // VLD4.P (R13)(R9*1), [V14.S2,V15.S2,V16.S2,V17.S2] // ae09c90c VLD4.P (R13)(R9), [V14.S2, V15.S2, V16.S2, V17.S2] // VLD4.P (R13)(R9*1), [V14.S2,V15.S2,V16.S2,V17.S2] // ae09c90c
VLD1R (R0), [V0.B16] // 00c0404d
VLD1R.P 16(R0), [V0.B16] // 00c0df4d
VLD1R.P (R15)(R1), [V15.H4] // VLD1R.P (R15)(R1*1), [V15.H4] // efc5c10d
VLD2R (R15), [V15.H4, V16.H4] // efc5600d
VLD2R.P 32(R0), [V0.D2, V1.D2] // 00ccff4d
VLD2R.P (R0)(R5), [V31.D1, V0.D1] // VLD2R.P (R0)(R5*1), [V31.D1, V0.D1] // 1fcce50d
VLD3R (RSP), [V31.S2, V0.S2, V1.S2] // ffeb400d
VLD3R.P 24(R15), [V15.H4, V16.H4, V17.H4] // efe5df0d
VLD3R.P (R15)(R6), [V15.H8, V16.H8, V17.H8] // VLD3R.P (R15)(R6*1), [V15.H8, V16.H8, V17.H8] // efe5c64d
VLD4R (R0), [V0.B8, V1.B8, V2.B8, V3.B8] // 00e0600d
VLD4R.P 64(RSP), [V31.S4, V0.S4, V1.S4, V2.S4] // ffebff4d
VLD4R.P (R15)(R9), [V15.H4, V16.H4, V17.H4, V18.H4] // VLD4R.P (R15)(R9*1), [V15.H4, V16.H4, V17.H4, V18.H4] // efe5e90d
VST1.P [V24.S2], 8(R2) // 58789f0c VST1.P [V24.S2], 8(R2) // 58789f0c
VST1 [V29.S2, V30.S2], (R29) // bdab000c VST1 [V29.S2, V30.S2], (R29) // bdab000c
VST1 [V14.H4, V15.H4, V16.H4], (R27) // 6e67000c VST1 [V14.H4, V15.H4, V16.H4], (R27) // 6e67000c
......
...@@ -956,6 +956,10 @@ const ( ...@@ -956,6 +956,10 @@ const (
AVLD2 AVLD2
AVLD3 AVLD3
AVLD4 AVLD4
AVLD1R
AVLD2R
AVLD3R
AVLD4R
AVORR AVORR
AVREV32 AVREV32
AVREV64 AVREV64
......
...@@ -463,6 +463,10 @@ var Anames = []string{ ...@@ -463,6 +463,10 @@ var Anames = []string{
"VLD2", "VLD2",
"VLD3", "VLD3",
"VLD4", "VLD4",
"VLD1R",
"VLD2R",
"VLD3R",
"VLD4R",
"VORR", "VORR",
"VREV32", "VREV32",
"VREV64", "VREV64",
......
...@@ -784,15 +784,9 @@ var optab = []Optab{ ...@@ -784,15 +784,9 @@ var optab = []Optab{
{AVLD1, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0}, {AVLD1, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0},
{AVLD1, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, {AVLD1, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD1, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, {AVLD1, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD2, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0}, {AVLD1R, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0},
{AVLD2, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, {AVLD1R, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD2, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST}, {AVLD1R, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD3, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0},
{AVLD3, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD3, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD4, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0},
{AVLD4, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD4, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD1, C_LOREG, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST}, {AVLD1, C_LOREG, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST},
{AVLD1, C_ROFF, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST}, {AVLD1, C_ROFF, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST},
{AVLD1, C_LOREG, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, 0}, {AVLD1, C_LOREG, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, 0},
...@@ -2709,13 +2703,18 @@ func buildop(ctxt *obj.Link) { ...@@ -2709,13 +2703,18 @@ func buildop(ctxt *obj.Link) {
case AVZIP1: case AVZIP1:
oprangeset(AVZIP2, t) oprangeset(AVZIP2, t)
case AVLD1R:
oprangeset(AVLD2, t)
oprangeset(AVLD2R, t)
oprangeset(AVLD3, t)
oprangeset(AVLD3R, t)
oprangeset(AVLD4, t)
oprangeset(AVLD4R, t)
case ASHA1H, case ASHA1H,
AVCNT, AVCNT,
AVMOV, AVMOV,
AVLD1, AVLD1,
AVLD2,
AVLD3,
AVLD4,
AVST1, AVST1,
AVST2, AVST2,
AVST3, AVST3,
...@@ -2803,7 +2802,7 @@ func (c *ctxt7) checkindex(p *obj.Prog, index, maxindex int) { ...@@ -2803,7 +2802,7 @@ func (c *ctxt7) checkindex(p *obj.Prog, index, maxindex int) {
func (c *ctxt7) checkoffset(p *obj.Prog, as obj.As) { func (c *ctxt7) checkoffset(p *obj.Prog, as obj.As) {
var offset, list, n, expect int64 var offset, list, n, expect int64
switch as { switch as {
case AVLD1, AVLD2, AVLD3, AVLD4: case AVLD1, AVLD2, AVLD3, AVLD4, AVLD1R, AVLD2R, AVLD3R, AVLD4R:
offset = p.From.Offset offset = p.From.Offset
list = p.To.Offset list = p.To.Offset
case AVST1, AVST2, AVST3, AVST4: case AVST1, AVST2, AVST3, AVST4:
...@@ -2836,11 +2835,13 @@ func (c *ctxt7) checkoffset(p *obj.Prog, as obj.As) { ...@@ -2836,11 +2835,13 @@ func (c *ctxt7) checkoffset(p *obj.Prog, as obj.As) {
switch as { switch as {
case AVLD1, AVST1: case AVLD1, AVST1:
return return
case AVLD2, AVST2: case AVLD1R:
expect = 1
case AVLD2, AVST2, AVLD2R:
expect = 2 expect = 2
case AVLD3, AVST3: case AVLD3, AVST3, AVLD3R:
expect = 3 expect = 3
case AVLD4, AVST4: case AVLD4, AVST4, AVLD4R:
expect = 4 expect = 4
} }
...@@ -4344,10 +4345,10 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { ...@@ -4344,10 +4345,10 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
} }
o1 |= (uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31) o1 |= (uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31)
case 81: /* vld[1-4] (Rn), [Vt1.<T>, Vt2.<T>, ...] */ case 81: /* vld[1-4]|vld[1-4]r (Rn), [Vt1.<T>, Vt2.<T>, ...] */
c.checkoffset(p, p.As) c.checkoffset(p, p.As)
r := int(p.From.Reg) r := int(p.From.Reg)
o1 = 3<<26 | 1<<22 o1 = c.oprrr(p, p.As)
if o.scond == C_XPOST { if o.scond == C_XPOST {
o1 |= 1 << 23 o1 |= 1 << 23
if p.From.Index == 0 { if p.From.Index == 0 {
...@@ -4358,7 +4359,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) { ...@@ -4358,7 +4359,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
if isRegShiftOrExt(&p.From) { if isRegShiftOrExt(&p.From) {
c.ctxt.Diag("invalid extended register op: %v\n", p) c.ctxt.Diag("invalid extended register op: %v\n", p)
} }
o1 |= uint32(p.From.Index&31) << 16 o1 |= uint32(p.From.Index&0x1f) << 16
} }
} }
o1 |= uint32(p.To.Offset) o1 |= uint32(p.To.Offset)
...@@ -5591,6 +5592,15 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 { ...@@ -5591,6 +5592,15 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
case AVRBIT: case AVRBIT:
return 0x2E<<24 | 1<<22 | 0x10<<17 | 5<<12 | 2<<10 return 0x2E<<24 | 1<<22 | 0x10<<17 | 5<<12 | 2<<10
case AVLD1, AVLD2, AVLD3, AVLD4:
return 3<<26 | 1<<22
case AVLD1R, AVLD3R:
return 0xD<<24 | 1<<22
case AVLD2R, AVLD4R:
return 0xD<<24 | 3<<21
} }
c.ctxt.Diag("%v: bad rrr %d %v", p, a, a) c.ctxt.Diag("%v: bad rrr %d %v", p, a, a)
...@@ -6779,6 +6789,10 @@ func (c *ctxt7) maskOpvldvst(p *obj.Prog, o1 uint32) uint32 { ...@@ -6779,6 +6789,10 @@ func (c *ctxt7) maskOpvldvst(p *obj.Prog, o1 uint32) uint32 {
o1 &^= 0xf000 // mask out "opcode" field (bit 12-15) o1 &^= 0xf000 // mask out "opcode" field (bit 12-15)
switch p.As { switch p.As {
case AVLD1R, AVLD2R:
o1 |= 0xC << 12
case AVLD3R, AVLD4R:
o1 |= 0xE << 12
case AVLD2, AVST2: case AVLD2, AVST2:
o1 |= 8 << 12 o1 |= 8 << 12
case AVLD3, AVST3: case AVLD3, AVST3:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment