Commit 2bf7a925 authored by Meng Zhuo's avatar Meng Zhuo Committed by Cherry Zhang

cmd/asm: add VLD[1-4]R vector instructions on arm64

This change adds VLD1R, VLD2R, VLD3R, VLD4R

Change-Id: Ie19e9ae02fdfc94b9344acde8c9938849efb0bf0
Reviewed-on: https://go-review.googlesource.com/c/go/+/181697
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarCherry Zhang <cherryyz@google.com>
parent 5fe3b49a
......@@ -352,6 +352,18 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
VLD4 (R15), [V10.H4, V11.H4, V12.H4, V13.H4] // ea05400c
VLD4.P 32(R24), [V31.B8, V0.B8, V1.B8, V2.B8] // 1f03df0c
VLD4.P (R13)(R9), [V14.S2, V15.S2, V16.S2, V17.S2] // VLD4.P (R13)(R9*1), [V14.S2,V15.S2,V16.S2,V17.S2] // ae09c90c
VLD1R (R0), [V0.B16] // 00c0404d
VLD1R.P 16(R0), [V0.B16] // 00c0df4d
VLD1R.P (R15)(R1), [V15.H4] // VLD1R.P (R15)(R1*1), [V15.H4] // efc5c10d
VLD2R (R15), [V15.H4, V16.H4] // efc5600d
VLD2R.P 32(R0), [V0.D2, V1.D2] // 00ccff4d
VLD2R.P (R0)(R5), [V31.D1, V0.D1] // VLD2R.P (R0)(R5*1), [V31.D1, V0.D1] // 1fcce50d
VLD3R (RSP), [V31.S2, V0.S2, V1.S2] // ffeb400d
VLD3R.P 24(R15), [V15.H4, V16.H4, V17.H4] // efe5df0d
VLD3R.P (R15)(R6), [V15.H8, V16.H8, V17.H8] // VLD3R.P (R15)(R6*1), [V15.H8, V16.H8, V17.H8] // efe5c64d
VLD4R (R0), [V0.B8, V1.B8, V2.B8, V3.B8] // 00e0600d
VLD4R.P 64(RSP), [V31.S4, V0.S4, V1.S4, V2.S4] // ffebff4d
VLD4R.P (R15)(R9), [V15.H4, V16.H4, V17.H4, V18.H4] // VLD4R.P (R15)(R9*1), [V15.H4, V16.H4, V17.H4, V18.H4] // efe5e90d
VST1.P [V24.S2], 8(R2) // 58789f0c
VST1 [V29.S2, V30.S2], (R29) // bdab000c
VST1 [V14.H4, V15.H4, V16.H4], (R27) // 6e67000c
......
......@@ -956,6 +956,10 @@ const (
AVLD2
AVLD3
AVLD4
AVLD1R
AVLD2R
AVLD3R
AVLD4R
AVORR
AVREV32
AVREV64
......
......@@ -463,6 +463,10 @@ var Anames = []string{
"VLD2",
"VLD3",
"VLD4",
"VLD1R",
"VLD2R",
"VLD3R",
"VLD4R",
"VORR",
"VREV32",
"VREV64",
......
......@@ -784,15 +784,9 @@ var optab = []Optab{
{AVLD1, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0},
{AVLD1, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD1, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD2, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0},
{AVLD2, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD2, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD3, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0},
{AVLD3, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD3, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD4, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0},
{AVLD4, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD4, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD1R, C_ZOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, 0},
{AVLD1R, C_LOREG, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD1R, C_ROFF, C_NONE, C_NONE, C_LIST, 81, 4, 0, 0, C_XPOST},
{AVLD1, C_LOREG, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST},
{AVLD1, C_ROFF, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, C_XPOST},
{AVLD1, C_LOREG, C_NONE, C_NONE, C_ELEM, 97, 4, 0, 0, 0},
......@@ -2709,13 +2703,18 @@ func buildop(ctxt *obj.Link) {
case AVZIP1:
oprangeset(AVZIP2, t)
case AVLD1R:
oprangeset(AVLD2, t)
oprangeset(AVLD2R, t)
oprangeset(AVLD3, t)
oprangeset(AVLD3R, t)
oprangeset(AVLD4, t)
oprangeset(AVLD4R, t)
case ASHA1H,
AVCNT,
AVMOV,
AVLD1,
AVLD2,
AVLD3,
AVLD4,
AVST1,
AVST2,
AVST3,
......@@ -2803,7 +2802,7 @@ func (c *ctxt7) checkindex(p *obj.Prog, index, maxindex int) {
func (c *ctxt7) checkoffset(p *obj.Prog, as obj.As) {
var offset, list, n, expect int64
switch as {
case AVLD1, AVLD2, AVLD3, AVLD4:
case AVLD1, AVLD2, AVLD3, AVLD4, AVLD1R, AVLD2R, AVLD3R, AVLD4R:
offset = p.From.Offset
list = p.To.Offset
case AVST1, AVST2, AVST3, AVST4:
......@@ -2836,11 +2835,13 @@ func (c *ctxt7) checkoffset(p *obj.Prog, as obj.As) {
switch as {
case AVLD1, AVST1:
return
case AVLD2, AVST2:
case AVLD1R:
expect = 1
case AVLD2, AVST2, AVLD2R:
expect = 2
case AVLD3, AVST3:
case AVLD3, AVST3, AVLD3R:
expect = 3
case AVLD4, AVST4:
case AVLD4, AVST4, AVLD4R:
expect = 4
}
......@@ -4344,10 +4345,10 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
}
o1 |= (uint32(imm5&0x1f) << 16) | (uint32(rf&31) << 5) | uint32(rt&31)
case 81: /* vld[1-4] (Rn), [Vt1.<T>, Vt2.<T>, ...] */
case 81: /* vld[1-4]|vld[1-4]r (Rn), [Vt1.<T>, Vt2.<T>, ...] */
c.checkoffset(p, p.As)
r := int(p.From.Reg)
o1 = 3<<26 | 1<<22
o1 = c.oprrr(p, p.As)
if o.scond == C_XPOST {
o1 |= 1 << 23
if p.From.Index == 0 {
......@@ -4358,7 +4359,7 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
if isRegShiftOrExt(&p.From) {
c.ctxt.Diag("invalid extended register op: %v\n", p)
}
o1 |= uint32(p.From.Index&31) << 16
o1 |= uint32(p.From.Index&0x1f) << 16
}
}
o1 |= uint32(p.To.Offset)
......@@ -5591,6 +5592,15 @@ func (c *ctxt7) oprrr(p *obj.Prog, a obj.As) uint32 {
case AVRBIT:
return 0x2E<<24 | 1<<22 | 0x10<<17 | 5<<12 | 2<<10
case AVLD1, AVLD2, AVLD3, AVLD4:
return 3<<26 | 1<<22
case AVLD1R, AVLD3R:
return 0xD<<24 | 1<<22
case AVLD2R, AVLD4R:
return 0xD<<24 | 3<<21
}
c.ctxt.Diag("%v: bad rrr %d %v", p, a, a)
......@@ -6779,6 +6789,10 @@ func (c *ctxt7) maskOpvldvst(p *obj.Prog, o1 uint32) uint32 {
o1 &^= 0xf000 // mask out "opcode" field (bit 12-15)
switch p.As {
case AVLD1R, AVLD2R:
o1 |= 0xC << 12
case AVLD3R, AVLD4R:
o1 |= 0xE << 12
case AVLD2, AVST2:
o1 |= 8 << 12
case AVLD3, AVST3:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment