Commit 4e19cfcd authored by isharipo's avatar isharipo Committed by Ilya Tocar

cmd/internal/obj/x86: add AVX2 gather and VSIB

Enables AVX2 gather instructions and VSIB support,
which makes vm32{x,y} vm64{x,y} operands encodable.

AXXX constants placed with respect to sorting order.
New VEX optabs inserted near non-VEX entries to simplify
potential transition to auto-generated VSIB optabs.

Tests go into new AMD64 encoder test file (amd64enc_extra.s)
to avoid unnecessary interactions with auto-generated "amd64enc.s".

Side note: x86avxgen did not produce these instructions
because x86.v0.2.csv is missing them.
This also explains why the x86 test suite has no AVX2 gather
instruction tests.

List of new instructions:
  VGATHERDPD
  VGATHERDPS
  VGATHERQPD
  VGATHERQPS
  VPGATHERDD
  VPGATHERDQ
  VPGATHERQD
  VPGATHERQQ

Change-Id: Iac852f3c5016523670bd99de6bec6a48f66fb4f6
Reviewed-on: https://go-review.googlesource.com/77970
Run-TryBot: Iskander Sharipov <iskander.sharipov@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarIlya Tocar <ilya.tocar@intel.com>
parent 3a395e22
......@@ -391,6 +391,7 @@ func TestAMD64EndToEnd(t *testing.T) {
// TestAMD64Encoder runs the end-to-end encoder tests for both the
// auto-generated AMD64 suite and the hand-written extras.
func TestAMD64Encoder(t *testing.T) {
	for _, suite := range []string{"amd64enc", "amd64enc_extra"} {
		testEndToEnd(t, "amd64", suite)
	}
}
func TestAMD64Errors(t *testing.T) {
......
This diff is collapsed.
......@@ -7,4 +7,29 @@ TEXT errors(SB),$0
MOVL (AX)(SP*1), AX // ERROR "invalid instruction"
EXTRACTPS $4, X2, (BX) // ERROR "invalid instruction"
EXTRACTPS $-1, X2, (BX) // ERROR "invalid instruction"
// VSIB addressing does not permit non-vector (X/Y)
// scaled index register.
VPGATHERDQ X12,(R13)(AX*2), X11 // ERROR "invalid instruction"
VPGATHERDQ X2, 664(BX*1), X1 // ERROR "invalid instruction"
VPGATHERDQ Y2, (BP)(AX*2), Y1 // ERROR "invalid instruction"
VPGATHERDQ Y5, 664(DX*8), Y6 // ERROR "invalid instruction"
VPGATHERDQ Y5, (DX), Y0 // ERROR "invalid instruction"
// VM/X rejects Y index register.
VPGATHERDQ Y5, 664(Y14*8), Y6 // ERROR "invalid instruction"
VPGATHERQQ X2, (BP)(Y7*2), X1 // ERROR "invalid instruction"
// VM/Y rejects X index register.
VPGATHERQQ Y2, (BP)(X7*2), Y1 // ERROR "invalid instruction"
VPGATHERDD Y5, -8(X14*8), Y6 // ERROR "invalid instruction"
// No VSIB for legacy instructions.
MOVL (AX)(X0*1), AX // ERROR "invalid instruction"
MOVL (AX)(Y0*1), AX // ERROR "invalid instruction"
// AVX2GATHER mask/index/dest #UD cases.
VPGATHERQQ Y2, (BP)(X2*2), Y2 // ERROR "mask, index, and destination registers should be distinct"
VPGATHERQQ Y2, (BP)(X2*2), Y7 // ERROR "mask, index, and destination registers should be distinct"
VPGATHERQQ Y2, (BP)(X7*2), Y2 // ERROR "mask, index, and destination registers should be distinct"
VPGATHERQQ Y7, (BP)(X2*2), Y2 // ERROR "mask, index, and destination registers should be distinct"
VPGATHERDQ X2, 664(X2*8), X2 // ERROR "mask, index, and destination registers should be distinct"
VPGATHERDQ X2, 664(X2*8), X7 // ERROR "mask, index, and destination registers should be distinct"
VPGATHERDQ X2, 664(X7*8), X2 // ERROR "mask, index, and destination registers should be distinct"
VPGATHERDQ X7, 664(X2*8), X2 // ERROR "mask, index, and destination registers should be distinct"
RET
......@@ -874,6 +874,10 @@ const (
AVFNMSUB231PS
AVFNMSUB231SD
AVFNMSUB231SS
AVGATHERDPD
AVGATHERDPS
AVGATHERQPD
AVGATHERQPS
AVHADDPD
AVHADDPS
AVHSUBPD
......@@ -978,6 +982,10 @@ const (
AVPEXTRD
AVPEXTRQ
AVPEXTRW
AVPGATHERDD
AVPGATHERDQ
AVPGATHERQD
AVPGATHERQQ
AVPHADDD
AVPHADDSW
AVPHADDW
......
......@@ -873,6 +873,10 @@ var Anames = []string{
"VFNMSUB231PS",
"VFNMSUB231SD",
"VFNMSUB231SS",
"VGATHERDPD",
"VGATHERDPS",
"VGATHERQPD",
"VGATHERQPS",
"VHADDPD",
"VHADDPS",
"VHSUBPD",
......@@ -977,6 +981,10 @@ var Anames = []string{
"VPEXTRD",
"VPEXTRQ",
"VPEXTRW",
"VPGATHERDD",
"VPGATHERDQ",
"VPGATHERQD",
"VPGATHERQQ",
"VPHADDD",
"VPHADDSW",
"VPHADDW",
......
......@@ -148,8 +148,10 @@ const (
Ymm
Yxr
Yxm
Yxvm // VSIB vector array; vm32x/vm64x
Yyr
Yym
Yyvm // VSIB vector array; vm32y/vm64y
Ytls
Ytextsize
Yindir
......@@ -1034,6 +1036,21 @@ var yvex_vmovq = []ytab{
{Zvex_r_v_rm, 2, argList{Yxr, Yxm}},
}
// yvpgatherdq describes gather forms whose index vector is always X
// (Yxvm, i.e. vm32x/vm64x) in both the 128-bit and 256-bit variants;
// used by VPGATHERDQ and VGATHERDPD, where the mask/destination width
// selects between the two rows.
var yvpgatherdq = []ytab{
	{Zvex_v_rm_r, 2, argList{Yxr, Yxvm, Yxr}},
	{Zvex_v_rm_r, 2, argList{Yyr, Yxvm, Yyr}},
}
// yvpgatherqq describes gather forms where the index vector width
// matches the mask/destination width: X index with X operands
// (128-bit) or Y index with Y operands (256-bit). Used by
// VPGATHERQQ, VGATHERQPD, and the DD/DPS gathers.
var yvpgatherqq = []ytab{
	{Zvex_v_rm_r, 2, argList{Yxr, Yxvm, Yxr}},
	{Zvex_v_rm_r, 2, argList{Yyr, Yyvm, Yyr}},
}
// yvgatherqps describes gather forms whose mask and destination are
// always X registers while the index vector is X (128-bit form) or Y
// (256-bit form); used by VGATHERQPS and VPGATHERQD, where the element
// count is bounded by the 64-bit index width.
var yvgatherqps = []ytab{
	{Zvex_v_rm_r, 2, argList{Yxr, Yxvm, Yxr}},
	{Zvex_v_rm_r, 2, argList{Yxr, Yyvm, Yxr}},
}
var ymmxmm0f38 = []ytab{
{Zlitm_r, 3, argList{Ymm, Ymr}},
{Zlitm_r, 5, argList{Yxm, Yxr}},
......@@ -1855,6 +1872,44 @@ var optab =
{obj.APCDATA, ypcdata, Px, [23]uint8{0, 0}},
{obj.ADUFFCOPY, yduff, Px, [23]uint8{0xe8}},
{obj.ADUFFZERO, yduff, Px, [23]uint8{0xe8}},
// AVX2 gather instructions.
// Added as a part of VSIB support implementation,
// when x86avxgen will output these, they will be moved to
// vex_optabs.go where they belong.
{AVGATHERDPD, yvpgatherdq, Pvex, [23]uint8{
vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x92,
vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x92,
}},
{AVGATHERQPD, yvpgatherqq, Pvex, [23]uint8{
vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x93,
vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x93,
}},
{AVGATHERDPS, yvpgatherqq, Pvex, [23]uint8{
vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x92,
vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x92,
}},
{AVGATHERQPS, yvgatherqps, Pvex, [23]uint8{
vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x93,
vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x93,
}},
{AVPGATHERDD, yvpgatherqq, Pvex, [23]uint8{
vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x90,
vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x90,
}},
{AVPGATHERQD, yvgatherqps, Pvex, [23]uint8{
vexDDS | vex128 | vex66 | vex0F38 | vexW0, 0x91,
vexDDS | vex256 | vex66 | vex0F38 | vexW0, 0x91,
}},
{AVPGATHERDQ, yvpgatherdq, Pvex, [23]uint8{
vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x90,
vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x90,
}},
{AVPGATHERQQ, yvpgatherqq, Pvex, [23]uint8{
vexDDS | vex128 | vex66 | vex0F38 | vexW1, 0x91,
vexDDS | vex256 | vex66 | vex0F38 | vexW1, 0x91,
}},
{obj.AEND, nil, 0, [23]uint8{}},
{0, nil, 0, [23]uint8{}},
}
......@@ -2435,6 +2490,18 @@ func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
// Can't use SP as the index register
return Yxxx
}
if a.Index >= REG_X0 && a.Index <= REG_X15 {
if ctxt.Arch.Family == sys.I386 && a.Index > REG_X7 {
return Yxxx
}
return Yxvm
}
if a.Index >= REG_Y0 && a.Index <= REG_Y15 {
if ctxt.Arch.Family == sys.I386 && a.Index > REG_Y7 {
return Yxxx
}
return Yyvm
}
if ctxt.Arch.Family == sys.AMD64 {
// Offset must fit in a 32-bit signed field (or fit in a 32-bit unsigned field
// where the sign extension doesn't matter).
......@@ -2847,9 +2914,11 @@ func (a *AsmBuf) Reset() { a.off = 0 }
// At returns the byte at offset i.
func (a *AsmBuf) At(i int) byte { return a.buf[i] }
// asmidx emits SIB byte.
func (asmbuf *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
var i int
// X/Y index register is used in VSIB.
switch index {
default:
goto bad
......@@ -2865,7 +2934,23 @@ func (asmbuf *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
REG_R12,
REG_R13,
REG_R14,
REG_R15:
REG_R15,
REG_X8,
REG_X9,
REG_X10,
REG_X11,
REG_X12,
REG_X13,
REG_X14,
REG_X15,
REG_Y8,
REG_Y9,
REG_Y10,
REG_Y11,
REG_Y12,
REG_Y13,
REG_Y14,
REG_Y15:
if ctxt.Arch.Family == sys.I386 {
goto bad
}
......@@ -2877,7 +2962,23 @@ func (asmbuf *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
REG_BX,
REG_BP,
REG_SI,
REG_DI:
REG_DI,
REG_X0,
REG_X1,
REG_X2,
REG_X3,
REG_X4,
REG_X5,
REG_X6,
REG_X7,
REG_Y0,
REG_Y1,
REG_Y2,
REG_Y3,
REG_Y4,
REG_Y5,
REG_Y6,
REG_Y7:
i = reg[index] << 3
}
......@@ -3488,6 +3589,35 @@ func (asmbuf *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uin
asmbuf.Put1(opcode)
}
// regIndex returns the 4-bit register index used in instruction
// encoding for r: the low 3 bits come from the reg table, and the
// REX extension bit (Rxr) supplies bit 3.
//
// Examples:
//	REG_X15 => 15
//	REG_R9  => 9
//	REG_AX  => 0
//
func regIndex(r int16) int {
	low := reg[r]
	// (& binds at the same level as << in Go; parenthesize for clarity.)
	high := (regrex[r] & Rxr) << 1
	return low | high
}
// avx2gatherValid reports whether p satisfies the AVX2 gather operand
// constraints, emitting a diagnostic via ctxt when it does not.
func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
	// The hardware raises #UD when any two of the index, mask, and
	// destination registers alias, so reject those combinations here.
	var (
		index = regIndex(p.GetFrom3().Index)
		mask  = regIndex(p.From.Reg)
		dest  = regIndex(p.To.Reg)
	)
	if dest != mask && dest != index && mask != index {
		return true
	}
	ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
	return false
}
func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
o := opindex[p.As&obj.AMask]
......@@ -3536,6 +3666,18 @@ func (asmbuf *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
p.To.Offset = p.GetFrom3().Offset
p.GetFrom3().Offset = 0
}
case AVGATHERDPD,
AVGATHERQPD,
AVGATHERDPS,
AVGATHERQPS,
AVPGATHERDD,
AVPGATHERQD,
AVPGATHERDQ,
AVPGATHERQQ:
if !avx2gatherValid(ctxt, p) {
return
}
}
if p.Ft == 0 {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment