Commit 50f1f639, authored by isharipo, committed by Ilya Tocar

cmd/asm: add most SSE4 missing instructions

Instructions added:
  INSERTPS immb, r/m, xmm
  MPSADBW immb, r/m, xmm
  BLENDPD immb, r/m, xmm
  BLENDPS immb, r/m, xmm
  DPPD immb, r/m, xmm
  DPPS immb, r/m, xmm
  MOVNTDQA r/m, xmm
  PACKUSDW r/m, xmm
  PBLENDW immb, r/m, xmm
  PCMPEQQ r/m, xmm
  PCMPGTQ r/m, xmm
  PCMPISTRI immb, r/m, xmm
  PCMPISTRM immb, r/m, xmm
  PMAXSB r/m, xmm
  PMAXSD r/m, xmm
  PMAXUD r/m, xmm
  PMAXUW r/m, xmm
  PMINSB r/m, xmm
  PMINSD r/m, xmm
  PMINUD r/m, xmm
  PMINUW r/m, xmm
  PTEST r/m, xmm
  PCMPESTRM immb, r/m, xmm

Note: only the 'optab' table is extended.

`EXTRACTPS immb, xmm, r/m` is not included in this
change because it needs a new ytab set, 'yextractps'. Leaving it
out should simplify code review.

4-operand instructions are the subject of upcoming changes that
make the 4th (and any further) operands explicit.
Related TODO note in asm6.go:
"dont't hide 4op, some version have xmm version".

Part of the mission to add missing amd64 SSE4 instructions to Go asm.

Change-Id: I71716df14a8a5332e866dd0f0d52d43d7714872f
Reviewed-on: https://go-review.googlesource.com/57470
Run-TryBot: Iskander Sharipov <iskander.sharipov@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
parent b15e8bab
......@@ -116,6 +116,7 @@ const (
AINCW
AINSB
AINSL
AINSERTPS
AINSW
AINT
AINTO
......@@ -171,6 +172,7 @@ const (
AMOVSB
AMOVSL
AMOVSW
AMPSADBW
AMULB
AMULL
AMULW
......@@ -530,6 +532,8 @@ const (
AANDPS
ABEXTRL
ABEXTRQ
ABLENDPD
ABLENDPS
ABLSIL
ABLSIQ
ABLSMSKL
......@@ -570,6 +574,8 @@ const (
ADIVPS
ADIVSD
ADIVSS
ADPPD
ADPPS
AEMMS
AFXRSTOR
AFXRSTOR64
......@@ -599,6 +605,7 @@ const (
AMOVMSKPD
AMOVMSKPS
AMOVNTO
AMOVNTDQA
AMOVNTPD
AMOVNTPS
AMOVNTQ
......@@ -618,6 +625,7 @@ const (
AORPS
APACKSSLW
APACKSSWB
APACKUSDW
APACKUSWB
APADDB
APADDL
......@@ -632,12 +640,17 @@ const (
APANDN
APAVGB
APAVGW
APBLENDW
APCMPEQB
APCMPEQL
APCMPEQQ
APCMPEQW
APCMPGTB
APCMPGTL
APCMPGTQ
APCMPGTW
APCMPISTRI
APCMPISTRM
APDEPL
APDEPQ
APEXTL
......@@ -659,10 +672,18 @@ const (
APINSRW
APMADDUBSW
APMADDWL
APMAXSB
APMAXSD
APMAXSW
APMAXUB
APMAXUD
APMAXUW
APMINSB
APMINSD
APMINSW
APMINUB
APMINUD
APMINUW
APMOVMSKB
APMOVSXBD
APMOVSXBQ
......@@ -711,6 +732,7 @@ const (
APSUBUSB
APSUBUSW
APSUBW
APTEST
APUNPCKHBW
APUNPCKHLQ
APUNPCKHQDQ
......@@ -750,6 +772,7 @@ const (
AXORPD
AXORPS
APCMPESTRI
APCMPESTRM
ARETFW
ARETFL
......
......@@ -78,6 +78,7 @@ var Anames = []string{
"INCW",
"INSB",
"INSL",
"INSERTPS",
"INSW",
"INT",
"INTO",
......@@ -133,6 +134,7 @@ var Anames = []string{
"MOVSB",
"MOVSL",
"MOVSW",
"MPSADBW",
"MULB",
"MULL",
"MULW",
......@@ -473,6 +475,8 @@ var Anames = []string{
"ANDPS",
"BEXTRL",
"BEXTRQ",
"BLENDPD",
"BLENDPS",
"BLSIL",
"BLSIQ",
"BLSMSKL",
......@@ -513,6 +517,8 @@ var Anames = []string{
"DIVPS",
"DIVSD",
"DIVSS",
"DPPD",
"DPPS",
"EMMS",
"FXRSTOR",
"FXRSTOR64",
......@@ -542,6 +548,7 @@ var Anames = []string{
"MOVMSKPD",
"MOVMSKPS",
"MOVNTO",
"MOVNTDQA",
"MOVNTPD",
"MOVNTPS",
"MOVNTQ",
......@@ -561,6 +568,7 @@ var Anames = []string{
"ORPS",
"PACKSSLW",
"PACKSSWB",
"PACKUSDW",
"PACKUSWB",
"PADDB",
"PADDL",
......@@ -575,12 +583,17 @@ var Anames = []string{
"PANDN",
"PAVGB",
"PAVGW",
"PBLENDW",
"PCMPEQB",
"PCMPEQL",
"PCMPEQQ",
"PCMPEQW",
"PCMPGTB",
"PCMPGTL",
"PCMPGTQ",
"PCMPGTW",
"PCMPISTRI",
"PCMPISTRM",
"PDEPL",
"PDEPQ",
"PEXTL",
......@@ -602,10 +615,18 @@ var Anames = []string{
"PINSRW",
"PMADDUBSW",
"PMADDWL",
"PMAXSB",
"PMAXSD",
"PMAXSW",
"PMAXUB",
"PMAXUD",
"PMAXUW",
"PMINSB",
"PMINSD",
"PMINSW",
"PMINUB",
"PMINUD",
"PMINUW",
"PMOVMSKB",
"PMOVSXBD",
"PMOVSXBQ",
......@@ -654,6 +675,7 @@ var Anames = []string{
"PSUBUSB",
"PSUBUSW",
"PSUBW",
"PTEST",
"PUNPCKHBW",
"PUNPCKHLQ",
"PUNPCKHQDQ",
......@@ -693,6 +715,7 @@ var Anames = []string{
"XORPD",
"XORPS",
"PCMPESTRI",
"PCMPESTRM",
"RETFW",
"RETFL",
"RETFQ",
......
......@@ -733,6 +733,11 @@ var ymshufb = []ytab{
{Yxm, Ynone, Yxr, Zm2_r, 2},
}
// yxshuf should never have more than 1 entry,
// because some optab entries use opcode sequences that
// are longer than 2 bytes (zoffset=2 here);
// ROUNDPD, ROUNDPS and the recently added BLENDPD,
// to name a few.
var yxshuf = []ytab{
{Yu8, Yxm, Yxr, Zibm_r, 2},
}
......@@ -1107,6 +1112,8 @@ var optab =
{ADIVSD, yxm, Pf2, [23]uint8{0x5e}},
{ADIVSS, yxm, Pf3, [23]uint8{0x5e}},
{ADIVW, ydivl, Pe, [23]uint8{0xf7, 06}},
{ADPPD, yxshuf, Pq, [23]uint8{0x3a, 0x41, 0}},
{ADPPS, yxshuf, Pq, [23]uint8{0x3a, 0x40, 0}},
{AEMMS, ynone, Pm, [23]uint8{0x77}},
{AENTER, nil, 0, [23]uint8{}}, /* botch */
{AFXRSTOR, ysvrs, Pm, [23]uint8{0xae, 01, 0xae, 01}},
......@@ -1131,6 +1138,7 @@ var optab =
{AINL, yin, Px, [23]uint8{0xe5, 0xed}},
{AINSB, ynone, Pb, [23]uint8{0x6c}},
{AINSL, ynone, Px, [23]uint8{0x6d}},
{AINSERTPS, yxshuf, Pq, [23]uint8{0x3a, 0x21, 0}},
{AINSW, ynone, Pe, [23]uint8{0x6d}},
{AINT, yint, Px, [23]uint8{0xcd}},
{AINTO, ynone, P32, [23]uint8{0xce}},
......@@ -1217,6 +1225,7 @@ var optab =
{AMOVMSKPD, yxrrl, Pq, [23]uint8{0x50}},
{AMOVMSKPS, yxrrl, Pm, [23]uint8{0x50}},
{AMOVNTO, yxr_ml, Pe, [23]uint8{0xe7}},
{AMOVNTDQA, ylddqu, Pq4, [23]uint8{0x2a}},
{AMOVNTPD, yxr_ml, Pe, [23]uint8{0x2b}},
{AMOVNTPS, yxr_ml, Pm, [23]uint8{0x2b}},
{AMOVNTQ, ymr_ml, Pm, [23]uint8{0xe7}},
......@@ -1235,6 +1244,7 @@ var optab =
{AMOVWLZX, yml_rl, Pm, [23]uint8{0xb7}},
{AMOVWQSX, yml_rl, Pw, [23]uint8{0x0f, 0xbf}},
{AMOVWQZX, yml_rl, Pw, [23]uint8{0x0f, 0xb7}},
{AMPSADBW, yxshuf, Pq, [23]uint8{0x3a, 0x42, 0}},
{AMULB, ydivb, Pb, [23]uint8{0xf6, 04}},
{AMULL, ydivl, Px, [23]uint8{0xf7, 04}},
{AMULPD, yxm, Pe, [23]uint8{0x59}},
......@@ -1269,6 +1279,7 @@ var optab =
{APABSW, yxm_q4, Pq4, [23]uint8{0x1d}},
{APACKSSLW, ymm, Py1, [23]uint8{0x6b, Pe, 0x6b}},
{APACKSSWB, ymm, Py1, [23]uint8{0x63, Pe, 0x63}},
{APACKUSDW, yxm_q4, Pq4, [23]uint8{0x2b}},
{APACKUSWB, ymm, Py1, [23]uint8{0x67, Pe, 0x67}},
{APADDB, ymm, Py1, [23]uint8{0xfc, Pe, 0xfc}},
{APADDL, ymm, Py1, [23]uint8{0xfe, Pe, 0xfe}},
......@@ -1284,12 +1295,17 @@ var optab =
{APAUSE, ynone, Px, [23]uint8{0xf3, 0x90}},
{APAVGB, ymm, Py1, [23]uint8{0xe0, Pe, 0xe0}},
{APAVGW, ymm, Py1, [23]uint8{0xe3, Pe, 0xe3}},
{APBLENDW, yxshuf, Pq, [23]uint8{0x3a, 0x0e, 0}},
{APCMPEQB, ymm, Py1, [23]uint8{0x74, Pe, 0x74}},
{APCMPEQL, ymm, Py1, [23]uint8{0x76, Pe, 0x76}},
{APCMPEQQ, yxm_q4, Pq4, [23]uint8{0x29}},
{APCMPEQW, ymm, Py1, [23]uint8{0x75, Pe, 0x75}},
{APCMPGTB, ymm, Py1, [23]uint8{0x64, Pe, 0x64}},
{APCMPGTL, ymm, Py1, [23]uint8{0x66, Pe, 0x66}},
{APCMPGTQ, yxm_q4, Pq4, [23]uint8{0x37}},
{APCMPGTW, ymm, Py1, [23]uint8{0x65, Pe, 0x65}},
{APCMPISTRI, yxshuf, Pq, [23]uint8{0x3a, 0x63, 0}},
{APCMPISTRM, yxshuf, Pq, [23]uint8{0x3a, 0x62, 0}},
{APEXTRW, yextrw, Pq, [23]uint8{0xc5, 00}},
{APEXTRB, yextr, Pq, [23]uint8{0x3a, 0x14, 00}},
{APEXTRD, yextr, Pq, [23]uint8{0x3a, 0x16, 00}},
......@@ -1307,10 +1323,18 @@ var optab =
{APINSRQ, yinsr, Pq3, [23]uint8{0x3a, 0x22, 00}},
{APMADDUBSW, yxm_q4, Pq4, [23]uint8{0x04}},
{APMADDWL, ymm, Py1, [23]uint8{0xf5, Pe, 0xf5}},
{APMAXSB, yxm_q4, Pq4, [23]uint8{0x3c}},
{APMAXSD, yxm_q4, Pq4, [23]uint8{0x3d}},
{APMAXSW, yxm, Pe, [23]uint8{0xee}},
{APMAXUB, yxm, Pe, [23]uint8{0xde}},
{APMAXUD, yxm_q4, Pq4, [23]uint8{0x3f}},
{APMAXUW, yxm_q4, Pq4, [23]uint8{0x3e}},
{APMINSB, yxm_q4, Pq4, [23]uint8{0x38}},
{APMINSD, yxm_q4, Pq4, [23]uint8{0x39}},
{APMINSW, yxm, Pe, [23]uint8{0xea}},
{APMINUB, yxm, Pe, [23]uint8{0xda}},
{APMINUD, yxm_q4, Pq4, [23]uint8{0x3b}},
{APMINUW, yxm_q4, Pq4, [23]uint8{0x3a}},
{APMOVMSKB, ymskb, Px, [23]uint8{Pe, 0xd7, 0xd7}},
{APMOVSXBD, yxm_q4, Pq4, [23]uint8{0x21}},
{APMOVSXBQ, yxm_q4, Pq4, [23]uint8{0x22}},
......@@ -1370,6 +1394,7 @@ var optab =
{APSUBUSB, yxm, Pe, [23]uint8{0xd8}},
{APSUBUSW, yxm, Pe, [23]uint8{0xd9}},
{APSUBW, yxm, Pe, [23]uint8{0xf9}},
{APTEST, yxm_q4, Pq4, [23]uint8{0x17}},
{APUNPCKHBW, ymm, Py1, [23]uint8{0x68, Pe, 0x68}},
{APUNPCKHLQ, ymm, Py1, [23]uint8{0x6a, Pe, 0x6a}},
{APUNPCKHQDQ, yxm, Pe, [23]uint8{0x6d}},
......@@ -1656,6 +1681,7 @@ var optab =
{APSHUFD, yxshuf, Pq, [23]uint8{0x70, 0}},
{APCLMULQDQ, yxshuf, Pq, [23]uint8{0x3a, 0x44, 0}},
{APCMPESTRI, yxshuf, Pq, [23]uint8{0x3a, 0x61, 0}},
{APCMPESTRM, yxshuf, Pq, [23]uint8{0x3a, 0x60, 0}},
{AMOVDDUP, yxm, Pf2, [23]uint8{0x12}},
{AMOVSHDUP, yxm, Pf3, [23]uint8{0x16}},
{AMOVSLDUP, yxm, Pf3, [23]uint8{0x12}},
......@@ -1664,6 +1690,8 @@ var optab =
{AANDNQ, yvex_r3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF2}},
{ABEXTRL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF7}},
{ABEXTRQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF7}},
{ABLENDPD, yxshuf, Pq, [23]uint8{0x3a, 0x0d, 0}},
{ABLENDPS, yxshuf, Pq, [23]uint8{0x3a, 0x0c, 0}},
{ABZHIL, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W0, 0xF5}},
{ABZHIQ, yvex_vmr3, Pvex, [23]uint8{VEX_LZ_0F38_W1, 0xF5}},
{AMULXL, yvex_r3, Pvex, [23]uint8{VEX_LZ_F2_0F38_W0, 0xF6}},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment