Commit 6367c19f authored by Agniva De Sarker, committed by Ilya Tocar

cmd/internal/obj/x86: add some more AVX2 instructions

This adds the VFMADD[213|231]SD, VFNMADD[213|231]SD,
VADDSD, and VSUBSD instructions.

This will allow us to write a fast path for exp_amd64.s where
these optimizations can be applied in a lot of places.

Change-Id: Ide292107ab887bd1e225a1ad60880235b5ed7c61
Reviewed-on: https://go-review.googlesource.com/61810
Reviewed-by: Ilya Tocar <ilya.tocar@intel.com>
Run-TryBot: Ilya Tocar <ilya.tocar@intel.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent 31ddd8a3
...@@ -5886,14 +5886,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ...@@ -5886,14 +5886,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: VADDPS (R11), Y15, Y11 // c44104581b //TODO: VADDPS (R11), Y15, Y11 // c44104581b
//TODO: VADDPS Y2, Y15, Y11 // c4610458da or c50458da //TODO: VADDPS Y2, Y15, Y11 // c4610458da or c50458da
//TODO: VADDPS Y11, Y15, Y11 // c4410458db //TODO: VADDPS Y11, Y15, Y11 // c4410458db
//TODO: VADDSD (BX), X9, X2 // c4e1335813 or c5b35813 VADDSD (BX), X9, X2 // c4e1335813 or c5b35813
//TODO: VADDSD (R11), X9, X2 // c4c1335813 VADDSD (R11), X9, X2 // c4c1335813
//TODO: VADDSD X2, X9, X2 // c4e13358d2 or c5b358d2 VADDSD X2, X9, X2 // c4e13358d2 or c5b358d2
//TODO: VADDSD X11, X9, X2 // c4c13358d3 VADDSD X11, X9, X2 // c4c13358d3
//TODO: VADDSD (BX), X9, X11 // c46133581b or c533581b VADDSD (BX), X9, X11 // c46133581b or c533581b
//TODO: VADDSD (R11), X9, X11 // c44133581b VADDSD (R11), X9, X11 // c44133581b
//TODO: VADDSD X2, X9, X11 // c4613358da or c53358da VADDSD X2, X9, X11 // c4613358da or c53358da
//TODO: VADDSD X11, X9, X11 // c4413358db VADDSD X11, X9, X11 // c4413358db
//TODO: VADDSS (BX), X9, X2 // c4e1325813 or c5b25813 //TODO: VADDSS (BX), X9, X2 // c4e1325813 or c5b25813
//TODO: VADDSS (R11), X9, X2 // c4c1325813 //TODO: VADDSS (R11), X9, X2 // c4c1325813
//TODO: VADDSS X2, X9, X2 // c4e13258d2 or c5b258d2 //TODO: VADDSS X2, X9, X2 // c4e13258d2 or c5b258d2
...@@ -6662,14 +6662,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ...@@ -6662,14 +6662,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: VFMADD213PS (R11), Y15, Y11 // c44205a81b //TODO: VFMADD213PS (R11), Y15, Y11 // c44205a81b
//TODO: VFMADD213PS Y2, Y15, Y11 // c46205a8da //TODO: VFMADD213PS Y2, Y15, Y11 // c46205a8da
//TODO: VFMADD213PS Y11, Y15, Y11 // c44205a8db //TODO: VFMADD213PS Y11, Y15, Y11 // c44205a8db
//TODO: VFMADD213SD (BX), X9, X2 // c4e2b1a913 VFMADD213SD (BX), X9, X2 // c4e2b1a913
//TODO: VFMADD213SD (R11), X9, X2 // c4c2b1a913 VFMADD213SD (R11), X9, X2 // c4c2b1a913
//TODO: VFMADD213SD X2, X9, X2 // c4e2b1a9d2 VFMADD213SD X2, X9, X2 // c4e2b1a9d2
//TODO: VFMADD213SD X11, X9, X2 // c4c2b1a9d3 VFMADD213SD X11, X9, X2 // c4c2b1a9d3
//TODO: VFMADD213SD (BX), X9, X11 // c462b1a91b VFMADD213SD (BX), X9, X11 // c462b1a91b
//TODO: VFMADD213SD (R11), X9, X11 // c442b1a91b VFMADD213SD (R11), X9, X11 // c442b1a91b
//TODO: VFMADD213SD X2, X9, X11 // c462b1a9da VFMADD213SD X2, X9, X11 // c462b1a9da
//TODO: VFMADD213SD X11, X9, X11 // c442b1a9db VFMADD213SD X11, X9, X11 // c442b1a9db
//TODO: VFMADD213SS (BX), X9, X2 // c4e231a913 //TODO: VFMADD213SS (BX), X9, X2 // c4e231a913
//TODO: VFMADD213SS (R11), X9, X2 // c4c231a913 //TODO: VFMADD213SS (R11), X9, X2 // c4c231a913
//TODO: VFMADD213SS X2, X9, X2 // c4e231a9d2 //TODO: VFMADD213SS X2, X9, X2 // c4e231a9d2
...@@ -6710,14 +6710,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ...@@ -6710,14 +6710,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: VFMADD231PS (R11), Y15, Y11 // c44205b81b //TODO: VFMADD231PS (R11), Y15, Y11 // c44205b81b
//TODO: VFMADD231PS Y2, Y15, Y11 // c46205b8da //TODO: VFMADD231PS Y2, Y15, Y11 // c46205b8da
//TODO: VFMADD231PS Y11, Y15, Y11 // c44205b8db //TODO: VFMADD231PS Y11, Y15, Y11 // c44205b8db
//TODO: VFMADD231SD (BX), X9, X2 // c4e2b1b913 VFMADD231SD (BX), X9, X2 // c4e2b1b913
//TODO: VFMADD231SD (R11), X9, X2 // c4c2b1b913 VFMADD231SD (R11), X9, X2 // c4c2b1b913
//TODO: VFMADD231SD X2, X9, X2 // c4e2b1b9d2 VFMADD231SD X2, X9, X2 // c4e2b1b9d2
//TODO: VFMADD231SD X11, X9, X2 // c4c2b1b9d3 VFMADD231SD X11, X9, X2 // c4c2b1b9d3
//TODO: VFMADD231SD (BX), X9, X11 // c462b1b91b VFMADD231SD (BX), X9, X11 // c462b1b91b
//TODO: VFMADD231SD (R11), X9, X11 // c442b1b91b VFMADD231SD (R11), X9, X11 // c442b1b91b
//TODO: VFMADD231SD X2, X9, X11 // c462b1b9da VFMADD231SD X2, X9, X11 // c462b1b9da
//TODO: VFMADD231SD X11, X9, X11 // c442b1b9db VFMADD231SD X11, X9, X11 // c442b1b9db
//TODO: VFMADD231SS (BX), X9, X2 // c4e231b913 //TODO: VFMADD231SS (BX), X9, X2 // c4e231b913
//TODO: VFMADD231SS (R11), X9, X2 // c4c231b913 //TODO: VFMADD231SS (R11), X9, X2 // c4c231b913
//TODO: VFMADD231SS X2, X9, X2 // c4e231b9d2 //TODO: VFMADD231SS X2, X9, X2 // c4e231b9d2
...@@ -7142,14 +7142,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ...@@ -7142,14 +7142,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: VFNMADD213PS (R11), Y15, Y11 // c44205ac1b //TODO: VFNMADD213PS (R11), Y15, Y11 // c44205ac1b
//TODO: VFNMADD213PS Y2, Y15, Y11 // c46205acda //TODO: VFNMADD213PS Y2, Y15, Y11 // c46205acda
//TODO: VFNMADD213PS Y11, Y15, Y11 // c44205acdb //TODO: VFNMADD213PS Y11, Y15, Y11 // c44205acdb
//TODO: VFNMADD213SD (BX), X9, X2 // c4e2b1ad13 VFNMADD213SD (BX), X9, X2 // c4e2b1ad13
//TODO: VFNMADD213SD (R11), X9, X2 // c4c2b1ad13 VFNMADD213SD (R11), X9, X2 // c4c2b1ad13
//TODO: VFNMADD213SD X2, X9, X2 // c4e2b1add2 VFNMADD213SD X2, X9, X2 // c4e2b1add2
//TODO: VFNMADD213SD X11, X9, X2 // c4c2b1add3 VFNMADD213SD X11, X9, X2 // c4c2b1add3
//TODO: VFNMADD213SD (BX), X9, X11 // c462b1ad1b VFNMADD213SD (BX), X9, X11 // c462b1ad1b
//TODO: VFNMADD213SD (R11), X9, X11 // c442b1ad1b VFNMADD213SD (R11), X9, X11 // c442b1ad1b
//TODO: VFNMADD213SD X2, X9, X11 // c462b1adda VFNMADD213SD X2, X9, X11 // c462b1adda
//TODO: VFNMADD213SD X11, X9, X11 // c442b1addb VFNMADD213SD X11, X9, X11 // c442b1addb
//TODO: VFNMADD213SS (BX), X9, X2 // c4e231ad13 //TODO: VFNMADD213SS (BX), X9, X2 // c4e231ad13
//TODO: VFNMADD213SS (R11), X9, X2 // c4c231ad13 //TODO: VFNMADD213SS (R11), X9, X2 // c4c231ad13
//TODO: VFNMADD213SS X2, X9, X2 // c4e231add2 //TODO: VFNMADD213SS X2, X9, X2 // c4e231add2
...@@ -7190,14 +7190,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ...@@ -7190,14 +7190,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: VFNMADD231PS (R11), Y15, Y11 // c44205bc1b //TODO: VFNMADD231PS (R11), Y15, Y11 // c44205bc1b
//TODO: VFNMADD231PS Y2, Y15, Y11 // c46205bcda //TODO: VFNMADD231PS Y2, Y15, Y11 // c46205bcda
//TODO: VFNMADD231PS Y11, Y15, Y11 // c44205bcdb //TODO: VFNMADD231PS Y11, Y15, Y11 // c44205bcdb
//TODO: VFNMADD231SD (BX), X9, X2 // c4e2b1bd13 VFNMADD231SD (BX), X9, X2 // c4e2b1bd13
//TODO: VFNMADD231SD (R11), X9, X2 // c4c2b1bd13 VFNMADD231SD (R11), X9, X2 // c4c2b1bd13
//TODO: VFNMADD231SD X2, X9, X2 // c4e2b1bdd2 VFNMADD231SD X2, X9, X2 // c4e2b1bdd2
//TODO: VFNMADD231SD X11, X9, X2 // c4c2b1bdd3 VFNMADD231SD X11, X9, X2 // c4c2b1bdd3
//TODO: VFNMADD231SD (BX), X9, X11 // c462b1bd1b VFNMADD231SD (BX), X9, X11 // c462b1bd1b
//TODO: VFNMADD231SD (R11), X9, X11 // c442b1bd1b VFNMADD231SD (R11), X9, X11 // c442b1bd1b
//TODO: VFNMADD231SD X2, X9, X11 // c462b1bdda VFNMADD231SD X2, X9, X11 // c462b1bdda
//TODO: VFNMADD231SD X11, X9, X11 // c442b1bddb VFNMADD231SD X11, X9, X11 // c442b1bddb
//TODO: VFNMADD231SS (BX), X9, X2 // c4e231bd13 //TODO: VFNMADD231SS (BX), X9, X2 // c4e231bd13
//TODO: VFNMADD231SS (R11), X9, X2 // c4c231bd13 //TODO: VFNMADD231SS (R11), X9, X2 // c4c231bd13
//TODO: VFNMADD231SS X2, X9, X2 // c4e231bdd2 //TODO: VFNMADD231SS X2, X9, X2 // c4e231bdd2
...@@ -10314,14 +10314,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0 ...@@ -10314,14 +10314,14 @@ TEXT asmtest(SB),DUPOK|NOSPLIT,$0
//TODO: VSUBPS (R11), Y15, Y11 // c441045c1b //TODO: VSUBPS (R11), Y15, Y11 // c441045c1b
//TODO: VSUBPS Y2, Y15, Y11 // c461045cda or c5045cda //TODO: VSUBPS Y2, Y15, Y11 // c461045cda or c5045cda
//TODO: VSUBPS Y11, Y15, Y11 // c441045cdb //TODO: VSUBPS Y11, Y15, Y11 // c441045cdb
//TODO: VSUBSD (BX), X9, X2 // c4e1335c13 or c5b35c13 VSUBSD (BX), X9, X2 // c4e1335c13 or c5b35c13
//TODO: VSUBSD (R11), X9, X2 // c4c1335c13 VSUBSD (R11), X9, X2 // c4c1335c13
//TODO: VSUBSD X2, X9, X2 // c4e1335cd2 or c5b35cd2 VSUBSD X2, X9, X2 // c4e1335cd2 or c5b35cd2
//TODO: VSUBSD X11, X9, X2 // c4c1335cd3 VSUBSD X11, X9, X2 // c4c1335cd3
//TODO: VSUBSD (BX), X9, X11 // c461335c1b or c5335c1b VSUBSD (BX), X9, X11 // c461335c1b or c5335c1b
//TODO: VSUBSD (R11), X9, X11 // c441335c1b VSUBSD (R11), X9, X11 // c441335c1b
//TODO: VSUBSD X2, X9, X11 // c461335cda or c5335cda VSUBSD X2, X9, X11 // c461335cda or c5335cda
//TODO: VSUBSD X11, X9, X11 // c441335cdb VSUBSD X11, X9, X11 // c441335cdb
//TODO: VSUBSS (BX), X9, X2 // c4e1325c13 or c5b25c13 //TODO: VSUBSS (BX), X9, X2 // c4e1325c13 or c5b25c13
//TODO: VSUBSS (R11), X9, X2 // c4c1325c13 //TODO: VSUBSS (R11), X9, X2 // c4c1325c13
//TODO: VSUBSS X2, X9, X2 // c4e1325cd2 or c5b25cd2 //TODO: VSUBSS X2, X9, X2 // c4e1325cd2 or c5b25cd2
......
...@@ -838,11 +838,17 @@ const ( ...@@ -838,11 +838,17 @@ const (
AVPERM2I128 AVPERM2I128
ARORXL ARORXL
ARORXQ ARORXQ
AVADDSD
AVBROADCASTSS AVBROADCASTSS
AVBROADCASTSD AVBROADCASTSD
AVFMADD213SD
AVFMADD231SD
AVFNMADD213SD
AVFNMADD231SD
AVMOVDDUP AVMOVDDUP
AVMOVSHDUP AVMOVSHDUP
AVMOVSLDUP AVMOVSLDUP
AVSUBSD
// from 386 // from 386
AJCXZW AJCXZW
......
...@@ -773,11 +773,17 @@ var Anames = []string{ ...@@ -773,11 +773,17 @@ var Anames = []string{
"VPERM2I128", "VPERM2I128",
"RORXL", "RORXL",
"RORXQ", "RORXQ",
"VADDSD",
"VBROADCASTSS", "VBROADCASTSS",
"VBROADCASTSD", "VBROADCASTSD",
"VFMADD213SD",
"VFMADD231SD",
"VFNMADD213SD",
"VFNMADD231SD",
"VMOVDDUP", "VMOVDDUP",
"VMOVSHDUP", "VMOVSHDUP",
"VMOVSLDUP", "VMOVSLDUP",
"VSUBSD",
"JCXZW", "JCXZW",
"FCMOVCC", "FCMOVCC",
"FCMOVCS", "FCMOVCS",
......
...@@ -819,6 +819,10 @@ var yvex_xy3 = []ytab{ ...@@ -819,6 +819,10 @@ var yvex_xy3 = []ytab{
{Yym, Yyr, Yyr, Zvex_rm_v_r, 2}, {Yym, Yyr, Yyr, Zvex_rm_v_r, 2},
} }
// yvex_x3 is the operand-pattern table for three-operand VEX instructions
// whose operands are X (128-bit) registers only: one form, taking a Yxm
// first operand and Yxr second and third operands, encoded with Zvex_rm_v_r.
// NOTE(review): Yxm presumably means "X register or memory" and Yxr
// "X register" — confirm against the operand class definitions.
// NOTE(review): the trailing 2 looks like the count of optab bytes this form
// consumes (VEX prefix selector + opcode) — confirm against the ytab type.
var yvex_x3 = []ytab{
{Yxm, Yxr, Yxr, Zvex_rm_v_r, 2},
}
var yvex_ri3 = []ytab{ var yvex_ri3 = []ytab{
{Yi8, Ymb, Yrl, Zvex_i_rm_r, 2}, {Yi8, Ymb, Yrl, Zvex_i_rm_r, 2},
} }
...@@ -1722,6 +1726,12 @@ var optab = ...@@ -1722,6 +1726,12 @@ var optab =
{AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xeb, VEX_256_66_0F_WIG, 0xeb}}, {AVPOR, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xeb, VEX_256_66_0F_WIG, 0xeb}},
{AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xd4, VEX_256_66_0F_WIG, 0xd4}}, {AVPADDQ, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xd4, VEX_256_66_0F_WIG, 0xd4}},
{AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xfe, VEX_256_66_0F_WIG, 0xfe}}, {AVPADDD, yvex_xy3, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0xfe, VEX_256_66_0F_WIG, 0xfe}},
{AVADDSD, yvex_x3, Pvex, [23]uint8{VEX_128_F2_0F_WIG, 0x58}},
{AVSUBSD, yvex_x3, Pvex, [23]uint8{VEX_128_F2_0F_WIG, 0x5c}},
{AVFMADD213SD, yvex_x3, Pvex, [23]uint8{VEX_LIG_66_0F38_W1, 0xa9}},
{AVFMADD231SD, yvex_x3, Pvex, [23]uint8{VEX_LIG_66_0F38_W1, 0xb9}},
{AVFNMADD213SD, yvex_x3, Pvex, [23]uint8{VEX_LIG_66_0F38_W1, 0xad}},
{AVFNMADD231SD, yvex_x3, Pvex, [23]uint8{VEX_LIG_66_0F38_W1, 0xbd}},
{AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xf0, VEX_256_66_0F_WIG, 0x72, 0xf0, VEX_128_66_0F_WIG, 0xf2, VEX_256_66_0F_WIG, 0xf2}}, {AVPSLLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xf0, VEX_256_66_0F_WIG, 0x72, 0xf0, VEX_128_66_0F_WIG, 0xf2, VEX_256_66_0F_WIG, 0xf2}},
{AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf0, VEX_256_66_0F_WIG, 0x73, 0xf0, VEX_128_66_0F_WIG, 0xf3, VEX_256_66_0F_WIG, 0xf3}}, {AVPSLLQ, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x73, 0xf0, VEX_256_66_0F_WIG, 0x73, 0xf0, VEX_128_66_0F_WIG, 0xf3, VEX_256_66_0F_WIG, 0xf3}},
{AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xd0, VEX_256_66_0F_WIG, 0x72, 0xd0, VEX_128_66_0F_WIG, 0xd2, VEX_256_66_0F_WIG, 0xd2}}, {AVPSRLD, yvex_shift, Pvex, [23]uint8{VEX_128_66_0F_WIG, 0x72, 0xd0, VEX_256_66_0F_WIG, 0x72, 0xd0, VEX_128_66_0F_WIG, 0xd2, VEX_256_66_0F_WIG, 0xd2}},
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment