Commit 85ecc51c authored by Carlos Eduardo Seo, committed by Lynn Boger

cmd/asm, cmd/internal/obj/ppc64: Add ISA 2.05, 2.06 and 2.07 instructions.

This change adds instructions from ISA 2.05, 2.06 and 2.07 that are frequently
used in assembly optimizations for ppc64.

It also fixes two problems:

  * the implementation of RLDICR[CC]/RLDICL[CC] did not consider all possible
  cases for the bit mask.
  * removed two non-existent instructions that were added by mistake in the VMX
  implementation (VORL/VANDL).

Change-Id: Iaef4e5c6a5240c2156c6c0f28ad3bcd8780e9830
Reviewed-on: https://go-review.googlesource.com/36230
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com>
parent 06637fb3
...@@ -540,6 +540,14 @@ label1: ...@@ -540,6 +540,14 @@ label1:
// } // }
// FCMPU F1, F2, CR0 // FCMPU F1, F2, CR0
// FTDIV FRA, FRB, BF produces
// ftdiv BF, FRA, FRB
FTDIV F1,F2,$7
// FTSQRT FRB, BF produces
// ftsqrt BF, FRB
FTSQRT F2,$7
// //
// CMP // CMP
// //
...@@ -567,6 +575,10 @@ label1: ...@@ -567,6 +575,10 @@ label1:
// } // }
CMP R1, $4, CR0 // CMP R1, CR0, $4 CMP R1, $4, CR0 // CMP R1, CR0, $4
// CMPB RS,RB,RA produces
// cmpb RA,RS,RB
CMPB R2,R2,R1
// //
// rotate and mask // rotate and mask
// //
...@@ -673,6 +685,13 @@ label1: ...@@ -673,6 +685,13 @@ label1:
DCBF (R1) DCBF (R1)
DCBF (R1+R2) // DCBF (R1)(R2*1) DCBF (R1+R2) // DCBF (R1)(R2*1)
// Population count, X-form
// <MNEMONIC> RS,RA produces
// <mnemonic> RA,RS
POPCNTD R1,R2
POPCNTW R1,R2
POPCNTB R1,R2
// VMX instructions // VMX instructions
// Described as: // Described as:
...@@ -703,14 +722,14 @@ label1: ...@@ -703,14 +722,14 @@ label1:
// Vector AND, VX-form // Vector AND, VX-form
// <MNEMONIC> VRA,VRB,VRT produces // <MNEMONIC> VRA,VRB,VRT produces
// <mnemonic> VRT,VRA,VRB // <mnemonic> VRT,VRA,VRB
VANDL V10, V9, V8 VAND V10, V9, V8
VANDC V15, V14, V13 VANDC V15, V14, V13
VNAND V19, V18, V17 VNAND V19, V18, V17
// Vector OR, VX-form // Vector OR, VX-form
// <MNEMONIC> VRA,VRB,VRT produces // <MNEMONIC> VRA,VRB,VRT produces
// <mnemonic> VRT,VRA,VRB // <mnemonic> VRT,VRA,VRB
VORL V26, V25, V24 VOR V26, V25, V24
VORC V23, V22, V21 VORC V23, V22, V21
VNOR V20, V19, V18 VNOR V20, V19, V18
VXOR V17, V16, V15 VXOR V17, V16, V15
...@@ -739,6 +758,14 @@ label1: ...@@ -739,6 +758,14 @@ label1:
VADDEUQM V4, V3, V2, V1 VADDEUQM V4, V3, V2, V1
VADDECUQ V4, V3, V2, V1 VADDECUQ V4, V3, V2, V1
// Vector polynomial multiply-sum, VX-form
// <MNEMONIC> VRA,VRB,VRT produces
// <mnemonic> VRT,VRA,VRB
VPMSUMB V2, V3, V1
VPMSUMH V2, V3, V1
VPMSUMW V2, V3, V1
VPMSUMD V2, V3, V1
// Vector SUB, VX-form // Vector SUB, VX-form
// <MNEMONIC> VRA,VRB,VRT produces // <MNEMONIC> VRA,VRB,VRT produces
// <mnemonic> VRT,VRA,VRB // <mnemonic> VRT,VRA,VRB
......
...@@ -626,6 +626,9 @@ const ( ...@@ -626,6 +626,9 @@ const (
ACNTLZDCC ACNTLZDCC
ACMPW /* CMP with L=0 */ ACMPW /* CMP with L=0 */
ACMPWU ACMPWU
ACMPB
AFTDIV
AFTSQRT
ADIVD ADIVD
ADIVDCC ADIVDCC
ADIVDE ADIVDE
...@@ -704,6 +707,9 @@ const ( ...@@ -704,6 +707,9 @@ const (
/* more 64-bit operations */ /* more 64-bit operations */
AHRFID AHRFID
APOPCNTD
APOPCNTW
APOPCNTB
/* Vector */ /* Vector */
ALV ALV
...@@ -721,11 +727,9 @@ const ( ...@@ -721,11 +727,9 @@ const (
ASTVX ASTVX
ASTVXL ASTVXL
AVAND AVAND
AVANDL
AVANDC AVANDC
AVNAND AVNAND
AVOR AVOR
AVORL
AVORC AVORC
AVNOR AVNOR
AVXOR AVXOR
...@@ -770,6 +774,11 @@ const ( ...@@ -770,6 +774,11 @@ const (
AVSUBE AVSUBE
AVSUBEUQM AVSUBEUQM
AVSUBECUQ AVSUBECUQ
AVPMSUM
AVPMSUMB
AVPMSUMH
AVPMSUMW
AVPMSUMD
AVR AVR
AVRLB AVRLB
AVRLH AVRLH
...@@ -871,9 +880,13 @@ const ( ...@@ -871,9 +880,13 @@ const (
ASTXSIWX ASTXSIWX
AMFVSR AMFVSR
AMFVSRD AMFVSRD
AMFFPRD
AMFVRD
AMFVSRWZ AMFVSRWZ
AMTVSR AMTVSR
AMTVSRD AMTVSRD
AMTFPRD
AMTVRD
AMTVSRWA AMTVSRWA
AMTVSRWZ AMTVSRWZ
AXXLAND AXXLAND
......
...@@ -248,6 +248,9 @@ var Anames = []string{ ...@@ -248,6 +248,9 @@ var Anames = []string{
"CNTLZDCC", "CNTLZDCC",
"CMPW", "CMPW",
"CMPWU", "CMPWU",
"CMPB",
"FTDIV",
"FTSQRT",
"DIVD", "DIVD",
"DIVDCC", "DIVDCC",
"DIVDE", "DIVDE",
...@@ -321,6 +324,9 @@ var Anames = []string{ ...@@ -321,6 +324,9 @@ var Anames = []string{
"REMDUV", "REMDUV",
"REMDUVCC", "REMDUVCC",
"HRFID", "HRFID",
"POPCNTD",
"POPCNTW",
"POPCNTB",
"LV", "LV",
"LVEBX", "LVEBX",
"LVEHX", "LVEHX",
...@@ -336,11 +342,9 @@ var Anames = []string{ ...@@ -336,11 +342,9 @@ var Anames = []string{
"STVX", "STVX",
"STVXL", "STVXL",
"VAND", "VAND",
"VANDL",
"VANDC", "VANDC",
"VNAND", "VNAND",
"VOR", "VOR",
"VORL",
"VORC", "VORC",
"VNOR", "VNOR",
"VXOR", "VXOR",
...@@ -385,6 +389,11 @@ var Anames = []string{ ...@@ -385,6 +389,11 @@ var Anames = []string{
"VSUBE", "VSUBE",
"VSUBEUQM", "VSUBEUQM",
"VSUBECUQ", "VSUBECUQ",
"VPMSUM",
"VPMSUMB",
"VPMSUMH",
"VPMSUMW",
"VPMSUMD",
"VR", "VR",
"VRLB", "VRLB",
"VRLH", "VRLH",
...@@ -484,9 +493,13 @@ var Anames = []string{ ...@@ -484,9 +493,13 @@ var Anames = []string{
"STXSIWX", "STXSIWX",
"MFVSR", "MFVSR",
"MFVSRD", "MFVSRD",
"MFFPRD",
"MFVRD",
"MFVSRWZ", "MFVSRWZ",
"MTVSR", "MTVSR",
"MTVSRD", "MTVSRD",
"MTFPRD",
"MTVRD",
"MTVSRWA", "MTVSRWA",
"MTVSRWZ", "MTVSRWZ",
"XXLAND", "XXLAND",
......
...@@ -346,6 +346,12 @@ var optab = []Optab{ ...@@ -346,6 +346,12 @@ var optab = []Optab{
{AMOVD, C_REG, C_NONE, C_NONE, C_MSR, 54, 4, 0}, /* mtmsrd */ {AMOVD, C_REG, C_NONE, C_NONE, C_MSR, 54, 4, 0}, /* mtmsrd */
{AMOVWZ, C_REG, C_NONE, C_NONE, C_MSR, 54, 4, 0}, /* mtmsr */ {AMOVWZ, C_REG, C_NONE, C_NONE, C_MSR, 54, 4, 0}, /* mtmsr */
/* Other ISA 2.05+ instructions */
{APOPCNTD, C_REG, C_NONE, C_NONE, C_REG, 93, 4, 0}, /* population count, x-form */
{ACMPB, C_REG, C_REG, C_NONE, C_REG, 92, 4, 0}, /* compare byte, x-form */
{AFTDIV, C_FREG, C_FREG, C_NONE, C_SCON, 92, 4, 0}, /* floating test for sw divide, x-form */
{AFTSQRT, C_FREG, C_NONE, C_NONE, C_SCON, 93, 4, 0}, /* floating test for sw square root, x-form */
/* Vector instructions */ /* Vector instructions */
/* Vector load */ /* Vector load */
...@@ -372,6 +378,9 @@ var optab = []Optab{ ...@@ -372,6 +378,9 @@ var optab = []Optab{
{AVSUBSS, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector subtract signed saturate, vx-form */ {AVSUBSS, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector subtract signed saturate, vx-form */
{AVSUBE, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0}, /* vector subtract extended, va-form */ {AVSUBE, C_VREG, C_VREG, C_VREG, C_VREG, 83, 4, 0}, /* vector subtract extended, va-form */
/* Vector multiply */
{AVPMSUM, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector polynomial multiply & sum, vx-form */
/* Vector rotate */ /* Vector rotate */
{AVR, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector rotate, vx-form */ {AVR, C_VREG, C_VREG, C_NONE, C_VREG, 82, 4, 0}, /* vector rotate, vx-form */
...@@ -428,9 +437,13 @@ var optab = []Optab{ ...@@ -428,9 +437,13 @@ var optab = []Optab{
/* VSX move from VSR */ /* VSX move from VSR */
{AMFVSR, C_VSREG, C_NONE, C_NONE, C_REG, 88, 4, 0}, /* vsx move from vsr, xx1-form */ {AMFVSR, C_VSREG, C_NONE, C_NONE, C_REG, 88, 4, 0}, /* vsx move from vsr, xx1-form */
{AMFVSR, C_FREG, C_NONE, C_NONE, C_REG, 88, 4, 0},
{AMFVSR, C_VREG, C_NONE, C_NONE, C_REG, 88, 4, 0},
/* VSX move to VSR */ /* VSX move to VSR */
{AMTVSR, C_REG, C_NONE, C_NONE, C_VSREG, 88, 4, 0}, /* vsx move to vsr, xx1-form */ {AMTVSR, C_REG, C_NONE, C_NONE, C_VSREG, 88, 4, 0}, /* vsx move to vsr, xx1-form */
{AMTVSR, C_REG, C_NONE, C_NONE, C_FREG, 88, 4, 0},
{AMTVSR, C_REG, C_NONE, C_NONE, C_VREG, 88, 4, 0},
/* VSX logical */ /* VSX logical */
{AXXLAND, C_VSREG, C_VSREG, C_NONE, C_VSREG, 90, 4, 0}, /* vsx and, xx3-form */ {AXXLAND, C_VSREG, C_VSREG, C_NONE, C_VSREG, 90, 4, 0}, /* vsx and, xx3-form */
...@@ -1161,6 +1174,10 @@ func buildop(ctxt *obj.Link) { ...@@ -1161,6 +1174,10 @@ func buildop(ctxt *obj.Link) {
opset(ADIVDUVCC, r0) opset(ADIVDUVCC, r0)
opset(ADIVDUCC, r0) opset(ADIVDUCC, r0)
case APOPCNTD:
opset(APOPCNTW, r0)
opset(APOPCNTB, r0)
case AMOVBZ: /* lbz, stz, rlwm(r/r), lhz, lha, stz, and x variants */ case AMOVBZ: /* lbz, stz, rlwm(r/r), lhz, lha, stz, and x variants */
opset(AMOVH, r0) opset(AMOVH, r0)
...@@ -1192,12 +1209,12 @@ func buildop(ctxt *obj.Link) { ...@@ -1192,12 +1209,12 @@ func buildop(ctxt *obj.Link) {
opset(ASTVXL, r0) opset(ASTVXL, r0)
case AVAND: /* vand, vandc, vnand */ case AVAND: /* vand, vandc, vnand */
opset(AVANDL, r0) opset(AVAND, r0)
opset(AVANDC, r0) opset(AVANDC, r0)
opset(AVNAND, r0) opset(AVNAND, r0)
case AVOR: /* vor, vorc, vxor, vnor, veqv */ case AVOR: /* vor, vorc, vxor, vnor, veqv */
opset(AVORL, r0) opset(AVOR, r0)
opset(AVORC, r0) opset(AVORC, r0)
opset(AVXOR, r0) opset(AVXOR, r0)
opset(AVNOR, r0) opset(AVNOR, r0)
...@@ -1253,6 +1270,12 @@ func buildop(ctxt *obj.Link) { ...@@ -1253,6 +1270,12 @@ func buildop(ctxt *obj.Link) {
opset(AVSUBEUQM, r0) opset(AVSUBEUQM, r0)
opset(AVSUBECUQ, r0) opset(AVSUBECUQ, r0)
case AVPMSUM: /* vpmsumb, vpmsumh, vpmsumw, vpmsumd */
opset(AVPMSUMB, r0)
opset(AVPMSUMH, r0)
opset(AVPMSUMW, r0)
opset(AVPMSUMD, r0)
case AVR: /* vrlb, vrlh, vrlw, vrld */ case AVR: /* vrlb, vrlh, vrlw, vrld */
opset(AVRLB, r0) opset(AVRLB, r0)
opset(AVRLH, r0) opset(AVRLH, r0)
...@@ -1375,12 +1398,16 @@ func buildop(ctxt *obj.Link) { ...@@ -1375,12 +1398,16 @@ func buildop(ctxt *obj.Link) {
case ASTXSI: /* stxsiwx */ case ASTXSI: /* stxsiwx */
opset(ASTXSIWX, r0) opset(ASTXSIWX, r0)
case AMFVSR: /* mfvsrd, mfvsrwz */ case AMFVSR: /* mfvsrd, mfvsrwz (and extended mnemonics) */
opset(AMFVSRD, r0) opset(AMFVSRD, r0)
opset(AMFFPRD, r0)
opset(AMFVRD, r0)
opset(AMFVSRWZ, r0) opset(AMFVSRWZ, r0)
case AMTVSR: /* mtvsrd, mtvsrwa, mtvsrwz */ case AMTVSR: /* mtvsrd, mtvsrwa, mtvsrwz (and extended mnemonics) */
opset(AMTVSRD, r0) opset(AMTVSRD, r0)
opset(AMTFPRD, r0)
opset(AMTVRD, r0)
opset(AMTVSRWA, r0) opset(AMTVSRWA, r0)
opset(AMTVSRWZ, r0) opset(AMTVSRWZ, r0)
...@@ -1710,6 +1737,15 @@ func buildop(ctxt *obj.Link) { ...@@ -1710,6 +1737,15 @@ func buildop(ctxt *obj.Link) {
case ACMPU: case ACMPU:
opset(ACMPWU, r0) opset(ACMPWU, r0)
case ACMPB:
opset(ACMPB, r0)
case AFTDIV:
opset(AFTDIV, r0)
case AFTSQRT:
opset(AFTSQRT, r0)
case AADD, case AADD,
AANDCC, /* and. Rb,Rs,Ra; andi. $uimm,Rs,Ra; andis. $uimm,Rs,Ra */ AANDCC, /* and. Rb,Rs,Ra; andi. $uimm,Rs,Ra; andis. $uimm,Rs,Ra */
AFMOVSX, AFMOVSX,
...@@ -1783,7 +1819,7 @@ func AOP_RRR(op uint32, d uint32, a uint32, b uint32) uint32 { ...@@ -1783,7 +1819,7 @@ func AOP_RRR(op uint32, d uint32, a uint32, b uint32) uint32 {
return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11 return op | (d&31)<<21 | (a&31)<<16 | (b&31)<<11
} }
/* VX-form 2-register operands, r/r/none */ /* VX-form 2-register operands, r/none/r */
func AOP_RR(op uint32, d uint32, a uint32) uint32 { func AOP_RR(op uint32, d uint32, a uint32) uint32 {
return op | (d&31)<<21 | (a&31)<<11 return op | (d&31)<<21 | (a&31)<<11
} }
...@@ -1881,6 +1917,10 @@ func OP_RLW(op uint32, a uint32, s uint32, sh uint32, mb uint32, me uint32) uint ...@@ -1881,6 +1917,10 @@ func OP_RLW(op uint32, a uint32, s uint32, sh uint32, mb uint32, me uint32) uint
return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | (mb&31)<<6 | (me&31)<<1 return op | (s&31)<<21 | (a&31)<<16 | (sh&31)<<11 | (mb&31)<<6 | (me&31)<<1
} }
// AOP_RLDIC builds the instruction word for the rotate-doubleword-immediate
// opcodes (it is used for RLDICL[CC] and RLDICR[CC]).
//
// Each operand is OR-ed into op at a fixed position:
//   - s, low 5 bits, at bit 21
//   - a, low 5 bits, at bit 16
//   - sh, low 5 bits, at bit 11; bit 5 of sh (value 32) at bit 1
//   - m, low 5 bits, at bit 6; bit 5 of m (value 32) at bit 5
//
// sh and m are therefore 6-bit quantities whose top bit is stored apart from
// the low five. Any bits above those ranges are discarded.
func AOP_RLDIC(op uint32, a uint32, s uint32, sh uint32, m uint32) uint32 {
	word := op

	// Register fields.
	word |= (s & 31) << 21
	word |= (a & 31) << 16

	// Shift count: low five bits, then the split-off sixth bit.
	word |= (sh & 31) << 11
	word |= ((sh >> 5) & 1) << 1

	// Mask begin/end: low five bits, then the split-off sixth bit.
	word |= (m & 31) << 6
	word |= ((m >> 5) & 1) << 5

	return word
}
func AOP_ISEL(op uint32, t uint32, a uint32, b uint32, bc uint32) uint32 { func AOP_ISEL(op uint32, t uint32, a uint32, b uint32, bc uint32) uint32 {
return op | (t&31)<<21 | (a&31)<<16 | (b&31)<<11 | (bc&0x1F)<<6 return op | (t&31)<<21 | (a&31)<<16 | (b&31)<<11 | (bc&0x1F)<<6
} }
...@@ -2353,6 +2393,11 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { ...@@ -2353,6 +2393,11 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
if mask[1] != 63 { if mask[1] != 63 {
ctxt.Diag("invalid mask for rotate: %x (end != bit 63)\n%v", uint64(d), p) ctxt.Diag("invalid mask for rotate: %x (end != bit 63)\n%v", uint64(d), p)
} }
o1 = LOP_RRR(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
o1 |= (uint32(a) & 31) << 6
if a&0x20 != 0 {
o1 |= 1 << 5 /* mb[5] is top bit */
}
case ARLDCR, ARLDCRCC: case ARLDCR, ARLDCRCC:
var mask [2]uint8 var mask [2]uint8
...@@ -2362,22 +2407,28 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { ...@@ -2362,22 +2407,28 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
if mask[0] != 0 { if mask[0] != 0 {
ctxt.Diag("invalid mask for rotate: %x (start != 0)\n%v", uint64(d), p) ctxt.Diag("invalid mask for rotate: %x (start != 0)\n%v", uint64(d), p)
} }
o1 = LOP_RRR(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
o1 |= (uint32(a) & 31) << 6
if a&0x20 != 0 {
o1 |= 1 << 5 /* mb[5] is top bit */
}
// These opcodes use a shift count like the ppc64 asm, no mask conversion done // These opcodes use a shift count like the ppc64 asm, no mask conversion done
case ARLDICR, ARLDICRCC, ARLDICL, ARLDICLCC: case ARLDICR, ARLDICRCC:
a = int(d) me := int(d)
sh := regoff(ctxt, &p.From)
o1 = AOP_RLDIC(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(me))
case ARLDICL, ARLDICLCC:
mb := int(d)
sh := regoff(ctxt, &p.From)
o1 = AOP_RLDIC(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(r), uint32(sh), uint32(mb))
default: default:
ctxt.Diag("unexpected op in rldc case\n%v", p) ctxt.Diag("unexpected op in rldc case\n%v", p)
a = 0 a = 0
} }
o1 = LOP_RRR(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(r), uint32(p.From.Reg))
o1 |= (uint32(a) & 31) << 6
if a&0x20 != 0 {
o1 |= 1 << 5 /* mb[5] is top bit */
}
case 17, /* bc bo,bi,lbra (same for now) */ case 17, /* bc bo,bi,lbra (same for now) */
16: /* bc bo,bi,sbra */ 16: /* bc bo,bi,sbra */
a := 0 a := 0
...@@ -3170,8 +3221,24 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { ...@@ -3170,8 +3221,24 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
/* 2-register operand order: XS, RA or RA, XT */ /* 2-register operand order: XS, RA or RA, XT */
xt := int32(p.To.Reg) xt := int32(p.To.Reg)
xs := int32(p.From.Reg) xs := int32(p.From.Reg)
if REG_VS0 <= xt && xt <= REG_VS63 { /* We need to treat the special case of extended mnemonics that may have a FREG/VREG as an argument */
if REG_V0 <= xt && xt <= REG_V31 {
/* Convert V0-V31 to VS32-VS63 */
xt = xt + 64
o1 = AOP_XX1(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg))
} else if REG_F0 <= xt && xt <= REG_F31 {
/* Convert F0-F31 to VS0-VS31 */
xt = xt + 64
o1 = AOP_XX1(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg))
} else if REG_VS0 <= xt && xt <= REG_VS63 {
o1 = AOP_XX1(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg)) o1 = AOP_XX1(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg))
} else if REG_V0 <= xs && xs <= REG_V31 {
/* Likewise for XS */
xs = xs + 64
o1 = AOP_XX1(oprrr(ctxt, p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg))
} else if REG_F0 <= xs && xs <= REG_F31 {
xs = xs + 64
o1 = AOP_XX1(oprrr(ctxt, p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg))
} else if REG_VS0 <= xs && xs <= REG_VS63 { } else if REG_VS0 <= xs && xs <= REG_VS63 {
o1 = AOP_XX1(oprrr(ctxt, p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg)) o1 = AOP_XX1(oprrr(ctxt, p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg))
} }
...@@ -3199,6 +3266,30 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) { ...@@ -3199,6 +3266,30 @@ func asmout(ctxt *obj.Link, p *obj.Prog, o *Optab, out []uint32) {
/* 3-register operand order: XA, XB, XC, XT */ /* 3-register operand order: XA, XB, XC, XT */
o1 = AOP_XX4(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), uint32(p.From3.Reg)) o1 = AOP_XX4(oprrr(ctxt, p.As), uint32(p.To.Reg), uint32(p.From.Reg), uint32(p.Reg), uint32(p.From3.Reg))
case 92: /* X-form instructions, 3-operands */
if p.To.Type == obj.TYPE_CONST {
/* imm reg reg */
/* operand order: FRA, FRB, BF */
bf := int(regoff(ctxt, &p.To)) << 2
o1 = AOP_RRR(opirr(ctxt, p.As), uint32(bf), uint32(p.From.Reg), uint32(p.Reg))
} else if p.To.Type == obj.TYPE_REG {
/* reg reg reg */
/* operand order: RS, RB, RA */
o1 = AOP_RRR(oprrr(ctxt, p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg))
}
case 93: /* X-form instructions, 2-operands */
if p.To.Type == obj.TYPE_CONST {
/* imm reg */
/* operand order: FRB, BF */
bf := int(regoff(ctxt, &p.To)) << 2
o1 = AOP_RR(opirr(ctxt, p.As), uint32(bf), uint32(p.From.Reg))
} else if p.Reg == 0 {
/* popcnt* r,r, X-form */
/* operand order: RS, RA */
o1 = AOP_RRR(oprrr(ctxt, p.As), uint32(p.From.Reg), uint32(p.To.Reg), uint32(p.Reg))
}
} }
out[0] = o1 out[0] = o1
...@@ -3281,6 +3372,8 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 { ...@@ -3281,6 +3372,8 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 {
return OPVCC(31, 0, 0, 0) /* L=0 */ return OPVCC(31, 0, 0, 0) /* L=0 */
case ACMPWU: case ACMPWU:
return OPVCC(31, 32, 0, 0) return OPVCC(31, 32, 0, 0)
case ACMPB:
return OPVCC(31, 508, 0, 0) /* cmpb - v2.05 */
case ACNTLZW: case ACNTLZW:
return OPVCC(31, 26, 0, 0) return OPVCC(31, 26, 0, 0)
...@@ -3621,6 +3714,13 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 { ...@@ -3621,6 +3714,13 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 {
case AORNCC: case AORNCC:
return OPVCC(31, 412, 0, 1) return OPVCC(31, 412, 0, 1)
case APOPCNTD:
return OPVCC(31, 506, 0, 0) /* popcntd - v2.06 */
case APOPCNTW:
return OPVCC(31, 378, 0, 0) /* popcntw - v2.06 */
case APOPCNTB:
return OPVCC(31, 122, 0, 0) /* popcntb - v2.02 */
case ARFI: case ARFI:
return OPVCC(19, 50, 0, 0) return OPVCC(19, 50, 0, 0)
case ARFCI: case ARFCI:
...@@ -3757,14 +3857,14 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 { ...@@ -3757,14 +3857,14 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 {
/* Vector (VMX/Altivec) instructions */ /* Vector (VMX/Altivec) instructions */
/* ISA 2.03 enables these for PPC970. For POWERx processors, these */ /* ISA 2.03 enables these for PPC970. For POWERx processors, these */
/* are enabled starting at POWER6 (ISA 2.05). */ /* are enabled starting at POWER6 (ISA 2.05). */
case AVANDL: case AVAND:
return OPVX(4, 1028, 0, 0) /* vand - v2.03 */ return OPVX(4, 1028, 0, 0) /* vand - v2.03 */
case AVANDC: case AVANDC:
return OPVX(4, 1092, 0, 0) /* vandc - v2.03 */ return OPVX(4, 1092, 0, 0) /* vandc - v2.03 */
case AVNAND: case AVNAND:
return OPVX(4, 1412, 0, 0) /* vnand - v2.07 */ return OPVX(4, 1412, 0, 0) /* vnand - v2.07 */
case AVORL: case AVOR:
return OPVX(4, 1156, 0, 0) /* vor - v2.03 */ return OPVX(4, 1156, 0, 0) /* vor - v2.03 */
case AVORC: case AVORC:
return OPVX(4, 1348, 0, 0) /* vorc - v2.07 */ return OPVX(4, 1348, 0, 0) /* vorc - v2.07 */
...@@ -3810,6 +3910,15 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 { ...@@ -3810,6 +3910,15 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 {
case AVADDECUQ: case AVADDECUQ:
return OPVX(4, 61, 0, 0) /* vaddecuq - v2.07 */ return OPVX(4, 61, 0, 0) /* vaddecuq - v2.07 */
case AVPMSUMB:
return OPVX(4, 1032, 0, 0) /* vpmsumb - v2.07 */
case AVPMSUMH:
return OPVX(4, 1096, 0, 0) /* vpmsumh - v2.07 */
case AVPMSUMW:
return OPVX(4, 1160, 0, 0) /* vpmsumw - v2.07 */
case AVPMSUMD:
return OPVX(4, 1224, 0, 0) /* vpmsumd - v2.07 */
case AVSUBUBM: case AVSUBUBM:
return OPVX(4, 1024, 0, 0) /* vsububm - v2.03 */ return OPVX(4, 1024, 0, 0) /* vsububm - v2.03 */
case AVSUBUHM: case AVSUBUHM:
...@@ -3976,12 +4085,12 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 { ...@@ -3976,12 +4085,12 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 {
/* Vector scalar (VSX) instructions */ /* Vector scalar (VSX) instructions */
/* ISA 2.06 enables these for POWER7. */ /* ISA 2.06 enables these for POWER7. */
case AMFVSRD: case AMFVSRD, AMFVRD, AMFFPRD:
return OPVXX1(31, 51, 0) /* mfvsrd - v2.07 */ return OPVXX1(31, 51, 0) /* mfvsrd - v2.07 */
case AMFVSRWZ: case AMFVSRWZ:
return OPVXX1(31, 115, 0) /* mfvsrwz - v2.07 */ return OPVXX1(31, 115, 0) /* mfvsrwz - v2.07 */
case AMTVSRD: case AMTVSRD, AMTFPRD, AMTVRD:
return OPVXX1(31, 179, 0) /* mtvsrd - v2.07 */ return OPVXX1(31, 179, 0) /* mtvsrd - v2.07 */
case AMTVSRWA: case AMTVSRWA:
return OPVXX1(31, 211, 0) /* mtvsrwa - v2.07 */ return OPVXX1(31, 211, 0) /* mtvsrwa - v2.07 */
...@@ -4260,6 +4369,11 @@ func opirr(ctxt *obj.Link, a obj.As) uint32 { ...@@ -4260,6 +4369,11 @@ func opirr(ctxt *obj.Link, a obj.As) uint32 {
return OPVX(4, 908, 0, 0) /* vspltisw - v2.03 */ return OPVX(4, 908, 0, 0) /* vspltisw - v2.03 */
/* End of vector instructions */ /* End of vector instructions */
case AFTDIV:
return OPVCC(63, 128, 0, 0) /* ftdiv - v2.06 */
case AFTSQRT:
return OPVCC(63, 160, 0, 0) /* ftsqrt - v2.06 */
case AXOR: case AXOR:
return OPVCC(26, 0, 0, 0) /* XORIL */ return OPVCC(26, 0, 0, 0) /* XORIL */
case -AXOR: case -AXOR:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment