Commit 4636d022 authored by Cherry Zhang

[dev.ssa] cmd/compile: handle 64-bit shifts on ARM

Also fix a mistake in previous CL about x8 and x16 shifts:
the shift needs ZeroExt.

Progress on SSA for ARM. Still not complete.

Updates #15365.

Change-Id: Ibc352760023d38bc6b9c5251e929fe26e016637a
Reviewed-on: https://go-review.googlesource.com/23486
Reviewed-by: David Chase <drchase@google.com>
parent 90883091
......@@ -277,6 +277,12 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.Reg = gc.SSARegNum(v.Args[0])
p.To.Type = obj.TYPE_REG
p.To.Reg = gc.SSARegNum(v)
case ssa.OpARMSRRconst:
p := gc.Prog(arm.AMOVW)
p.From.Type = obj.TYPE_SHIFT
p.From.Offset = int64(gc.SSARegNum(v.Args[0])&0xf) | arm.SHIFT_RR | (v.AuxInt&31)<<7
p.To.Type = obj.TYPE_REG
p.To.Reg = gc.SSARegNum(v)
case ssa.OpARMHMUL,
ssa.OpARMHMULU:
// 32-bit high multiplication
......@@ -483,6 +489,24 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p4 := gc.Prog(arm.ABLT)
p4.To.Type = obj.TYPE_BRANCH
gc.Patch(p4, p)
case ssa.OpARMLoweredZeromask:
// int32(arg0>>1 - arg0) >> 31
// RSB r0>>1, r0, r
// SRA $31, r, r
r0 := gc.SSARegNum(v.Args[0])
r := gc.SSARegNum(v)
p := gc.Prog(arm.ARSB)
p.From.Type = obj.TYPE_SHIFT
p.From.Offset = int64(r0&0xf) | arm.SHIFT_LR | 1<<7 // unsigned r0>>1
p.Reg = r0
p.To.Type = obj.TYPE_REG
p.To.Reg = r
p = gc.Prog(arm.ASRA)
p.From.Type = obj.TYPE_CONST
p.From.Offset = 31
p.Reg = r
p.To.Type = obj.TYPE_REG
p.To.Reg = r
case ssa.OpVarDef:
gc.Gvardef(v.Aux.(*gc.Node))
case ssa.OpVarKill:
......
......@@ -61,40 +61,40 @@
// shifts
(Lsh32x32 x y) -> (SLL x y)
(Lsh32x16 x y) -> (SLL x y)
(Lsh32x8 x y) -> (SLL x y)
(Lsh32x16 x y) -> (SLL x (ZeroExt16to32 y))
(Lsh32x8 x y) -> (SLL x (ZeroExt8to32 y))
(Lsh16x32 x y) -> (SLL x y)
(Lsh16x16 x y) -> (SLL x y)
(Lsh16x8 x y) -> (SLL x y)
(Lsh16x16 x y) -> (SLL x (ZeroExt16to32 y))
(Lsh16x8 x y) -> (SLL x (ZeroExt8to32 y))
(Lsh8x32 x y) -> (SLL x y)
(Lsh8x16 x y) -> (SLL x y)
(Lsh8x8 x y) -> (SLL x y)
(Lsh8x16 x y) -> (SLL x (ZeroExt16to32 y))
(Lsh8x8 x y) -> (SLL x (ZeroExt8to32 y))
(Rsh32Ux32 x y) -> (SRL x y)
(Rsh32Ux16 x y) -> (SRL x y)
(Rsh32Ux8 x y) -> (SRL x y)
(Rsh32Ux16 x y) -> (SRL x (ZeroExt16to32 y))
(Rsh32Ux8 x y) -> (SRL x (ZeroExt8to32 y))
(Rsh16Ux32 x y) -> (SRL (ZeroExt16to32 x) y)
(Rsh16Ux16 x y) -> (SRL (ZeroExt16to32 x) y)
(Rsh16Ux8 x y) -> (SRL (ZeroExt16to32 x) y)
(Rsh16Ux16 x y) -> (SRL (ZeroExt16to32 x) (ZeroExt16to32 y))
(Rsh16Ux8 x y) -> (SRL (ZeroExt16to32 x) (ZeroExt8to32 y))
(Rsh8Ux32 x y) -> (SRL (ZeroExt8to32 x) y)
(Rsh8Ux16 x y) -> (SRL (ZeroExt8to32 x) y)
(Rsh8Ux8 x y) -> (SRL (ZeroExt8to32 x) y)
(Rsh8Ux16 x y) -> (SRL (ZeroExt8to32 x) (ZeroExt16to32 y))
(Rsh8Ux8 x y) -> (SRL (ZeroExt8to32 x) (ZeroExt8to32 y))
(Rsh32x32 x y) -> (SRA x y)
(Rsh32x16 x y) -> (SRA x y)
(Rsh32x8 x y) -> (SRA x y)
(Rsh32x16 x y) -> (SRA x (ZeroExt16to32 y))
(Rsh32x8 x y) -> (SRA x (ZeroExt8to32 y))
(Rsh16x32 x y) -> (SRA (SignExt16to32 x) y)
(Rsh16x16 x y) -> (SRA (SignExt16to32 x) y)
(Rsh16x8 x y) -> (SRA (SignExt16to32 x) y)
(Rsh16x16 x y) -> (SRA (SignExt16to32 x) (ZeroExt16to32 y))
(Rsh16x8 x y) -> (SRA (SignExt16to32 x) (ZeroExt8to32 y))
(Rsh8x32 x y) -> (SRA (SignExt8to32 x) y)
(Rsh8x16 x y) -> (SRA (SignExt8to32 x) y)
(Rsh8x8 x y) -> (SRA (SignExt8to32 x) y)
(Rsh8x16 x y) -> (SRA (SignExt8to32 x) (ZeroExt16to32 y))
(Rsh8x8 x y) -> (SRA (SignExt8to32 x) (ZeroExt8to32 y))
// constant shifts
// generic opt rewrites all constant shifts to shift by Const64
......@@ -121,6 +121,10 @@
(Rsh16x64 <t> x (Const64 [c])) && uint64(c) >= 16 -> (SRAconst (SLLconst <t> x [16]) [31])
(Rsh8x64 <t> x (Const64 [c])) && uint64(c) >= 8 -> (SRAconst (SLLconst <t> x [24]) [31])
(Lrot32 x [c]) -> (SRRconst x [32-c&31])
(Lrot16 <t> x [c]) -> (OR (SLLconst <t> x [c&15]) (SRLconst <t> x [16-c&15]))
(Lrot8 <t> x [c]) -> (OR (SLLconst <t> x [c&7]) (SRLconst <t> x [8-c&7]))
// constants
(Const8 [val]) -> (MOVWconst [val])
(Const16 [val]) -> (MOVWconst [val])
......@@ -144,6 +148,7 @@
(SignExt16to32 x) -> (MOVHreg x)
(Signmask x) -> (SRAconst x [31])
(Zeromask x) -> (LoweredZeromask x)
// comparisons
(Eq8 x y) -> (Equal (CMP (ZeroExt8to32 x) (ZeroExt8to32 y)))
......
......@@ -129,6 +129,7 @@ func init() {
{name: "SRLconst", argLength: 1, reg: gp11, asm: "SRL", aux: "Int32"}, // arg0 >> auxInt, unsigned
{name: "SRA", argLength: 2, reg: gp21cf, asm: "SRA"}, // arg0 >> arg1, signed, results 0/-1 for large shift
{name: "SRAconst", argLength: 1, reg: gp11, asm: "SRA", aux: "Int32"}, // arg0 >> auxInt, signed
{name: "SRRconst", argLength: 1, reg: gp11, aux: "Int32"}, // arg0 right rotate by auxInt bits
{name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1
{name: "CMPconst", argLength: 1, reg: gp1flags, asm: "CMP", aux: "Int32", typ: "Flags"}, // arg0 compare to auxInt
......@@ -180,6 +181,8 @@ func init() {
{name: "LoweredSelect0", argLength: 1, reg: regInfo{inputs: []regMask{}, outputs: []regMask{buildReg("R0")}}}, // the first component of a tuple, implicitly in R0, arg0=tuple
{name: "LoweredSelect1", argLength: 1, reg: gp11, resultInArg0: true}, // the second component of a tuple, arg0=tuple
{name: "LoweredZeromask", argLength: 1, reg: gp11}, // 0 if arg0 == 0, 0xffffffff if arg0 != 0
// duffzero
// arg0 = address of memory to zero (in R1, changed as side effect)
// arg1 = value to store (always zero)
......
......@@ -430,7 +430,8 @@ var genericOps = []opData{
{name: "Mul32uhilo", argLength: 2, typ: "(UInt32,UInt32)"}, // arg0 * arg1, returns (hi, lo)
{name: "Signmask", argLength: 1, typ: "Int32"}, // 0 if arg0 >= 0, -1 if arg0 < 0
{name: "Signmask", argLength: 1, typ: "Int32"}, // 0 if arg0 >= 0, -1 if arg0 < 0
{name: "Zeromask", argLength: 1, typ: "UInt32"}, // 0 if arg0 == 0, 0xffffffff if arg0 != 0
// pseudo-ops for breaking Tuple
{name: "Select0", argLength: 1}, // the first component of a tuple
......
......@@ -353,6 +353,7 @@ const (
OpARMSRLconst
OpARMSRA
OpARMSRAconst
OpARMSRRconst
OpARMCMP
OpARMCMPconst
OpARMCMN
......@@ -393,6 +394,7 @@ const (
OpARMCarry
OpARMLoweredSelect0
OpARMLoweredSelect1
OpARMLoweredZeromask
OpARMDUFFZERO
OpARMDUFFCOPY
OpARMLoweredZero
......@@ -693,6 +695,7 @@ const (
OpSub32withcarry
OpMul32uhilo
OpSignmask
OpZeromask
OpSelect0
OpSelect1
)
......@@ -4313,6 +4316,19 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "SRRconst",
auxType: auxInt32,
argLen: 1,
reg: regInfo{
inputs: []inputInfo{
{0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
},
outputs: []regMask{
5119, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
},
},
},
{
name: "CMP",
argLen: 2,
......@@ -4805,6 +4821,18 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "LoweredZeromask",
argLen: 1,
reg: regInfo{
inputs: []inputInfo{
{0, 5119}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
},
outputs: []regMask{
5119, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12
},
},
},
{
name: "DUFFZERO",
auxType: auxInt64,
......@@ -6391,6 +6419,11 @@ var opcodeTable = [...]opInfo{
argLen: 1,
generic: true,
},
{
name: "Zeromask",
argLen: 1,
generic: true,
},
{
name: "Select0",
argLen: 1,
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment