Commit 28edaf45 authored by Lynn Boger

cmd/compile,test: combine byte loads and stores on ppc64le

CL 74410 added rules to combine consecutive byte loads and
stores when the byte order was little endian for ppc64le. This
is the corresponding change for bytes that are in big endian order.
These rules are all intended for a little endian target arch.

This adds new testcases in test/codegen/memcombine.go

Fixes #22496
Updates #24242

Benchmark improvement for encoding/binary:
name                      old time/op    new time/op    delta
ReadSlice1000Int32s-16      11.0µs ± 0%     9.0µs ± 0%  -17.47%  (p=0.029 n=4+4)
ReadStruct-16               2.47µs ± 1%    2.48µs ± 0%   +0.67%  (p=0.114 n=4+4)
ReadInts-16                  642ns ± 1%     630ns ± 1%   -2.02%  (p=0.029 n=4+4)
WriteInts-16                 654ns ± 0%     653ns ± 1%   -0.08%  (p=0.629 n=4+4)
WriteSlice1000Int32s-16     8.75µs ± 0%    8.20µs ± 0%   -6.19%  (p=0.029 n=4+4)
PutUint16-16                1.16ns ± 0%    0.93ns ± 0%  -19.83%  (p=0.029 n=4+4)
PutUint32-16                1.16ns ± 0%    0.93ns ± 0%  -19.83%  (p=0.029 n=4+4)
PutUint64-16                1.85ns ± 0%    0.93ns ± 0%  -49.73%  (p=0.029 n=4+4)
LittleEndianPutUint16-16    1.03ns ± 0%    0.93ns ± 0%   -9.71%  (p=0.029 n=4+4)
LittleEndianPutUint32-16    0.93ns ± 0%    0.93ns ± 0%     ~     (all equal)
LittleEndianPutUint64-16    0.93ns ± 0%    0.93ns ± 0%     ~     (all equal)
PutUvarint32-16             43.0ns ± 0%    43.1ns ± 0%   +0.12%  (p=0.429 n=4+4)
PutUvarint64-16              174ns ± 0%     175ns ± 0%   +0.29%  (p=0.429 n=4+4)

The getUint64 and putUint64 functions in gcm.go were updated with a bounds
check hint so that the new rules match them. An existing testcase prevents
these functions from being replaced by those in encoding/binary due to
import dependencies.

Change-Id: Idb3bd1e6e7b12d86cd828fb29cb095848a3e485a
Reviewed-on: https://go-review.googlesource.com/98136
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
parent f31a18de
...@@ -619,35 +619,31 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -619,35 +619,31 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Reg = ppc64.REGTMP // discard result p.To.Reg = ppc64.REGTMP // discard result
case ssa.OpPPC64MOVDaddr: case ssa.OpPPC64MOVDaddr:
p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_ADDR
p.From.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
var wantreg string
// Suspect comment, copied from ARM code
// MOVD $sym+off(base), R
// the assembler expands it as the following:
// - base is SP: add constant offset to SP
// when constant is large, tmp register (R11) may be used
// - base is SB: load external address from constant pool (use relocation)
switch v.Aux.(type) { switch v.Aux.(type) {
default: default:
v.Fatalf("aux is of unknown type %T", v.Aux) v.Fatalf("aux in MOVDaddr is of unknown type %T", v.Aux)
case *obj.LSym:
wantreg = "SB"
gc.AddAux(&p.From, v)
case *gc.Node:
wantreg = "SP"
gc.AddAux(&p.From, v)
case nil: case nil:
// No sym, just MOVD $off(SP), R // If aux offset and aux int are both 0, and the same
wantreg = "SP" // input and output regs are used, no instruction
p.From.Offset = v.AuxInt // needs to be generated, since it would just be
} // addi rx, rx, 0.
if reg := v.Args[0].RegName(); reg != wantreg { if v.AuxInt != 0 || v.Args[0].Reg() != v.Reg() {
v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg) p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_ADDR
p.From.Reg = v.Args[0].Reg()
p.From.Offset = v.AuxInt
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
}
case *obj.LSym, *gc.Node:
p := s.Prog(ppc64.AMOVD)
p.From.Type = obj.TYPE_ADDR
p.From.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
gc.AddAux(&p.From, v)
} }
case ssa.OpPPC64MOVDconst: case ssa.OpPPC64MOVDconst:
...@@ -729,6 +725,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) { ...@@ -729,6 +725,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
p.To.Type = obj.TYPE_REG p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg() p.To.Reg = v.Reg()
case ssa.OpPPC64MOVDBRload, ssa.OpPPC64MOVWBRload, ssa.OpPPC64MOVHBRload:
p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM
p.From.Reg = v.Args[0].Reg()
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
case ssa.OpPPC64MOVDBRstore, ssa.OpPPC64MOVWBRstore, ssa.OpPPC64MOVHBRstore:
p := s.Prog(v.Op.Asm())
p.To.Type = obj.TYPE_MEM
p.To.Reg = v.Args[0].Reg()
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[1].Reg()
case ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload: case ssa.OpPPC64FMOVDload, ssa.OpPPC64FMOVSload:
p := s.Prog(v.Op.Asm()) p := s.Prog(v.Op.Asm())
p.From.Type = obj.TYPE_MEM p.From.Type = obj.TYPE_MEM
......
...@@ -1586,6 +1586,12 @@ const ( ...@@ -1586,6 +1586,12 @@ const (
OpPPC64MOVWload OpPPC64MOVWload
OpPPC64MOVWZload OpPPC64MOVWZload
OpPPC64MOVDload OpPPC64MOVDload
OpPPC64MOVDBRload
OpPPC64MOVWBRload
OpPPC64MOVHBRload
OpPPC64MOVDBRstore
OpPPC64MOVWBRstore
OpPPC64MOVHBRstore
OpPPC64FMOVDload OpPPC64FMOVDload
OpPPC64FMOVSload OpPPC64FMOVSload
OpPPC64MOVBstore OpPPC64MOVBstore
...@@ -20957,6 +20963,96 @@ var opcodeTable = [...]opInfo{ ...@@ -20957,6 +20963,96 @@ var opcodeTable = [...]opInfo{
}, },
}, },
}, },
{
name: "MOVDBRload",
auxType: auxSymOff,
argLen: 2,
faultOnNilArg0: true,
symEffect: SymRead,
asm: ppc64.AMOVDBR,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "MOVWBRload",
auxType: auxSymOff,
argLen: 2,
faultOnNilArg0: true,
symEffect: SymRead,
asm: ppc64.AMOVWBR,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "MOVHBRload",
auxType: auxSymOff,
argLen: 2,
faultOnNilArg0: true,
symEffect: SymRead,
asm: ppc64.AMOVHBR,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "MOVDBRstore",
auxType: auxSymOff,
argLen: 3,
faultOnNilArg0: true,
symEffect: SymWrite,
asm: ppc64.AMOVDBR,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
{1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "MOVWBRstore",
auxType: auxSymOff,
argLen: 3,
faultOnNilArg0: true,
symEffect: SymWrite,
asm: ppc64.AMOVWBR,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
{1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{
name: "MOVHBRstore",
auxType: auxSymOff,
argLen: 3,
faultOnNilArg0: true,
symEffect: SymWrite,
asm: ppc64.AMOVHBR,
reg: regInfo{
inputs: []inputInfo{
{0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
{1, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
},
},
},
{ {
name: "FMOVDload", name: "FMOVDload",
auxType: auxSymOff, auxType: auxSymOff,
...@@ -21134,7 +21230,7 @@ var opcodeTable = [...]opInfo{ ...@@ -21134,7 +21230,7 @@ var opcodeTable = [...]opInfo{
asm: ppc64.AMOVD, asm: ppc64.AMOVD,
reg: regInfo{ reg: regInfo{
inputs: []inputInfo{ inputs: []inputInfo{
{0, 6}, // SP SB {0, 1073733630}, // SP SB R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
}, },
outputs: []outputInfo{ outputs: []outputInfo{
{0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29 {0, 1073733624}, // R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R14 R15 R16 R17 R18 R19 R20 R21 R22 R23 R24 R25 R26 R27 R28 R29
......
...@@ -375,7 +375,7 @@ func (s *regAllocState) allocReg(mask regMask, v *Value) register { ...@@ -375,7 +375,7 @@ func (s *regAllocState) allocReg(mask regMask, v *Value) register {
mask &= s.allocatable mask &= s.allocatable
mask &^= s.nospill mask &^= s.nospill
if mask == 0 { if mask == 0 {
s.f.Fatalf("no register available for %s", v) s.f.Fatalf("no register available for %s", v.LongString())
} }
// Pick an unused register if one is available. // Pick an unused register if one is available.
......
...@@ -4973,6 +4973,8 @@ func (c *ctxt9) opstorex(a obj.As) uint32 { ...@@ -4973,6 +4973,8 @@ func (c *ctxt9) opstorex(a obj.As) uint32 {
return OPVCC(31, 661, 0, 0) /* stswx */ return OPVCC(31, 661, 0, 0) /* stswx */
case AMOVWBR: case AMOVWBR:
return OPVCC(31, 662, 0, 0) /* stwbrx */ return OPVCC(31, 662, 0, 0) /* stwbrx */
case AMOVDBR:
return OPVCC(31, 660, 0, 0) /* stdbrx */
case ASTBCCC: case ASTBCCC:
return OPVCC(31, 694, 0, 1) /* stbcx. */ return OPVCC(31, 694, 0, 1) /* stbcx. */
case ASTWCCC: case ASTWCCC:
......
...@@ -413,6 +413,7 @@ func (g *gcm) auth(out, ciphertext, additionalData []byte, tagMask *[gcmTagSize] ...@@ -413,6 +413,7 @@ func (g *gcm) auth(out, ciphertext, additionalData []byte, tagMask *[gcmTagSize]
} }
func getUint64(data []byte) uint64 { func getUint64(data []byte) uint64 {
_ = data[7] // bounds check hint to compiler; see golang.org/issue/14808
r := uint64(data[0])<<56 | r := uint64(data[0])<<56 |
uint64(data[1])<<48 | uint64(data[1])<<48 |
uint64(data[2])<<40 | uint64(data[2])<<40 |
...@@ -425,6 +426,7 @@ func getUint64(data []byte) uint64 { ...@@ -425,6 +426,7 @@ func getUint64(data []byte) uint64 {
} }
func putUint64(out []byte, v uint64) { func putUint64(out []byte, v uint64) {
_ = out[7] // bounds check hint to compiler; see golang.org/issue/14808
out[0] = byte(v >> 56) out[0] = byte(v >> 56)
out[1] = byte(v >> 48) out[1] = byte(v >> 48)
out[2] = byte(v >> 40) out[2] = byte(v >> 40)
......
...@@ -69,6 +69,7 @@ func load_be64(b []byte) { ...@@ -69,6 +69,7 @@ func load_be64(b []byte) {
// amd64:`BSWAPQ` // amd64:`BSWAPQ`
// s390x:`MOVD\s\(.*\),` // s390x:`MOVD\s\(.*\),`
// arm64:`REV`,`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`,-`REVW`,-`REV16W` // arm64:`REV`,`MOVD\s\(R[0-9]+\),`,-`MOV[BHW]`,-`REVW`,-`REV16W`
// ppc64le:`MOVDBR`
sink64 = binary.BigEndian.Uint64(b) sink64 = binary.BigEndian.Uint64(b)
} }
...@@ -76,6 +77,7 @@ func load_be64_idx(b []byte, idx int) { ...@@ -76,6 +77,7 @@ func load_be64_idx(b []byte, idx int) {
// amd64:`BSWAPQ` // amd64:`BSWAPQ`
// s390x:`MOVD\s\(.*\)\(.*\*1\),` // s390x:`MOVD\s\(.*\)\(.*\*1\),`
// arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W` // arm64:`REV`,`MOVD\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[WHB]`,-`REVW`,-`REV16W`
// ppc64le:`MOVDBR`
sink64 = binary.BigEndian.Uint64(b[idx:]) sink64 = binary.BigEndian.Uint64(b[idx:])
} }
...@@ -83,6 +85,7 @@ func load_be32(b []byte) { ...@@ -83,6 +85,7 @@ func load_be32(b []byte) {
// amd64:`BSWAPL` // amd64:`BSWAPL`
// s390x:`MOVWZ\s\(.*\),` // s390x:`MOVWZ\s\(.*\),`
// arm64:`REVW`,`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`,-`REV16W` // arm64:`REVW`,`MOVWU\s\(R[0-9]+\),`,-`MOV[BH]`,-`REV16W`
// ppc64le:`MOVWBR`
sink32 = binary.BigEndian.Uint32(b) sink32 = binary.BigEndian.Uint32(b)
} }
...@@ -90,18 +93,21 @@ func load_be32_idx(b []byte, idx int) { ...@@ -90,18 +93,21 @@ func load_be32_idx(b []byte, idx int) {
// amd64:`BSWAPL` // amd64:`BSWAPL`
// s390x:`MOVWZ\s\(.*\)\(.*\*1\),` // s390x:`MOVWZ\s\(.*\)\(.*\*1\),`
// arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W` // arm64:`REVW`,`MOVWU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOV[HB]`,-`REV16W`
// ppc64le:`MOVWBR`
sink32 = binary.BigEndian.Uint32(b[idx:]) sink32 = binary.BigEndian.Uint32(b[idx:])
} }
func load_be16(b []byte) { func load_be16(b []byte) {
// amd64:`ROLW\s\$8` // amd64:`ROLW\s\$8`
// arm64: `REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB` // arm64: `REV16W`,`MOVHU\s\(R[0-9]+\),`,-`MOVB`
// ppc64le:`MOVHBR`
sink16 = binary.BigEndian.Uint16(b) sink16 = binary.BigEndian.Uint16(b)
} }
func load_be16_idx(b []byte, idx int) { func load_be16_idx(b []byte, idx int) {
// amd64:`ROLW\s\$8` // amd64:`ROLW\s\$8`
// arm64: `REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB` // arm64: `REV16W`,`MOVHU\s\(R[0-9]+\)\(R[0-9]+\),`,-`MOVB`
// ppc64le:`MOVHBR`
sink16 = binary.BigEndian.Uint16(b[idx:]) sink16 = binary.BigEndian.Uint16(b[idx:])
} }
...@@ -203,50 +209,56 @@ func store_le32_idx(b []byte, idx int) { ...@@ -203,50 +209,56 @@ func store_le32_idx(b []byte, idx int) {
func store_le16(b []byte) { func store_le16(b []byte) {
// amd64:`MOVW\s` // amd64:`MOVW\s`
// arm64:`MOVH`,-`MOVB` // arm64:`MOVH`,-`MOVB`
// ppc64le(DISABLED):`MOVH\s` // ppc64le:`MOVH\s`
binary.LittleEndian.PutUint16(b, sink16) binary.LittleEndian.PutUint16(b, sink16)
} }
func store_le16_idx(b []byte, idx int) { func store_le16_idx(b []byte, idx int) {
// amd64:`MOVW\s` // amd64:`MOVW\s`
// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB` // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOVB`
// ppc64le(DISABLED):`MOVH\s` // ppc64le:`MOVH\s`
binary.LittleEndian.PutUint16(b[idx:], sink16) binary.LittleEndian.PutUint16(b[idx:], sink16)
} }
func store_be64(b []byte) { func store_be64(b []byte) {
// amd64:`BSWAPQ`,-`SHR.` // amd64:`BSWAPQ`,-`SHR.`
// arm64:`MOVD`,`REV`,-`MOV[WBH]`,-`REVW`,-`REV16W` // arm64:`MOVD`,`REV`,-`MOV[WBH]`,-`REVW`,-`REV16W`
// ppc64le:`MOVDBR`
binary.BigEndian.PutUint64(b, sink64) binary.BigEndian.PutUint64(b, sink64)
} }
func store_be64_idx(b []byte, idx int) { func store_be64_idx(b []byte, idx int) {
// amd64:`BSWAPQ`,-`SHR.` // amd64:`BSWAPQ`,-`SHR.`
// arm64:`REV`,`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`,-`REV16W`,-`REVW` // arm64:`REV`,`MOVD\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BHW]`,-`REV16W`,-`REVW`
// ppc64le:`MOVDBR`
binary.BigEndian.PutUint64(b[idx:], sink64) binary.BigEndian.PutUint64(b[idx:], sink64)
} }
func store_be32(b []byte) { func store_be32(b []byte) {
// amd64:`BSWAPL`,-`SHR.` // amd64:`BSWAPL`,-`SHR.`
// arm64:`MOVW`,`REVW`,-`MOV[BH]`,-`REV16W` // arm64:`MOVW`,`REVW`,-`MOV[BH]`,-`REV16W`
// ppc64le:`MOVWBR`
binary.BigEndian.PutUint32(b, sink32) binary.BigEndian.PutUint32(b, sink32)
} }
func store_be32_idx(b []byte, idx int) { func store_be32_idx(b []byte, idx int) {
// amd64:`BSWAPL`,-`SHR.` // amd64:`BSWAPL`,-`SHR.`
// arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`,-`REV16W` // arm64:`REVW`,`MOVW\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,-`MOV[BH]`,-`REV16W`
// ppc64le:`MOVWBR`
binary.BigEndian.PutUint32(b[idx:], sink32) binary.BigEndian.PutUint32(b[idx:], sink32)
} }
func store_be16(b []byte) { func store_be16(b []byte) {
// amd64:`ROLW\s\$8`,-`SHR.` // amd64:`ROLW\s\$8`,-`SHR.`
// arm64:`MOVH`,`REV16W`,-`MOVB` // arm64:`MOVH`,`REV16W`,-`MOVB`
// ppc64le:`MOVHBR`
binary.BigEndian.PutUint16(b, sink16) binary.BigEndian.PutUint16(b, sink16)
} }
func store_be16_idx(b []byte, idx int) { func store_be16_idx(b []byte, idx int) {
// amd64:`ROLW\s\$8`,-`SHR.` // amd64:`ROLW\s\$8`,-`SHR.`
// arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB` // arm64:`MOVH\sR[0-9]+,\s\(R[0-9]+\)\(R[0-9]+\)`,`REV16W`,-`MOVB`
// ppc64le:`MOVHBR`
binary.BigEndian.PutUint16(b[idx:], sink16) binary.BigEndian.PutUint16(b[idx:], sink16)
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment