Commit 90e0b40a authored by Ruixin Bao's avatar Ruixin Bao Committed by Michael Munday

cmd/internal/obj/s390x: use 12 bit load and store instruction when possible on s390x

Originally, we default to use load and store instruction with 20 bit displacement.
However, that is not necessary. Some instructions have a displacement smaller
than 12 bit. This CL allows the usage of 12 bit load and store instruction when
that happens.

This change also reduces the size of .text section in go binary by 19 KB.

Some tests are also added to verify the functionality of the change.

Change-Id: I13edea06ca653d4b9ffeaefe8d010bc2f065c2ba
Reviewed-on: https://go-review.googlesource.com/c/go/+/194857Reviewed-by: default avatarMichael Munday <mike.munday@ibm.com>
Run-TryBot: Michael Munday <mike.munday@ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent c20eb96e
...@@ -33,12 +33,12 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16- ...@@ -33,12 +33,12 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
MOVWBR (R15), R9 // e390f000001e MOVWBR (R15), R9 // e390f000001e
MOVD R1, n-8(SP) // e310f0100024 MOVD R1, n-8(SP) // e310f0100024
MOVW R2, n-8(SP) // e320f0100050 MOVW R2, n-8(SP) // 5020f010
MOVH R3, n-8(SP) // e330f0100070 MOVH R3, n-8(SP) // 4030f010
MOVB R4, n-8(SP) // e340f0100072 MOVB R4, n-8(SP) // 4240f010
MOVWZ R5, n-8(SP) // e350f0100050 MOVWZ R5, n-8(SP) // 5050f010
MOVHZ R6, n-8(SP) // e360f0100070 MOVHZ R6, n-8(SP) // 4060f010
MOVBZ R7, n-8(SP) // e370f0100072 MOVBZ R7, n-8(SP) // 4270f010
MOVDBR R8, n-8(SP) // e380f010002f MOVDBR R8, n-8(SP) // e380f010002f
MOVWBR R9, n-8(SP) // e390f010003e MOVWBR R9, n-8(SP) // e390f010003e
...@@ -58,6 +58,20 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16- ...@@ -58,6 +58,20 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
MOVB $-128, -524288(R5) // eb8050008052 MOVB $-128, -524288(R5) // eb8050008052
MOVB $1, -524289(R6) // c0a1fff7ffff41aa60009201a000 MOVB $1, -524289(R6) // c0a1fff7ffff41aa60009201a000
// RX (12-bit displacement) and RXY (20-bit displacement) instruction encoding (e.g: ST vs STY)
MOVW R1, 4095(R2)(R3) // 50132fff
MOVW R1, 4096(R2)(R3) // e31320000150
MOVWZ R1, 4095(R2)(R3) // 50132fff
MOVWZ R1, 4096(R2)(R3) // e31320000150
MOVH R1, 4095(R2)(R3) // 40132fff
MOVHZ R1, 4095(R2)(R3) // 40132fff
MOVH R1, 4096(R2)(R3) // e31320000170
MOVHZ R1, 4096(R2)(R3) // e31320000170
MOVB R1, 4095(R2)(R3) // 42132fff
MOVBZ R1, 4095(R2)(R3) // 42132fff
MOVB R1, 4096(R2)(R3) // e31320000172
MOVBZ R1, 4096(R2)(R3) // e31320000172
ADD R1, R2 // b9e81022 ADD R1, R2 // b9e81022
ADD R1, R2, R3 // b9e81032 ADD R1, R2, R3 // b9e81032
ADD $8192, R1 // a71b2000 ADD $8192, R1 // a71b2000
...@@ -300,10 +314,22 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16- ...@@ -300,10 +314,22 @@ TEXT main·foo(SB),DUPOK|NOSPLIT,$16-0 // TEXT main.foo(SB), DUPOK|NOSPLIT, $16-
FMOVS $0, F11 // b37400b0 FMOVS $0, F11 // b37400b0
FMOVD $0, F12 // b37500c0 FMOVD $0, F12 // b37500c0
FMOVS (R1)(R2*1), F0 // ed0210000064 FMOVS (R1)(R2*1), F0 // 78021000
FMOVS n-8(SP), F15 // edf0f0100064 FMOVS n-8(SP), F15 // 78f0f010
FMOVD -9999999(R8)(R9*1), F8 // c0a1ff67698141aa9000ed8a80000065 FMOVD -9999999(R8)(R9*1), F8 // c0a1ff67698141aa9000688a8000
FMOVD F4, F5 // 2854 FMOVD F4, F5 // 2854
// RX (12-bit displacement) and RXY (20-bit displacement) instruction encoding (e.g. LD vs LDY)
FMOVD (R1), F0 // 68001000
FMOVD 4095(R2), F13 // 68d02fff
FMOVD 4096(R2), F15 // edf020000165
FMOVS 4095(R2)(R3), F13 // 78d32fff
FMOVS 4096(R2)(R4), F15 // edf420000164
FMOVD F0, 4095(R1) // 60001fff
FMOVD F0, 4096(R1) // ed0010000167
FMOVS F13, 4095(R2)(R3) // 70d32fff
FMOVS F13, 4096(R2)(R3) // edd320000166
FADDS F0, F15 // b30a00f0 FADDS F0, F15 // b30a00f0
FADD F1, F14 // b31a00e1 FADD F1, F14 // b31a00e1
FSUBS F2, F13 // b30b00d2 FSUBS F2, F13 // b30b00d2
......
...@@ -3320,7 +3320,12 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) { ...@@ -3320,7 +3320,12 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) {
x2 = REGTMP x2 = REGTMP
d2 = 0 d2 = 0
} }
// Emits an RX instruction if an appropriate one exists and the displacement fits in 12 bits. Otherwise use an RXY instruction.
if op, ok := c.zopstore12(p.As); ok && isU12(d2) {
zRX(op, uint32(p.From.Reg), uint32(x2), uint32(b2), uint32(d2), asm)
} else {
zRXY(c.zopstore(p.As), uint32(p.From.Reg), uint32(x2), uint32(b2), uint32(d2), asm) zRXY(c.zopstore(p.As), uint32(p.From.Reg), uint32(x2), uint32(b2), uint32(d2), asm)
}
case 36: // mov mem reg (no relocation) case 36: // mov mem reg (no relocation)
d2 := c.regoff(&p.From) d2 := c.regoff(&p.From)
...@@ -3337,7 +3342,12 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) { ...@@ -3337,7 +3342,12 @@ func (c *ctxtz) asmout(p *obj.Prog, asm *[]byte) {
x2 = REGTMP x2 = REGTMP
d2 = 0 d2 = 0
} }
// Emits an RX instruction if an appropriate one exists and the displacement fits in 12 bits. Otherwise use an RXY instruction.
if op, ok := c.zopload12(p.As); ok && isU12(d2) {
zRX(op, uint32(p.To.Reg), uint32(x2), uint32(b2), uint32(d2), asm)
} else {
zRXY(c.zopload(p.As), uint32(p.To.Reg), uint32(x2), uint32(b2), uint32(d2), asm) zRXY(c.zopload(p.As), uint32(p.To.Reg), uint32(x2), uint32(b2), uint32(d2), asm)
}
case 40: // word/byte case 40: // word/byte
wd := uint32(c.regoff(&p.From)) wd := uint32(c.regoff(&p.From))
...@@ -4215,6 +4225,22 @@ func (c *ctxtz) regoff(a *obj.Addr) int32 { ...@@ -4215,6 +4225,22 @@ func (c *ctxtz) regoff(a *obj.Addr) int32 {
return int32(c.vregoff(a)) return int32(c.vregoff(a))
} }
// find if the displacement is within 12 bit
func isU12(displacement int32) bool {
return displacement >= 0 && displacement < DISP12
}
// zopload12 returns the RX op with 12 bit displacement for the given load
func (c *ctxtz) zopload12(a obj.As) (uint32, bool) {
switch a {
case AFMOVD:
return op_LD, true
case AFMOVS:
return op_LE, true
}
return 0, false
}
// zopload returns the RXY op for the given load // zopload returns the RXY op for the given load
func (c *ctxtz) zopload(a obj.As) uint32 { func (c *ctxtz) zopload(a obj.As) uint32 {
switch a { switch a {
...@@ -4253,6 +4279,23 @@ func (c *ctxtz) zopload(a obj.As) uint32 { ...@@ -4253,6 +4279,23 @@ func (c *ctxtz) zopload(a obj.As) uint32 {
return 0 return 0
} }
// zopstore12 returns the RX op with 12 bit displacement for the given store
func (c *ctxtz) zopstore12(a obj.As) (uint32, bool) {
switch a {
case AFMOVD:
return op_STD, true
case AFMOVS:
return op_STE, true
case AMOVW, AMOVWZ:
return op_ST, true
case AMOVH, AMOVHZ:
return op_STH, true
case AMOVB, AMOVBZ:
return op_STC, true
}
return 0, false
}
// zopstore returns the RXY op for the given store // zopstore returns the RXY op for the given store
func (c *ctxtz) zopstore(a obj.As) uint32 { func (c *ctxtz) zopstore(a obj.As) uint32 {
switch a { switch a {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment