Commit 69261eca authored by Ben Shi's avatar Ben Shi Committed by Cherry Zhang

runtime: use hardware divider to improve performance

The hardware divider is an optional component of ARMv7. This patch
detects at run time whether it is available and uses it if so.

1. The hardware divider is detected at startup and a flag is set or
   cleared according to a particular bit of runtime.hwcap.
2. Each call to runtime.udiv checks this flag and decides whether to
   use the hardware division instruction.

A rough test shows that performance improves by 40-50% on ARMv7, and
compatibility with ARMv5/v6 is not broken.

fixes #19118

Change-Id: Ic586bc9659ebc169553ca2004d2bdb721df823ac
Reviewed-on: https://go-review.googlesource.com/37496
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
parent 2a8d99e4
......@@ -12,6 +12,7 @@
// int8_t DidInitRun();
// int8_t DidMainRun();
// int32_t FromPkg();
// uint32_t Divu(uint32_t, uint32_t);
int main(void) {
int8_t ran_init = DidInitRun();
if (!ran_init) {
......@@ -30,6 +31,11 @@ int main(void) {
fprintf(stderr, "ERROR: FromPkg=%d, want %d\n", from_pkg, 1024);
return 1;
}
uint32_t divu = Divu(2264, 31);
if (divu != 73) {
fprintf(stderr, "ERROR: Divu(2264, 31)=%d, want %d\n", divu, 73);
return 1;
}
// test.bash looks for "PASS" to ensure this program has reached the end.
printf("PASS\n");
return 0;
......
......@@ -8,3 +8,5 @@ import "C"
// FromPkg returns the fixed value 1024.
//
//export FromPkg
func FromPkg() int32 {
	const pkgValue int32 = 1024
	return pkgValue
}
// Divu returns the unsigned 32-bit quotient a / b.
// As with any Go integer division, b == 0 panics at run time.
//
//export Divu
func Divu(a, b uint32) uint32 {
	quotient := a / b
	return quotient
}
......@@ -400,6 +400,12 @@ func TestTrivialExecutablePIE(t *testing.T) {
AssertHasRPath(t, "./trivial.pie", gorootInstallDir)
}
// TestDivisionExecutable installs the "division" test program with
// -linkshared and then runs the resulting binary.
func TestDivisionExecutable(t *testing.T) {
	const prog = "division"
	goCmd(t, "install", "-linkshared", prog)
	run(t, prog+" executable", "./bin/"+prog)
}
// Build an executable that uses cgo linked against the shared runtime and check it
// runs.
func TestCgoExecutable(t *testing.T) {
......
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
// div returns the unsigned quotient x / y.
// Inlining is disabled by the directive below.
//
//go:noinline
func div(x, y uint32) uint32 {
	quotient := x / y
	return quotient
}
// main checks one known division result and panics with "FAIL" if the
// quotient is wrong.
func main() {
	if got := div(97, 11); got != 8 {
		panic("FAIL")
	}
}
\ No newline at end of file
......@@ -965,6 +965,13 @@ jmp_label_3:
REVSH R1, R2 // b12fffe6
RBIT R1, R2 // 312fffe6
// DIVHW R0, R1, R2: R1 / R0 -> R2
DIVHW R0, R1, R2 // 11f012e7
DIVUHW R0, R1, R2 // 11f032e7
// DIVHW R0, R1: R1 / R0 -> R1
DIVHW R0, R1 // 11f011e7
DIVUHW R0, R1 // 11f031e7
//
// END
//
......
......@@ -247,6 +247,8 @@ const (
ADIV
AMOD
AMODU
ADIVHW
ADIVUHW
AMOVB
AMOVBS
......
......@@ -71,6 +71,8 @@ var Anames = []string{
"DIV",
"MOD",
"MODU",
"DIVHW",
"DIVUHW",
"MOVB",
"MOVBS",
"MOVBU",
......
......@@ -142,6 +142,8 @@ var optab = []Optab{
{AMUL, C_REG, C_NONE, C_REG, 15, 4, 0, 0, 0},
{ADIV, C_REG, C_REG, C_REG, 16, 4, 0, 0, 0},
{ADIV, C_REG, C_NONE, C_REG, 16, 4, 0, 0, 0},
{ADIVHW, C_REG, C_REG, C_REG, 105, 4, 0, 0, 0},
{ADIVHW, C_REG, C_NONE, C_REG, 105, 4, 0, 0, 0},
{AMULL, C_REG, C_REG, C_REGREG, 17, 4, 0, 0, 0},
{AMULA, C_REG, C_REG, C_REGREG2, 17, 4, 0, 0, 0},
{AMOVW, C_REG, C_NONE, C_SAUTO, 20, 4, REGSP, 0, 0},
......@@ -1401,6 +1403,9 @@ func buildop(ctxt *obj.Link) {
opset(AMODU, r0)
opset(ADIVU, r0)
case ADIVHW:
opset(ADIVUHW, r0)
case AMOVW,
AMOVB,
AMOVBS,
......@@ -2407,6 +2412,16 @@ func (c *ctxt5) asmout(p *obj.Prog, o *Optab, out []uint32) {
if p.As == ADATABUNDLE {
o1 = 0xe125be70
}
case 105: /* divhw r,[r,]r */
o1 = c.oprrr(p, p.As, int(p.Scond))
rf := int(p.From.Reg)
rt := int(p.To.Reg)
r := int(p.Reg)
if r == 0 {
r = rt
}
o1 |= (uint32(rf)&15)<<8 | (uint32(r)&15)<<0 | (uint32(rt)&15)<<16
}
out[0] = o1
......@@ -2445,6 +2460,10 @@ func (c *ctxt5) oprrr(p *obj.Prog, a obj.As, sc int) uint32 {
c.ctxt.Diag(".nil/.W on dp instruction")
}
switch a {
case ADIVHW:
return o | 0x71<<20 | 0xf<<12 | 0x1<<4
case ADIVUHW:
return o | 0x73<<20 | 0xf<<12 | 0x1<<4
case AMMUL:
return o | 0x75<<20 | 0xf<<12 | 0x1<<4
case AMULS:
......
......@@ -4,6 +4,8 @@
package runtime
var hardDiv bool // TODO: set if a hardware divider is available
func checkgoarm() {
// TODO(minux): FP checks like in os_linux_arm.go.
......
......@@ -4,6 +4,8 @@
package runtime
var hardDiv bool // TODO: set if a hardware divider is available
func checkgoarm() {
// TODO(minux): FP checks like in os_linux_arm.go.
......
......@@ -11,11 +11,13 @@ const (
_HWCAP_VFP = 1 << 6 // introduced in at least 2.6.11
_HWCAP_VFPv3 = 1 << 13 // introduced in 2.6.30
_HWCAP_IDIVA = 1 << 17
)
// randomNumber's writer is not part of the visible code.
// NOTE(review): presumably filled in from the auxiliary vector — confirm.
var randomNumber uint32
// armArch starts out as 6; nothing visible here changes it.
// NOTE(review): presumably the detected ARM architecture version — confirm.
var armArch uint8 = 6 // we default to ARMv6
// hwcap holds the CPU capability bit flags; archauxv stores the value of the
// _AT_HWCAP auxiliary-vector entry here, truncated to 32 bits.
var hwcap uint32 // set by setup_auxv
// hardDiv is derived from hwcap in archauxv: it is true exactly when the
// _HWCAP_IDIVA bit is set.
var hardDiv bool // set if a hardware divider is available
func checkgoarm() {
// On Android, /proc/self/auxv might be unreadable and hwcap won't
......@@ -53,6 +55,7 @@ func archauxv(tag, val uintptr) {
case _AT_HWCAP: // CPU capability bit flags
hwcap = uint32(val)
hardDiv = (hwcap & _HWCAP_IDIVA) != 0
}
}
......
......@@ -4,6 +4,8 @@
package runtime
var hardDiv bool // TODO: set if a hardware divider is available
func checkgoarm() {
// TODO(minux): FP checks like in os_linux_arm.go.
......
......@@ -6,6 +6,8 @@ package runtime
import "unsafe"
var hardDiv bool // TODO: set if a hardware divider is available
func lwp_mcontext_init(mc *mcontextt, stk unsafe.Pointer, mp *m, gp *g, fn uintptr) {
// Machine dependent mcontext initialisation for LWP.
mc.__gregs[_REG_R15] = uint32(funcPC(lwp_tramp))
......
......@@ -4,6 +4,8 @@
package runtime
var hardDiv bool // TODO: set if a hardware divider is available
func checkgoarm() {
// TODO(minux): FP checks like in os_linux_arm.go.
......
......@@ -4,6 +4,8 @@
package runtime
var hardDiv bool // TODO: set if a hardware divider is available
// checkgoarm is a stub in this file: it performs no checks and
// returns immediately.
func checkgoarm() {
	// TODO(minux)
}
......
......@@ -119,6 +119,10 @@ TEXT runtime·_sfloatpanic(SB),NOSPLIT,$-4
// Be careful: Ra == R11 will be used by the linker for synthesized instructions.
TEXT udiv(SB),NOSPLIT,$-4
MOVBU runtime·hardDiv(SB), Ra
CMP $0, Ra
BNE udiv_hardware
CLZ Rq, Rs // find normalizing shift
MOVW.S Rq<<Rs, Ra
MOVW $fast_udiv_tab<>-64(SB), RM
......@@ -154,6 +158,14 @@ TEXT udiv(SB),NOSPLIT,$-4
ADD.PL $2, Rq
RET
// use hardware divider
udiv_hardware:
DIVUHW Rq, Rr, Rs
MUL Rs, Rq, RM
RSB Rr, RM, Rr
MOVW Rs, Rq
RET
udiv_by_large_d:
// at this point we know d>=2^(31-6)=2^25
SUB $4, Ra, Ra
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment