Commit 2a415820 authored by Michael Munday's avatar Michael Munday Committed by Matthew Dempsky

cmd/compile/internal/gc: refactor cgen_div

This commit adds two new functions to cgen.go: hasHMUL64 and
hasRROTC64. These are used to determine whether or not an
architecture supports the instructions needed to perform an
optimization in cgen_div.

This commit should not affect existing architectures (although it
does add s390x to the new functions). However, since most
architectures support HMUL the hasHMUL64 function could be
modified to enable most of the optimizations in cgen_div on those
platforms.

Change-Id: I33bf329ddeb6cf2954bd17b7c161012de352fb62
Reviewed-on: https://go-review.googlesource.com/21775Reviewed-by: default avatarMatthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: default avatarJosh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent a00ad5f4
......@@ -2622,24 +2622,48 @@ func cgen_ret(n *Node) {
}
}
// hasHMUL64 reports whether the architecture supports 64-bit
// signed and unsigned high multiplication (OHMUL).
func hasHMUL64() bool {
switch Ctxt.Arch.Family {
case sys.AMD64, sys.S390X:
return true
case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64:
return false
}
Fatalf("unknown architecture")
return false
}
// hasRROTC64 reports whether the architecture supports 64-bit
// rotate through carry instructions (ORROTC).
func hasRROTC64() bool {
switch Ctxt.Arch.Family {
case sys.AMD64:
return true
case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64, sys.S390X:
return false
}
Fatalf("unknown architecture")
return false
}
// generate division according to op, one of:
// res = nl / nr
// res = nl % nr
func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
var w int
// TODO(rsc): arm64 needs to support the relevant instructions
// in peep and optoas in order to enable this.
// TODO(rsc): ppc64 needs to support the relevant instructions
// in peep and optoas in order to enable this.
if nr.Op != OLITERAL || Ctxt.Arch.Family == sys.MIPS64 || Ctxt.Arch.Family == sys.ARM64 || Ctxt.Arch.Family == sys.PPC64 {
// Architectures need to support 64-bit high multiplications
// (OHMUL) in order to perform divide by constant optimizations.
if nr.Op != OLITERAL || !hasHMUL64() {
goto longdiv
}
w = int(nl.Type.Width * 8)
// Front end handled 32-bit division. We only need to handle 64-bit.
// try to do division by multiply by (2^w)/d
// see hacker's delight chapter 10
// Try to do division using multiplication: (2^w)/d.
// See Hacker's Delight, chapter 10.
switch Simtype[nl.Type.Etype] {
default:
goto longdiv
......@@ -2652,6 +2676,17 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
if m.Bad != 0 {
break
}
// In order to add the numerator we need to be able to
// avoid overflow. This is done by shifting the result of the
// addition right by 1 and inserting the carry bit into
// the MSB. For now this needs the RROTC instruction.
// TODO(mundaym): Hacker's Delight 2nd ed. chapter 10 proposes
// an alternative sequence of instructions for architectures
// that do not have a shift right with carry instruction.
if m.Ua != 0 && !hasRROTC64() {
goto longdiv
}
if op == OMOD {
goto longmod
}
......@@ -2665,7 +2700,7 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
Thearch.Cgen_hmul(&n1, &n2, &n3)
if m.Ua != 0 {
// need to add numerator accounting for overflow
// Need to add numerator accounting for overflow.
Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
Nodconst(&n2, nl.Type, 1)
......@@ -2703,7 +2738,7 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
Thearch.Cgen_hmul(&n1, &n2, &n3)
if m.Sm < 0 {
// need to add numerator
// Need to add numerator (cannot overflow).
Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
}
......@@ -2716,8 +2751,8 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
Thearch.Gins(Thearch.Optoas(OSUB, nl.Type), &n1, &n3) // added
if m.Sd < 0 {
// this could probably be removed
// by factoring it into the multiplier
// This could probably be removed by factoring it into
// the multiplier.
Thearch.Gins(Thearch.Optoas(OMINUS, nl.Type), nil, &n3)
}
......@@ -2729,14 +2764,14 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
goto longdiv
// division and mod using (slow) hardware instruction
// Division and mod using (slow) hardware instruction.
longdiv:
Thearch.Dodiv(op, nl, nr, res)
return
// mod using formula A%B = A-(A/B*B) but
// we know that there is a fast algorithm for A/B
// Mod using formula A%B = A-(A/B*B) but
// we know that there is a fast algorithm for A/B.
longmod:
var n1 Node
Regalloc(&n1, nl.Type, res)
......@@ -2746,11 +2781,6 @@ longmod:
Regalloc(&n2, nl.Type, nil)
cgen_div(ODIV, &n1, nr, &n2)
a := Thearch.Optoas(OMUL, nl.Type)
if w == 8 {
// use 2-operand 16-bit multiply
// because there is no 2-operand 8-bit multiply
a = Thearch.Optoas(OMUL, Types[TINT16]) // XXX was IMULW
}
if !Smallintconst(nr) {
var n3 Node
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment