Commit 2a415820 authored by Michael Munday, committed by Matthew Dempsky

cmd/compile/internal/gc: refactor cgen_div

This commit adds two new functions to cgen.go: hasHMUL64 and
hasRROTC64. These are used to determine whether or not an
architecture supports the instructions needed to perform an
optimization in cgen_div.

This commit should not affect existing architectures (although it
does add s390x to the new functions). However, since most
architectures support HMUL the hasHMUL64 function could be
modified to enable most of the optimizations in cgen_div on those
platforms.

Change-Id: I33bf329ddeb6cf2954bd17b7c161012de352fb62
Reviewed-on: https://go-review.googlesource.com/21775
Reviewed-by: Matthew Dempsky <mdempsky@google.com>
Run-TryBot: Matthew Dempsky <mdempsky@google.com>
Reviewed-by: Josh Bleecher Snyder <josharian@gmail.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
parent a00ad5f4
...@@ -2622,24 +2622,48 @@ func cgen_ret(n *Node) { ...@@ -2622,24 +2622,48 @@ func cgen_ret(n *Node) {
} }
} }
// hasHMUL64 tells the caller whether the target architecture provides
// 64-bit high multiplication (OHMUL) in both signed and unsigned forms.
//
// AMD64 and S390X report true; ARM, ARM64, I386, MIPS64 and PPC64 report
// false. Any other architecture family is treated as a fatal error.
func hasHMUL64() bool {
	switch Ctxt.Arch.Family {
	case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64:
		return false
	case sys.AMD64, sys.S390X:
		return true
	default:
		// Unclassified family: fail loudly instead of guessing.
		Fatalf("unknown architecture")
		return false
	}
}
// hasRROTC64 tells the caller whether the target architecture provides
// 64-bit rotate-through-carry instructions (ORROTC).
//
// Only AMD64 reports true; ARM, ARM64, I386, MIPS64, PPC64 and S390X
// report false. Any other architecture family is treated as a fatal error.
func hasRROTC64() bool {
	family := Ctxt.Arch.Family
	if family == sys.AMD64 {
		return true
	}
	switch family {
	case sys.ARM, sys.ARM64, sys.I386, sys.MIPS64, sys.PPC64, sys.S390X:
		return false
	}
	// Unclassified family: fail loudly instead of guessing.
	Fatalf("unknown architecture")
	return false
}
// generate division according to op, one of: // generate division according to op, one of:
// res = nl / nr // res = nl / nr
// res = nl % nr // res = nl % nr
func cgen_div(op Op, nl *Node, nr *Node, res *Node) { func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
var w int var w int
// TODO(rsc): arm64 needs to support the relevant instructions // Architectures need to support 64-bit high multiplications
// in peep and optoas in order to enable this. // (OHMUL) in order to perform divide by constant optimizations.
// TODO(rsc): ppc64 needs to support the relevant instructions if nr.Op != OLITERAL || !hasHMUL64() {
// in peep and optoas in order to enable this.
if nr.Op != OLITERAL || Ctxt.Arch.Family == sys.MIPS64 || Ctxt.Arch.Family == sys.ARM64 || Ctxt.Arch.Family == sys.PPC64 {
goto longdiv goto longdiv
} }
w = int(nl.Type.Width * 8) w = int(nl.Type.Width * 8)
// Front end handled 32-bit division. We only need to handle 64-bit. // Front end handled 32-bit division. We only need to handle 64-bit.
// try to do division by multiply by (2^w)/d // Try to do division using multiplication: (2^w)/d.
// see hacker's delight chapter 10 // See Hacker's Delight, chapter 10.
switch Simtype[nl.Type.Etype] { switch Simtype[nl.Type.Etype] {
default: default:
goto longdiv goto longdiv
...@@ -2652,6 +2676,17 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) { ...@@ -2652,6 +2676,17 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
if m.Bad != 0 { if m.Bad != 0 {
break break
} }
// In order to add the numerator we need to be able to
// avoid overflow. This is done by shifting the result of the
// addition right by 1 and inserting the carry bit into
// the MSB. For now this needs the RROTC instruction.
// TODO(mundaym): Hacker's Delight 2nd ed. chapter 10 proposes
// an alternative sequence of instructions for architectures
// that do not have a shift right with carry instruction.
if m.Ua != 0 && !hasRROTC64() {
goto longdiv
}
if op == OMOD { if op == OMOD {
goto longmod goto longmod
} }
...@@ -2665,7 +2700,7 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) { ...@@ -2665,7 +2700,7 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
Thearch.Cgen_hmul(&n1, &n2, &n3) Thearch.Cgen_hmul(&n1, &n2, &n3)
if m.Ua != 0 { if m.Ua != 0 {
// need to add numerator accounting for overflow // Need to add numerator accounting for overflow.
Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3) Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
Nodconst(&n2, nl.Type, 1) Nodconst(&n2, nl.Type, 1)
...@@ -2703,7 +2738,7 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) { ...@@ -2703,7 +2738,7 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
Thearch.Cgen_hmul(&n1, &n2, &n3) Thearch.Cgen_hmul(&n1, &n2, &n3)
if m.Sm < 0 { if m.Sm < 0 {
// need to add numerator // Need to add numerator (cannot overflow).
Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3) Thearch.Gins(Thearch.Optoas(OADD, nl.Type), &n1, &n3)
} }
...@@ -2716,8 +2751,8 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) { ...@@ -2716,8 +2751,8 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
Thearch.Gins(Thearch.Optoas(OSUB, nl.Type), &n1, &n3) // added Thearch.Gins(Thearch.Optoas(OSUB, nl.Type), &n1, &n3) // added
if m.Sd < 0 { if m.Sd < 0 {
// this could probably be removed // This could probably be removed by factoring it into
// by factoring it into the multiplier // the multiplier.
Thearch.Gins(Thearch.Optoas(OMINUS, nl.Type), nil, &n3) Thearch.Gins(Thearch.Optoas(OMINUS, nl.Type), nil, &n3)
} }
...@@ -2729,14 +2764,14 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) { ...@@ -2729,14 +2764,14 @@ func cgen_div(op Op, nl *Node, nr *Node, res *Node) {
goto longdiv goto longdiv
// division and mod using (slow) hardware instruction // Division and mod using (slow) hardware instruction.
longdiv: longdiv:
Thearch.Dodiv(op, nl, nr, res) Thearch.Dodiv(op, nl, nr, res)
return return
// mod using formula A%B = A-(A/B*B) but // Mod using formula A%B = A-(A/B*B) but
// we know that there is a fast algorithm for A/B // we know that there is a fast algorithm for A/B.
longmod: longmod:
var n1 Node var n1 Node
Regalloc(&n1, nl.Type, res) Regalloc(&n1, nl.Type, res)
...@@ -2746,11 +2781,6 @@ longmod: ...@@ -2746,11 +2781,6 @@ longmod:
Regalloc(&n2, nl.Type, nil) Regalloc(&n2, nl.Type, nil)
cgen_div(ODIV, &n1, nr, &n2) cgen_div(ODIV, &n1, nr, &n2)
a := Thearch.Optoas(OMUL, nl.Type) a := Thearch.Optoas(OMUL, nl.Type)
if w == 8 {
// use 2-operand 16-bit multiply
// because there is no 2-operand 8-bit multiply
a = Thearch.Optoas(OMUL, Types[TINT16]) // XXX was IMULW
}
if !Smallintconst(nr) { if !Smallintconst(nr) {
var n3 Node var n3 Node
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment