Commit 3311275c authored by Lynn Boger's avatar Lynn Boger Committed by Michael Munday

math, cmd/internal/obj/ppc64: improve floor, ceil, trunc with asm

This adds the instructions frim, frip, and friz to the ppc64x
assembler for use in implementing the math.Floor, math.Ceil, and
math.Trunc functions to improve performance.

Fixes #17185

BenchmarkCeil-128                    21.4          6.99          -67.34%
BenchmarkFloor-128                   13.9          6.37          -54.17%
BenchmarkTrunc-128                   12.7          6.33          -50.16%

Change-Id: I96131bd4e8c9c8dbafb25bfeb544cf9d2dbb4282
Reviewed-on: https://go-review.googlesource.com/29654
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: default avatarMichael Munday <munday@ca.ibm.com>
parent ed915ad4
......@@ -496,6 +496,12 @@ const (
/* optional on 32-bit */
AFRES
AFRESCC
AFRIM
AFRIMCC
AFRIP
AFRIPCC
AFRIZ
AFRIZCC
AFRSQRTE
AFRSQRTECC
AFSEL
......
......@@ -229,6 +229,12 @@ var Anames = []string{
"RFCI",
"FRES",
"FRESCC",
"FRIM",
"FRIMCC",
"FRIP",
"FRIPCC",
"FRIZ",
"FRIZCC",
"FRSQRTE",
"FRSQRTECC",
"FSEL",
......
......@@ -1371,6 +1371,12 @@ func buildop(ctxt *obj.Link) {
opset(AFCFIDUCC, r0)
opset(AFRES, r0)
opset(AFRESCC, r0)
opset(AFRIM, r0)
opset(AFRIMCC, r0)
opset(AFRIP, r0)
opset(AFRIPCC, r0)
opset(AFRIZ, r0)
opset(AFRIZCC, r0)
opset(AFRSQRTE, r0)
opset(AFRSQRTECC, r0)
opset(AFSQRT, r0)
......@@ -3173,6 +3179,18 @@ func oprrr(ctxt *obj.Link, a obj.As) uint32 {
return OPVCC(59, 24, 0, 0)
case AFRESCC:
return OPVCC(59, 24, 0, 1)
case AFRIM:
return OPVCC(63, 488, 0, 0)
case AFRIMCC:
return OPVCC(63, 488, 0, 1)
case AFRIP:
return OPVCC(63, 456, 0, 0)
case AFRIPCC:
return OPVCC(63, 456, 0, 1)
case AFRIZ:
return OPVCC(63, 424, 0, 0)
case AFRIZCC:
return OPVCC(63, 424, 0, 1)
case AFRSP:
return OPVCC(63, 12, 0, 0)
case AFRSPCC:
......
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build ppc64 ppc64le
#include "textflag.h"
TEXT ·Floor(SB),NOSPLIT,$0
FMOVD x+0(FP), F0
FRIM F0, F0
FMOVD F0, ret+8(FP)
RET
TEXT ·Ceil(SB),NOSPLIT,$0
FMOVD x+0(FP), F0
FRIP F0, F0
FMOVD F0, ret+8(FP)
RET
TEXT ·Trunc(SB),NOSPLIT,$0
FMOVD x+0(FP), F0
FRIZ F0, F0
FMOVD F0, ret+8(FP)
RET
......@@ -36,15 +36,6 @@ TEXT ·Expm1(SB),NOSPLIT,$0
TEXT ·Exp(SB),NOSPLIT,$0
BR ·exp(SB)
TEXT ·Floor(SB),NOSPLIT,$0
BR ·floor(SB)
TEXT ·Ceil(SB),NOSPLIT,$0
BR ·ceil(SB)
TEXT ·Trunc(SB),NOSPLIT,$0
BR ·trunc(SB)
TEXT ·Frexp(SB),NOSPLIT,$0
BR ·frexp(SB)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment