Commit cebf9d47 authored by Clément Chigot's avatar Clément Chigot Committed by Ian Lance Taylor

cmd/link: optimize access to data symbols for aix/ppc64

This commit changes the second instruction used to retrieve a symbol on
aix/ppc64 if it is in .data or .bss section.

The previous version always retrieves the symbol address via a load on
its TOC symbol. However, as the TOC is also in .data, the symbol's address
is close enough to be fetched directly and the load instruction can be
replaced by an addi.

Bench go1
benchmark                             old ns/op      new ns/op      delta
BenchmarkBinaryTree17-16              5919354000     5824897000     -1.60%
BenchmarkFannkuch11-16                5206937000     5162043000     -0.86%
BenchmarkFmtFprintfEmpty-16           106            105            -0.94%
BenchmarkFmtFprintfString-16          165            165            +0.00%
BenchmarkFmtFprintfInt-16             165            167            +1.21%
BenchmarkFmtFprintfIntInt-16          303            239            -21.12%
BenchmarkFmtFprintfPrefixedInt-16     282            283            +0.35%
BenchmarkFmtFprintfFloat-16           434            381            -12.21%
BenchmarkFmtManyArgs-16               1797           903            -49.75%
BenchmarkGobDecode-16                 16000450       12173630       -23.92%
BenchmarkGobEncode-16                 12007010       10258070       -14.57%
BenchmarkGzip-16                      638581500      456050333      -28.58%
BenchmarkGunzip-16                    111976900      74943900       -33.07%
BenchmarkHTTPClientServer-16          206850         153716         -25.69%
BenchmarkJSONEncode-16                32057380       17517130       -45.36%
BenchmarkJSONDecode-16                182606400      106807700      -41.51%
BenchmarkMandelbrot200-16             6896975        5616903        -18.56%
BenchmarkGoParse-16                   11248260       6094115        -45.82%
BenchmarkRegexpMatchEasy0_32-16       292            148            -49.32%
BenchmarkRegexpMatchEasy0_1K-16       540            327            -39.44%
BenchmarkRegexpMatchEasy1_32-16       243            150            -38.27%
BenchmarkRegexpMatchEasy1_1K-16       1029           657            -36.15%
BenchmarkRegexpMatchMedium_32-16      423            230            -45.63%
BenchmarkRegexpMatchMedium_1K-16      107250         59683          -44.35%
BenchmarkRegexpMatchHard_32-16        3353           3139           -6.38%
BenchmarkRegexpMatchHard_1K-16        107277         93610          -12.74%
BenchmarkRevcomp-16                   1124311500     677442500      -39.75%
BenchmarkTemplate-16                  241286600      109177400      -54.75%
BenchmarkTimeParse-16                 1058           562            -46.88%
BenchmarkTimeFormat-16                1321           581            -56.02%

benchmark                            old MB/s     new MB/s     speedup
BenchmarkGobDecode-16                47.97        63.05        1.31x
BenchmarkGobEncode-16                63.92        74.82        1.17x
BenchmarkGzip-16                     30.39        42.55        1.40x
BenchmarkGunzip-16                   173.29       258.92       1.49x
BenchmarkJSONEncode-16               60.53        110.78       1.83x
BenchmarkJSONDecode-16               10.63        18.17        1.71x
BenchmarkGoParse-16                  5.15         9.50         1.84x
BenchmarkRegexpMatchEasy0_32-16      109.42       215.86       1.97x
BenchmarkRegexpMatchEasy0_1K-16      1896.22      3126.28      1.65x
BenchmarkRegexpMatchEasy1_32-16      131.46       212.99       1.62x
BenchmarkRegexpMatchEasy1_1K-16      994.55       1557.51      1.57x
BenchmarkRegexpMatchMedium_32-16     2.36         4.34         1.84x
BenchmarkRegexpMatchMedium_1K-16     9.55         17.16        1.80x
BenchmarkRegexpMatchHard_32-16       9.54         10.19        1.07x
BenchmarkRegexpMatchHard_1K-16       9.55         10.94        1.15x
BenchmarkRevcomp-16                  226.06       375.19       1.66x
BenchmarkTemplate-16                 8.04         17.77        2.21x

Change-Id: Iaf2aa5953b99271361510c69a5ced3371f6c6c20
Reviewed-on: https://go-review.googlesource.com/c/151201
Run-TryBot: Ian Lance Taylor <iant@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarIan Lance Taylor <iant@golang.org>
parent 34437f04
......@@ -39,6 +39,7 @@ import (
"encoding/binary"
"fmt"
"log"
"strings"
)
func genplt(ctxt *ld.Link) {
......@@ -490,6 +491,9 @@ func symtoc(ctxt *ld.Link, s *sym.Symbol) int64 {
}
// archreloctoc relocates a TOC relative symbol.
// If the symbol pointed by this TOC relative symbol is in .data or .bss, the
// default load instruction can be changed to an addi instruction and the
// symbol address can be used directly.
// This code is for AIX only.
func archreloctoc(ctxt *ld.Link, r *sym.Reloc, s *sym.Symbol, val int64) int64 {
if ctxt.HeadType == objabi.Hlinux {
......@@ -500,7 +504,25 @@ func archreloctoc(ctxt *ld.Link, r *sym.Reloc, s *sym.Symbol, val int64) int64 {
o1 = uint32(val >> 32)
o2 = uint32(val)
t := ld.Symaddr(r.Sym) + r.Add - ctxt.Syms.ROLookup("TOC", 0).Value // sym addr
var t int64
useAddi := false
const prefix = "TOC."
var tarSym *sym.Symbol
if strings.HasPrefix(r.Sym.Name, prefix) {
tarSym = ctxt.Syms.ROLookup(strings.TrimPrefix(r.Sym.Name, prefix), 0)
} else {
ld.Errorf(s, "archreloctoc called for a symbol without TOC anchor")
}
if tarSym != nil && tarSym.Attr.Reachable() && (tarSym.Sect.Seg == &ld.Segdata) {
t = ld.Symaddr(tarSym) + r.Add - ctxt.Syms.ROLookup("TOC", 0).Value
// change ld to addi in the second instruction
o2 = (o2 & 0x03FF0000) | 0xE<<26
useAddi = true
} else {
t = ld.Symaddr(r.Sym) + r.Add - ctxt.Syms.ROLookup("TOC", 0).Value
}
if t != int64(int32(t)) {
ld.Errorf(s, "TOC relocation for %s is too big to relocate %s: 0x%x", s.Name, r.Sym, t)
}
......@@ -513,10 +535,14 @@ func archreloctoc(ctxt *ld.Link, r *sym.Reloc, s *sym.Symbol, val int64) int64 {
switch r.Type {
case objabi.R_ADDRPOWER_TOCREL_DS:
if t&3 != 0 {
ld.Errorf(s, "bad DS reloc for %s: %d", s.Name, ld.Symaddr(r.Sym))
if useAddi {
o2 |= uint32(t) & 0xFFFF
} else {
if t&3 != 0 {
ld.Errorf(s, "bad DS reloc for %s: %d", s.Name, ld.Symaddr(r.Sym))
}
o2 |= uint32(t) & 0xFFFC
}
o2 |= uint32(t) & 0xFFFC
default:
return -1
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment