Commit 7345fa54 authored by Martin Möhrmann's avatar Martin Möhrmann Committed by Rob Pike

fmt: optimize struct layout and intbuf size

Move boolean fields to the end of the pp struct for better packing.

Increase the fmt.intbuf size to leave no padding space unused
at the end of the fmt struct on 32 bit architectures.

The size of the pp struct on amd64 is decreased from 192 byte
to 184 byte and on 386 from 132 byte to 128 byte.

Simplify buffer size calculation in fmt_integer.

Consolidate test cases for intbuf overflow checks.

amd64 Haswell:

name                     old time/op  new time/op  delta
SprintfPadding-2          227ns ± 2%   227ns ± 1%    ~     (p=0.941 n=25+25)
SprintfEmpty-2           38.4ns ± 6%  35.8ns ±23%  -6.71%  (p=0.030 n=24+25)
SprintfString-2           100ns ± 0%   101ns ± 0%  +1.00%  (p=0.000 n=19+18)
SprintfTruncateString-2   142ns ± 1%   142ns ± 1%  +0.37%  (p=0.028 n=25+25)
SprintfQuoteString-2      397ns ± 0%   393ns ± 0%  -0.82%  (p=0.000 n=21+24)
SprintfInt-2              101ns ± 2%   102ns ± 4%    ~     (p=0.159 n=21+24)
SprintfIntInt-2           155ns ± 3%   155ns ± 3%    ~     (p=0.934 n=25+25)
SprintfPrefixedInt-2      252ns ± 2%   251ns ± 2%    ~     (p=0.198 n=25+25)
SprintfFloat-2            184ns ± 3%   179ns ± 2%  -3.07%  (p=0.000 n=24+25)
SprintfComplex-2          532ns ± 2%   535ns ± 2%  +0.64%  (p=0.046 n=25+24)
SprintfBoolean-2         90.5ns ± 3%  91.6ns ± 2%  +1.17%  (p=0.000 n=22+22)
SprintfHexString-2        164ns ± 2%   165ns ± 3%    ~     (p=0.066 n=25+25)
SprintfHexBytes-2         171ns ± 3%   170ns ± 3%    ~     (p=0.941 n=25+24)
SprintfBytes-2            320ns ± 1%   313ns ± 1%  -2.31%  (p=0.000 n=24+21)
SprintfStringer-2         347ns ± 2%   348ns ± 2%    ~     (p=0.426 n=24+24)
SprintfStructure-2        753ns ± 1%   742ns ± 1%  -1.49%  (p=0.000 n=24+25)
FprintInt-2               145ns ± 0%   144ns ± 0%  -0.69%  (p=0.000 n=24+23)
FprintfBytes-2            163ns ± 0%   163ns ± 0%  -0.27%  (p=0.000 n=25+25)
FprintIntNoAlloc-2        108ns ± 0%   105ns ± 0%  -2.78%  (p=0.000 n=25+2

386 Haswell:

name                     old time/op  new time/op  delta
SprintfPadding-2          426ns ± 2%   422ns ± 1%  -0.89%  (p=0.000 n=25+24)
SprintfEmpty-2           24.6ns ± 1%  24.5ns ± 0%  -0.39%  (p=0.000 n=22+15)
SprintfString-2          99.1ns ± 3%  95.6ns ± 0%  -3.52%  (p=0.000 n=25+24)
SprintfTruncateString-2   156ns ± 4%   153ns ± 1%  -1.65%  (p=0.000 n=24+23)
SprintfQuoteString-2      500ns ± 2%   493ns ± 1%  -1.49%  (p=0.000 n=25+24)
SprintfInt-2             92.6ns ± 9%  88.3ns ± 1%  -4.72%  (p=0.000 n=25+25)
SprintfIntInt-2           143ns ± 7%   137ns ± 2%  -4.01%  (p=0.000 n=23+25)
SprintfPrefixedInt-2      268ns ±19%   264ns ±16%    ~     (p=0.826 n=24+24)
SprintfFloat-2            242ns ± 4%   246ns ± 2%  +1.60%  (p=0.000 n=25+25)
SprintfComplex-2         1.04µs ± 3%  1.03µs ± 1%  -0.89%  (p=0.026 n=25+25)
SprintfBoolean-2         82.2ns ± 9%  80.7ns ± 2%    ~     (p=0.163 n=24+24)
SprintfHexString-2        240ns ± 5%   224ns ± 2%  -6.94%  (p=0.000 n=25+25)
SprintfHexBytes-2         245ns ± 3%   234ns ± 2%  -4.55%  (p=0.000 n=25+25)
SprintfBytes-2            432ns ±13%   419ns ± 2%    ~     (p=0.081 n=23+25)
SprintfStringer-2         356ns ± 4%   356ns ± 4%    ~     (p=0.988 n=25+25)
SprintfStructure-2        968ns ± 5%   948ns ± 2%  -2.11%  (p=0.000 n=25+25)
FprintInt-2               206ns ± 0%   201ns ± 0%  -2.43%  (p=0.000 n=25+21)
FprintfBytes-2            187ns ± 1%   187ns ± 1%    ~     (p=0.420 n=25+25)
FprintIntNoAlloc-2        173ns ± 0%   168ns ± 0%  -2.89%  (p=0.000 n=25+2

amd64 Ivy Bridge:

name                     old time/op  new time/op  delta
SprintfPadding-4          203ns ± 4%   210ns ± 8%  +3.27%  (p=0.000 n=23+25)
SprintfEmpty-4           24.4ns ± 2%  24.4ns ± 3%    ~     (p=0.487 n=24+25)
SprintfString-4          92.4ns ± 2%  93.1ns ± 3%    ~     (p=0.087 n=22+25)
SprintfTruncateString-4   137ns ± 3%   136ns ± 2%  -1.02%  (p=0.002 n=25+25)
SprintfQuoteString-4      378ns ± 1%   373ns ± 1%  -1.32%  (p=0.000 n=24+22)
SprintfInt-4             89.9ns ± 3%  90.3ns ± 4%    ~     (p=0.444 n=25+25)
SprintfIntInt-4           137ns ± 4%   138ns ± 3%    ~     (p=0.112 n=25+23)
SprintfPrefixedInt-4      155ns ±14%   154ns ±14%    ~     (p=0.791 n=25+25)
SprintfFloat-4            154ns ± 2%   154ns ± 3%    ~     (p=0.789 n=25+25)
SprintfComplex-4          396ns ± 2%   402ns ± 3%  +1.53%  (p=0.001 n=23+25)
SprintfBoolean-4         71.0ns ± 3%  71.2ns ± 2%    ~     (p=0.515 n=25+24)
SprintfHexString-4        156ns ± 3%   150ns ± 5%  -3.69%  (p=0.000 n=24+25)
SprintfHexBytes-4         154ns ± 3%   157ns ± 5%  +1.72%  (p=0.003 n=24+25)
SprintfBytes-4            297ns ± 4%   291ns ± 3%  -1.86%  (p=0.000 n=25+25)
SprintfStringer-4         275ns ± 3%   265ns ± 3%  -3.51%  (p=0.000 n=25+25)
SprintfStructure-4        878ns ± 2%   823ns ± 2%  -6.21%  (p=0.000 n=25+22)
FprintInt-4               145ns ± 1%   147ns ± 2%  +0.94%  (p=0.001 n=23+25)
FprintfBytes-4            166ns ± 1%   168ns ± 2%  +0.81%  (p=0.000 n=24+25)
FprintIntNoAlloc-4        113ns ± 2%   109ns ± 2%  -3.79%  (p=0.000 n=24+25)

386 Ivy Bridge:

name                     old time/op  new time/op  delta
SprintfPadding-4          353ns ± 4%   354ns ± 4%    ~     (p=0.769 n=25+24)
SprintfEmpty-4           21.9ns ± 6%  21.1ns ± 3%  -3.45%  (p=0.000 n=24+25)
SprintfString-4          94.7ns ± 1%  93.0ns ± 3%  -1.77%  (p=0.000 n=24+23)
SprintfTruncateString-4   150ns ± 2%   147ns ± 0%  -1.71%  (p=0.000 n=25+21)
SprintfQuoteString-4      472ns ± 1%   479ns ± 1%  +1.48%  (p=0.000 n=25+23)
SprintfInt-4             87.0ns ± 2%  85.3ns ± 2%  -1.95%  (p=0.000 n=25+25)
SprintfIntInt-4           137ns ± 2%   134ns ± 2%  -1.97%  (p=0.000 n=24+23)
SprintfPrefixedInt-4      166ns ± 8%   161ns ± 8%  -3.07%  (p=0.023 n=24+24)
SprintfFloat-4            226ns ± 1%   219ns ± 1%  -2.97%  (p=0.000 n=24+25)
SprintfComplex-4          867ns ± 1%   784ns ± 1%  -9.47%  (p=0.000 n=24+23)
SprintfBoolean-4         77.2ns ± 2%  76.0ns ± 2%  -1.63%  (p=0.000 n=25+25)
SprintfHexString-4        212ns ± 2%   214ns ± 2%  +0.96%  (p=0.000 n=25+25)
SprintfHexBytes-4         221ns ± 2%   218ns ± 1%  -1.42%  (p=0.000 n=25+24)
SprintfBytes-4            423ns ± 3%   417ns ± 1%  -1.49%  (p=0.000 n=25+24)
SprintfStringer-4         306ns ± 3%   298ns ± 3%  -2.57%  (p=0.000 n=24+25)
SprintfStructure-4       1.00µs ± 2%  0.98µs ± 2%  -1.34%  (p=0.000 n=24+24)
FprintInt-4               202ns ± 3%   197ns ± 2%  -2.04%  (p=0.000 n=25+25)
FprintfBytes-4            186ns ± 2%   184ns ± 2%  -0.88%  (p=0.000 n=24+25)
FprintIntNoAlloc-4        170ns ± 2%   166ns ± 2%  -2.26%  (p=0.000 n=24+25)

Change-Id: I46e62bf8b6afa90a24f75b40f1d354b2084b910b
Reviewed-on: https://go-review.googlesource.com/20984
Run-TryBot: Rob Pike <r@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarRob Pike <r@golang.org>
parent 7f52b439
...@@ -335,6 +335,7 @@ var fmtTests = []struct { ...@@ -335,6 +335,7 @@ var fmtTests = []struct {
{"%b", -6, "-110"}, {"%b", -6, "-110"},
{"%b", ^uint32(0), "11111111111111111111111111111111"}, {"%b", ^uint32(0), "11111111111111111111111111111111"},
{"%b", ^uint64(0), "1111111111111111111111111111111111111111111111111111111111111111"}, {"%b", ^uint64(0), "1111111111111111111111111111111111111111111111111111111111111111"},
{"%b", int64(-1 << 63), zeroFill("-1", 63, "")},
{"%o", 01234, "1234"}, {"%o", 01234, "1234"},
{"%#o", 01234, "01234"}, {"%#o", 01234, "01234"},
{"%o", ^uint32(0), "37777777777"}, {"%o", ^uint32(0), "37777777777"},
...@@ -359,6 +360,15 @@ var fmtTests = []struct { ...@@ -359,6 +360,15 @@ var fmtTests = []struct {
{"%-#20.8X", 0x1234abc, "0X01234ABC "}, {"%-#20.8X", 0x1234abc, "0X01234ABC "},
{"%-#20.8o", 01234, "00001234 "}, {"%-#20.8o", 01234, "00001234 "},
// Test correct f.intbuf overflow checks.
{"%068d", 1, zeroFill("", 68, "1")},
{"%068d", -1, zeroFill("-", 67, "1")},
{"%#.68x", 42, zeroFill("0x", 68, "2a")},
{"%.68d", -42, zeroFill("-", 68, "42")},
{"%+.68d", 42, zeroFill("+", 68, "42")},
{"% .68d", 42, zeroFill(" ", 68, "42")},
{"% +.68d", 42, zeroFill("+", 68, "42")},
// unicode format // unicode format
{"%U", 0, "U+0000"}, {"%U", 0, "U+0000"},
{"%U", -1, "U+FFFFFFFFFFFFFFFF"}, {"%U", -1, "U+FFFFFFFFFFFFFFFF"},
...@@ -375,8 +385,8 @@ var fmtTests = []struct { ...@@ -375,8 +385,8 @@ var fmtTests = []struct {
{"%#-14.6U", '⌘', "U+002318 '⌘' "}, {"%#-14.6U", '⌘', "U+002318 '⌘' "},
{"%#014.6U", '⌘', " U+002318 '⌘'"}, {"%#014.6U", '⌘', " U+002318 '⌘'"},
{"%#-014.6U", '⌘', "U+002318 '⌘' "}, {"%#-014.6U", '⌘', "U+002318 '⌘' "},
{"%.80U", uint(42), zeroFill("U+", 80, "2A")}, {"%.68U", uint(42), zeroFill("U+", 68, "2A")},
{"%#.80U", '日', zeroFill("U+", 80, "65E5") + " '日'"}, {"%#.68U", '日', zeroFill("U+", 68, "65E5") + " '日'"},
// floats // floats
{"%+.3e", 0.0, "+0.000e+00"}, {"%+.3e", 0.0, "+0.000e+00"},
...@@ -406,6 +416,9 @@ var fmtTests = []struct { ...@@ -406,6 +416,9 @@ var fmtTests = []struct {
// Precision has no effect for binary float format. // Precision has no effect for binary float format.
{"%.4b", float32(1.0), "8388608p-23"}, {"%.4b", float32(1.0), "8388608p-23"},
{"%.4b", -1.0, "-4503599627370496p-52"}, {"%.4b", -1.0, "-4503599627370496p-52"},
// Test correct f.intbuf boundary checks.
{"%.68f", 1.0, zeroFill("1.", 68, "")},
{"%.68f", -1.0, zeroFill("-1.", 68, "")},
// float infinites and NaNs // float infinites and NaNs
{"%f", posInf, "+Inf"}, {"%f", posInf, "+Inf"},
{"%.1f", negInf, "-Inf"}, {"%.1f", negInf, "-Inf"},
...@@ -795,22 +808,6 @@ var fmtTests = []struct { ...@@ -795,22 +808,6 @@ var fmtTests = []struct {
// This test is just to check that it shows the two NaNs at all. // This test is just to check that it shows the two NaNs at all.
{"%v", map[float64]int{NaN: 1, NaN: 2}, "map[NaN:<nil> NaN:<nil>]"}, {"%v", map[float64]int{NaN: 1, NaN: 2}, "map[NaN:<nil> NaN:<nil>]"},
// Used to crash because nByte didn't allow for a sign.
{"%b", int64(-1 << 63), zeroFill("-1", 63, "")},
// Used to panic.
{"%0100d", 1, zeroFill("", 100, "1")},
{"%0100d", -1, zeroFill("-", 99, "1")},
{"%0.100f", 1.0, zeroFill("1.", 100, "")},
{"%0.100f", -1.0, zeroFill("-1.", 100, "")},
// Used to panic: integer function didn't look at f.prec, f.unicode, f.width or sign.
{"%#.65x", 42, zeroFill("0x", 65, "2a")},
{"%.65d", -42, zeroFill("-", 65, "42")},
{"%+.65d", 42, zeroFill("+", 65, "42")},
{"% .65d", 42, zeroFill(" ", 65, "42")},
{"% +.65d", 42, zeroFill("+", 65, "42")},
// Comparison of padding rules with C printf. // Comparison of padding rules with C printf.
/* /*
C program: C program:
...@@ -882,10 +879,6 @@ var fmtTests = []struct { ...@@ -882,10 +879,6 @@ var fmtTests = []struct {
{"%7.2f", 1 + 2i, "( 1.00 +2.00i)"}, {"%7.2f", 1 + 2i, "( 1.00 +2.00i)"},
{"%+07.2f", -1 - 2i, "(-001.00-002.00i)"}, {"%+07.2f", -1 - 2i, "(-001.00-002.00i)"},
{"%20f", -1.0, " -1.000000"},
// Make sure we can handle very large widths.
{"%0100f", -1.0, zeroFill("-", 99, "1.000000")},
// Use spaces instead of zero if padding to the right. // Use spaces instead of zero if padding to the right.
{"%0-5s", "abc", "abc "}, {"%0-5s", "abc", "abc "},
{"%-05.1f", 1.0, "1.0 "}, {"%-05.1f", 1.0, "1.0 "},
...@@ -908,27 +901,6 @@ var fmtTests = []struct { ...@@ -908,27 +901,6 @@ var fmtTests = []struct {
// Incomplete format specification caused crash. // Incomplete format specification caused crash.
{"%.", 3, "%!.(int=3)"}, {"%.", 3, "%!.(int=3)"},
// Used to panic with out-of-bounds for very large numeric representations.
// nByte is set to handle one bit per uint64 in %b format, with a negative number.
// See issue 6777.
{"%#064x", 1, zeroFill("0x", 64, "1")},
{"%#064x", -1, zeroFill("-0x", 63, "1")},
{"%#064b", 1, zeroFill("", 64, "1")},
{"%#064b", -1, zeroFill("-", 63, "1")},
{"%#064o", 1, zeroFill("", 64, "1")},
{"%#064o", -1, zeroFill("-", 63, "1")},
{"%#064d", 1, zeroFill("", 64, "1")},
{"%#064d", -1, zeroFill("-", 63, "1")},
// Test that we handle the crossover above the size of uint64
{"%#072x", 1, zeroFill("0x", 72, "1")},
{"%#072x", -1, zeroFill("-0x", 71, "1")},
{"%#072b", 1, zeroFill("", 72, "1")},
{"%#072b", -1, zeroFill("-", 71, "1")},
{"%#072o", 1, zeroFill("", 72, "1")},
{"%#072o", -1, zeroFill("-", 71, "1")},
{"%#072d", 1, zeroFill("", 72, "1")},
{"%#072d", -1, zeroFill("-", 71, "1")},
// Padding for complex numbers. Has been bad, then fixed, then bad again. // Padding for complex numbers. Has been bad, then fixed, then bad again.
{"%+10.2f", +104.66 + 440.51i, "( +104.66 +440.51i)"}, {"%+10.2f", +104.66 + 440.51i, "( +104.66 +440.51i)"},
{"%+10.2f", -104.66 + 440.51i, "( -104.66 +440.51i)"}, {"%+10.2f", -104.66 + 440.51i, "( -104.66 +440.51i)"},
......
...@@ -10,10 +10,6 @@ import ( ...@@ -10,10 +10,6 @@ import (
) )
const ( const (
// %b of an int64, plus a sign.
// Hex can add 0x and we handle it specially.
nByte = 65
ldigits = "0123456789abcdefx" ldigits = "0123456789abcdefx"
udigits = "0123456789ABCDEFX" udigits = "0123456789ABCDEFX"
) )
...@@ -43,12 +39,16 @@ type fmtFlags struct { ...@@ -43,12 +39,16 @@ type fmtFlags struct {
// A fmt is the raw formatter used by Printf etc. // A fmt is the raw formatter used by Printf etc.
// It prints into a buffer that must be set up separately. // It prints into a buffer that must be set up separately.
type fmt struct { type fmt struct {
intbuf [nByte]byte buf *buffer
buf *buffer
// width, precision
wid int
prec int
fmtFlags fmtFlags
wid int // width
prec int // precision
// intbuf is large enought to store %b of an int64 with a sign and
// avoids padding at the end of the struct on 32 bit architectures.
intbuf [68]byte
} }
func (f *fmt) clearflags() { func (f *fmt) clearflags() {
...@@ -136,14 +136,14 @@ func (f *fmt) fmt_unicode(u uint64) { ...@@ -136,14 +136,14 @@ func (f *fmt) fmt_unicode(u uint64) {
buf := f.intbuf[0:] buf := f.intbuf[0:]
// With default precision set the maximum needed buf length is 18 // With default precision set the maximum needed buf length is 18
// for formatting -1 with %#U ("U+FFFFFFFFFFFFFFFF") // for formatting -1 with %#U ("U+FFFFFFFFFFFFFFFF") which fits
// which fits into the already allocated intbuf with a capacity of 65 bytes. // into the already allocated intbuf with a capacity of 68 bytes.
prec := 4 prec := 4
if f.precPresent && f.prec > 4 { if f.precPresent && f.prec > 4 {
prec = f.prec prec = f.prec
// Compute space needed for "U+" , number, " '", character, "'". // Compute space needed for "U+" , number, " '", character, "'".
width := 2 + prec + 2 + utf8.UTFMax + 1 width := 2 + prec + 2 + utf8.UTFMax + 1
if width > cap(buf) { if width > len(buf) {
buf = make([]byte, width) buf = make([]byte, width)
} }
} }
...@@ -205,17 +205,13 @@ func (f *fmt) fmt_integer(u uint64, base int, isSigned bool, digits string) { ...@@ -205,17 +205,13 @@ func (f *fmt) fmt_integer(u uint64, base int, isSigned bool, digits string) {
u = -u u = -u
} }
var buf []byte = f.intbuf[0:] buf := f.intbuf[0:]
if f.widPresent || f.precPresent || f.plus || f.space { // The already allocated f.intbuf with a capacity of 68 bytes
width := f.wid + f.prec // Only one will be set, both are positive; this provides the maximum. // is large enough for integer formatting when no precision or width is set.
if base == 16 && f.sharp { if f.widPresent || f.precPresent {
// Also adds "0x". // Account 3 extra bytes for possible addition of a sign and "0x".
width += 2 width := 3 + f.wid + f.prec // wid and prec are always positive.
} if width > len(buf) {
if negative || f.plus || f.space {
width++
}
if width > nByte {
// We're going to need a bigger boat. // We're going to need a bigger boat.
buf = make([]byte, width) buf = make([]byte, width)
} }
......
...@@ -101,20 +101,27 @@ func (bp *buffer) WriteRune(r rune) { ...@@ -101,20 +101,27 @@ func (bp *buffer) WriteRune(r rune) {
*bp = b[:n+w] *bp = b[:n+w]
} }
// pp is used to store a printer's state and is reused with sync.Pool to avoid allocations.
type pp struct { type pp struct {
panicking bool buf buffer
erroring bool // printing an error condition
buf buffer
// arg holds the current item, as an interface{}. // arg holds the current item, as an interface{}.
arg interface{} arg interface{}
// value holds the current item, as a reflect.Value, and will be
// the zero Value if the item has not been reflected. // value is used instead of arg for reflect values.
value reflect.Value value reflect.Value
// fmt is used to format basic items such as integers or strings.
fmt fmt
// reordered records whether the format string used argument reordering. // reordered records whether the format string used argument reordering.
reordered bool reordered bool
// goodArgNum records whether the most recent reordering directive was valid. // goodArgNum records whether the most recent reordering directive was valid.
goodArgNum bool goodArgNum bool
fmt fmt // panicking is set by catchPanic to avoid infinite panic, recover, panic, ... recursion.
panicking bool
// erroring is set when printing an error string to guard against calling handleMethods.
erroring bool
} }
var ppFree = sync.Pool{ var ppFree = sync.Pool{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment