Commit 010d8948 authored by Russ Cox's avatar Russ Cox

go/constant: make string addition compute actual string lazily

It is natural for tools to take a large string concatenation like

	"1" + "1" + "1" + ... + "1"

and translate that into a sequence of go/constant calls:

	x := constant.MakeString("1")
	x = constant.BinaryOp(x, token.ADD, constant.MakeString("1"))
	x = constant.BinaryOp(x, token.ADD, constant.MakeString("1"))
	x = constant.BinaryOp(x, token.ADD, constant.MakeString("1"))
	x = constant.BinaryOp(x, token.ADD, constant.MakeString("1"))
	...

If the underlying representation of a string constant is a Go string,
then this leads to O(N²) memory for the concatenation of N strings,
allocating memory for "1", "11", "111", "1111", and so on.
This makes go/types and in particular cmd/vet run out of memory
(or at least use far too much) on machine-generated string concatenations,
such as those generated by go-bindata.

This CL allows code like the above to operate efficiently, by delaying
the evaluation of the actual string constant value until it is needed.
Now the representation of a string constant is either a string or an
explicit addition expression. The addition expression is turned into
a string the first time it is requested and then cached for future use.
This slows down the use of single strings, but analyses are likely not
dominated by that use anyway. It speeds up string concatenations,
especially large ones, significantly.

On my Mac running 32-bit code:

name               old time/op    new time/op    delta
StringAdd/1-8         160ns ± 2%     183ns ± 1%  +13.98%  (p=0.000 n=10+10)
StringAdd/4-8         650ns ± 1%     927ns ± 4%  +42.73%  (p=0.000 n=10+10)
StringAdd/16-8       3.93µs ± 1%    2.78µs ± 2%  -29.12%  (p=0.000 n=8+9)
StringAdd/64-8       37.3µs ± 9%    10.1µs ± 5%  -73.06%  (p=0.000 n=10+10)
StringAdd/256-8       513µs ± 5%      38µs ± 1%  -92.63%  (p=0.000 n=10+10)
StringAdd/1024-8     5.67ms ± 4%    0.14ms ± 2%  -97.45%  (p=0.000 n=8+10)
StringAdd/4096-8     77.1ms ± 9%     0.7ms ± 2%  -99.10%  (p=0.000 n=10+9)
StringAdd/16384-8     1.33s ± 7%     0.00s ±10%  -99.64%  (p=0.000 n=10+10)
StringAdd/65536-8     21.5s ± 4%      0.0s ± 8%  -99.89%  (p=0.000 n=10+10)

name               old alloc/op   new alloc/op   delta
StringAdd/1-8          232B ± 0%      256B ± 0%  +10.34%  (p=0.000 n=10+10)
StringAdd/4-8        1.20kB ± 0%    1.24kB ± 0%   +3.33%  (p=0.000 n=10+10)
StringAdd/16-8       14.7kB ± 0%     4.6kB ± 0%  -68.87%  (p=0.000 n=10+10)
StringAdd/64-8        223kB ± 0%      16kB ± 0%  -92.66%  (p=0.000 n=10+10)
StringAdd/256-8      3.48MB ± 0%    0.07MB ± 0%  -98.07%  (p=0.000 n=10+10)
StringAdd/1024-8     55.7MB ± 0%     0.3MB ± 0%  -99.53%  (p=0.000 n=10+10)
StringAdd/4096-8      855MB ± 0%       1MB ± 0%  -99.88%  (p=0.000 n=10+10)
StringAdd/16384-8    13.5GB ± 0%     0.0GB ± 0%  -99.97%  (p=0.000 n=9+10)
StringAdd/65536-8     215GB ± 0%       0GB ± 0%  -99.99%  (p=0.000 n=10+10)

name               old allocs/op  new allocs/op  delta
StringAdd/1-8          3.00 ± 0%      3.00 ± 0%     ~     (all equal)
StringAdd/4-8          9.00 ± 0%     11.00 ± 0%  +22.22%  (p=0.000 n=10+10)
StringAdd/16-8         33.0 ± 0%      25.0 ± 0%  -24.24%  (p=0.000 n=10+10)
StringAdd/64-8          129 ± 0%        75 ± 0%  -41.86%  (p=0.000 n=10+10)
StringAdd/256-8         513 ± 0%       269 ± 0%  -47.56%  (p=0.000 n=10+10)
StringAdd/1024-8      2.05k ± 0%     1.04k ± 0%  -49.29%  (p=0.000 n=10+10)
StringAdd/4096-8      8.19k ± 0%     4.12k ± 0%  -49.77%  (p=0.000 n=10+10)
StringAdd/16384-8     32.8k ± 0%     16.4k ± 0%  -49.97%  (p=0.000 n=9+10)
StringAdd/65536-8      131k ± 0%       66k ± 0%  -50.11%  (p=0.000 n=10+10)

https://perf.golang.org/search?q=upload:20180105.2

Fixes #23348 (originally reported as cmd/vet failures in comments on #23222).

This makes constant.Values of Kind String no longer meaningful for ==, which
required fixes in go/types. While there, also fix go/types handling of constant.Values
of Kind Int (for uint64), Float, and Complex.

Change-Id: I80867bc9c4232c5c9b213443ff16645434a68b36
Reviewed-on: https://go-review.googlesource.com/86395
Run-TryBot: Russ Cox <rsc@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarRobert Griesemer <gri@golang.org>
parent 26222ddc
......@@ -18,6 +18,8 @@ import (
"math"
"math/big"
"strconv"
"strings"
"sync"
"unicode/utf8"
)
......@@ -67,7 +69,12 @@ const prec = 512
type (
unknownVal struct{}
boolVal bool
stringVal string
stringVal struct {
// Lazy value: either a string (l,r==nil) or an addition (l,r!=nil).
mu sync.Mutex
s string
l, r *stringVal
}
int64Val int64 // Int values representable as an int64
intVal struct{ val *big.Int } // Int values not representable as an int64
ratVal struct{ val *big.Rat } // Float values representable as a fraction
......@@ -77,7 +84,7 @@ type (
func (unknownVal) Kind() Kind { return Unknown }
func (boolVal) Kind() Kind { return Bool }
func (stringVal) Kind() Kind { return String }
func (*stringVal) Kind() Kind { return String }
func (int64Val) Kind() Kind { return Int }
func (intVal) Kind() Kind { return Int }
func (ratVal) Kind() Kind { return Float }
......@@ -88,9 +95,9 @@ func (unknownVal) String() string { return "unknown" }
func (x boolVal) String() string { return strconv.FormatBool(bool(x)) }
// String returns a possibly shortened quoted form of the String value.
func (x stringVal) String() string {
func (x *stringVal) String() string {
const maxLen = 72 // a reasonable length
s := strconv.Quote(string(x))
s := strconv.Quote(x.string())
if utf8.RuneCountInString(s) > maxLen {
// The string without the enclosing quotes is greater than maxLen-2 runes
// long. Remove the last 3 runes (including the closing '"') by keeping
......@@ -105,6 +112,60 @@ func (x stringVal) String() string {
return s
}
// string constructs and returns the actual string literal value.
// If x represents an addition, then it rewrites x to be a single
// string, to speed future calls. This lazy construction avoids
// building different string values for all subpieces of a large
// concatenation. See golang.org/issue/23348.
func (x *stringVal) string() string {
x.mu.Lock()
if x.l != nil {
x.s = strings.Join(reverse(x.appendReverse(nil)), "")
x.l = nil
x.r = nil
}
s := x.s
x.mu.Unlock()
return s
}
// reverse reverses x in place and returns it.
func reverse(x []string) []string {
n := len(x)
for i := 0; i+i < n; i++ {
x[i], x[n-1-i] = x[n-1-i], x[i]
}
return x
}
// appendReverse appends to list all of x's subpieces, but in reverse,
// and returns the result. Appending the reversal allows processing
// the right side in a recursive call and the left side in a loop.
// Because a chain like a + b + c + d + e is actually represented
// as ((((a + b) + c) + d) + e), the left-side loop avoids deep recursion.
// x must be locked.
func (x *stringVal) appendReverse(list []string) []string {
y := x
for y.r != nil {
y.r.mu.Lock()
list = y.r.appendReverse(list)
y.r.mu.Unlock()
l := y.l
if y != x {
y.mu.Unlock()
}
l.mu.Lock()
y = l
}
s := y.s
if y != x {
y.mu.Unlock()
}
return append(list, s)
}
func (x int64Val) String() string { return strconv.FormatInt(int64(x), 10) }
func (x intVal) String() string { return x.val.String() }
func (x ratVal) String() string { return rtof(x).String() }
......@@ -160,7 +221,7 @@ func (x complexVal) String() string { return fmt.Sprintf("(%s + %si)", x.re, x.i
func (x unknownVal) ExactString() string { return x.String() }
func (x boolVal) ExactString() string { return x.String() }
func (x stringVal) ExactString() string { return strconv.Quote(string(x)) }
func (x *stringVal) ExactString() string { return strconv.Quote(x.string()) }
func (x int64Val) ExactString() string { return x.String() }
func (x intVal) ExactString() string { return x.String() }
......@@ -180,7 +241,7 @@ func (x complexVal) ExactString() string {
func (unknownVal) implementsValue() {}
func (boolVal) implementsValue() {}
func (stringVal) implementsValue() {}
func (*stringVal) implementsValue() {}
func (int64Val) implementsValue() {}
func (ratVal) implementsValue() {}
func (intVal) implementsValue() {}
......@@ -283,7 +344,7 @@ func MakeUnknown() Value { return unknownVal{} }
func MakeBool(b bool) Value { return boolVal(b) }
// MakeString returns the String value for s.
func MakeString(s string) Value { return stringVal(s) }
func MakeString(s string) Value { return &stringVal{s: s} }
// MakeInt64 returns the Int value for x.
func MakeInt64(x int64) Value { return int64Val(x) }
......@@ -382,8 +443,8 @@ func BoolVal(x Value) bool {
// If x is Unknown, the result is "".
func StringVal(x Value) string {
switch x := x.(type) {
case stringVal:
return string(x)
case *stringVal:
return x.string()
case unknownVal:
return ""
default:
......@@ -856,7 +917,7 @@ func ord(x Value) int {
return -1
case unknownVal:
return 0
case boolVal, stringVal:
case boolVal, *stringVal:
return 1
case int64Val:
return 2
......@@ -884,7 +945,7 @@ func match(x, y Value) (_, _ Value) {
// ord(x) <= ord(y)
switch x := x.(type) {
case boolVal, stringVal, complexVal:
case boolVal, *stringVal, complexVal:
return x, y
case int64Val:
......@@ -1108,9 +1169,9 @@ func BinaryOp(x_ Value, op token.Token, y_ Value) Value {
}
return makeComplex(re, im)
case stringVal:
case *stringVal:
if op == token.ADD {
return x + y.(stringVal)
return &stringVal{l: x, r: y.(*stringVal)}
}
}
......@@ -1236,21 +1297,22 @@ func Compare(x_ Value, op token.Token, y_ Value) bool {
return !re || !im
}
case stringVal:
y := y.(stringVal)
case *stringVal:
xs := x.string()
ys := y.(*stringVal).string()
switch op {
case token.EQL:
return x == y
return xs == ys
case token.NEQ:
return x != y
return xs != ys
case token.LSS:
return x < y
return xs < ys
case token.LEQ:
return x <= y
return xs <= ys
case token.GTR:
return x > y
return xs > ys
case token.GEQ:
return x >= y
return xs >= ys
}
}
......
......@@ -5,6 +5,7 @@
package constant
import (
"fmt"
"go/token"
"strings"
"testing"
......@@ -449,3 +450,23 @@ func TestUnknown(t *testing.T) {
}
}
}
func BenchmarkStringAdd(b *testing.B) {
for size := 1; size <= 65536; size *= 4 {
b.Run(fmt.Sprint(size), func(b *testing.B) {
b.ReportAllocs()
n := int64(0)
for i := 0; i < b.N; i++ {
x := MakeString(strings.Repeat("x", 100))
y := x
for j := 0; j < size-1; j++ {
y = BinaryOp(y, token.ADD, x)
}
n += int64(len(StringVal(y)))
}
if n != int64(b.N)*int64(size)*100 {
b.Fatalf("bad string %d != %d", n, int64(b.N)*int64(size)*100)
}
})
}
}
......@@ -1194,17 +1194,18 @@ func (check *Checker) exprInternal(x *operand, e ast.Expr, hint Type) exprKind {
if x.mode == constant_ {
duplicate := false
// if the key is of interface type, the type is also significant when checking for duplicates
xkey := keyVal(x.val)
if _, ok := utyp.key.Underlying().(*Interface); ok {
for _, vtyp := range visited[x.val] {
for _, vtyp := range visited[xkey] {
if Identical(vtyp, x.typ) {
duplicate = true
break
}
}
visited[x.val] = append(visited[x.val], x.typ)
visited[xkey] = append(visited[xkey], x.typ)
} else {
_, duplicate = visited[x.val]
visited[x.val] = nil
_, duplicate = visited[xkey]
visited[xkey] = nil
}
if duplicate {
check.errorf(x.pos(), "duplicate key %s in map literal", x.val)
......@@ -1508,6 +1509,30 @@ Error:
return statement // avoid follow-up errors
}
func keyVal(x constant.Value) interface{} {
switch x.Kind() {
case constant.Bool:
return constant.BoolVal(x)
case constant.String:
return constant.StringVal(x)
case constant.Int:
if v, ok := constant.Int64Val(x); ok {
return v
}
if v, ok := constant.Uint64Val(x); ok {
return v
}
case constant.Float:
v, _ := constant.Float64Val(x)
return v
case constant.Complex:
r, _ := constant.Float64Val(constant.Real(x))
i, _ := constant.Float64Val(constant.Imag(x))
return complex(r, i)
}
return x
}
// typeAssertion checks that x.(T) is legal; xtyp must be the type of x.
func (check *Checker) typeAssertion(pos token.Pos, x *operand, xtyp *Interface, T Type) {
method, wrongType := assertableTo(xtyp, T)
......
......@@ -367,6 +367,10 @@ func map_literals() {
_ = map[interface{}]int{"a": 1, "a" /* ERROR "duplicate key" */ : 1}
_ = map[interface{}]int{"a": 1, S("a"): 1}
_ = map[interface{}]int{S("a"): 1, S /* ERROR "duplicate key" */ ("a"): 1}
_ = map[interface{}]int{1.0: 1, 1.0 /* ERROR "duplicate key" */: 1}
_ = map[interface{}]int{int64(-1): 1, int64 /* ERROR "duplicate key" */ (-1) : 1}
_ = map[interface{}]int{^uint64(0): 1, ^ /* ERROR "duplicate key" */ uint64(0): 1}
_ = map[interface{}]int{complex(1,2): 1, complex /* ERROR "duplicate key" */ (1,2) : 1}
type I interface {
f()
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment