From bace9523eed9bc695310cd327b19ecdf7aa44612 Mon Sep 17 00:00:00 2001 From: Dmitriy Vyukov <dvyukov@google.com> Date: Mon, 27 Jan 2014 15:11:12 +0400 Subject: [PATCH] runtime: smarter slice grow When growing slice take into account size of the allocated memory block. Also apply the same optimization to string->[]byte conversion. Fixes #6307. benchmark old ns/op new ns/op delta BenchmarkAppendGrowByte 4541036 4434108 -2.35% BenchmarkAppendGrowString 59885673 44813604 -25.17% LGTM=khr R=khr CC=golang-codereviews, iant, rsc https://golang.org/cl/53340044 --- src/pkg/runtime/append_test.go | 19 ++++++++++++++ src/pkg/runtime/malloc.h | 1 + src/pkg/runtime/msize.c | 15 +++++++++++ src/pkg/runtime/slice.c | 46 +++++++++++++++++++++++++++------- src/pkg/runtime/string.goc | 18 ++++++++++--- 5 files changed, 86 insertions(+), 13 deletions(-) diff --git a/src/pkg/runtime/append_test.go b/src/pkg/runtime/append_test.go index 937c8259fd..a67dc9b494 100644 --- a/src/pkg/runtime/append_test.go +++ b/src/pkg/runtime/append_test.go @@ -19,6 +19,25 @@ func BenchmarkAppend(b *testing.B) { } } +func BenchmarkAppendGrowByte(b *testing.B) { + for i := 0; i < b.N; i++ { + var x []byte + for j := 0; j < 1<<20; j++ { + x = append(x, byte(j)) + } + } +} + +func BenchmarkAppendGrowString(b *testing.B) { + var s string + for i := 0; i < b.N; i++ { + var x []string + for j := 0; j < 1<<20; j++ { + x = append(x, s) + } + } +} + func benchmarkAppendBytes(b *testing.B, length int) { b.StopTimer() x := make([]byte, 0, N) diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h index 8122b4b0b8..4146299223 100644 --- a/src/pkg/runtime/malloc.h +++ b/src/pkg/runtime/malloc.h @@ -273,6 +273,7 @@ extern MStats mstats; // making new objects in class i int32 runtime路SizeToClass(int32); +uintptr runtime路roundupsize(uintptr); extern int32 runtime路class_to_size[NumSizeClasses]; extern int32 runtime路class_to_allocnpages[NumSizeClasses]; extern int8 runtime路size_to_class8[1024/8 + 1]; diff --git a/src/pkg/runtime/msize.c b/src/pkg/runtime/msize.c index 85088fdf46..63d5ef490e 100644 --- a/src/pkg/runtime/msize.c +++ b/src/pkg/runtime/msize.c @@ -162,3 +162,18 @@ dump: } runtime路throw("InitSizes failed"); } + +// Returns size of the memory block that mallocgc will allocate if you ask for the size. +uintptr +runtime路roundupsize(uintptr size) +{ + if(size < MaxSmallSize) { + if(size <= 1024-8) + return runtime路class_to_size[runtime路size_to_class8[(size+7)>>3]]; + else + return runtime路class_to_size[runtime路size_to_class128[(size-1024+127) >> 7]]; + } + if(size + PageSize < size) + return size; + return ROUND(size, PageSize); +} diff --git a/src/pkg/runtime/slice.c b/src/pkg/runtime/slice.c index ef8ab7fe0a..c3b240bc83 100644 --- a/src/pkg/runtime/slice.c +++ b/src/pkg/runtime/slice.c @@ -8,6 +8,7 @@ #include "typekind.h" #include "malloc.h" #include "race.h" +#include "stack.h" #include "../../cmd/ld/textflag.h" enum @@ -92,26 +93,53 @@ runtime路growslice(SliceType *t, Slice old, int64 n, Slice ret) static void growslice1(SliceType *t, Slice x, intgo newcap, Slice *ret) { - intgo m; + intgo newcap1; + uintptr capmem, lenmem; + int32 flag; + Type *typ; + + typ = t->elem; + if(typ->size == 0) { + *ret = x; + ret->cap = newcap; + return; + } - m = x.cap; + newcap1 = x.cap; // Using newcap directly for m+m < newcap handles // both the case where m == 0 and also the case where // m+m/4 wraps around, in which case the loop // below might never terminate. - if(m+m < newcap) - m = newcap; + if(newcap1+newcap1 < newcap) + newcap1 = newcap; else { do { if(x.len < 1024) - m += m; + newcap1 += newcap1; else - m += m/4; - } while(m < newcap); + newcap1 += newcap1/4; + } while(newcap1 < newcap); } - makeslice1(t, x.len, m, ret); - runtime路memmove(ret->array, x.array, ret->len * t->elem->size); + + if(newcap1 > MaxMem/typ->size) + runtime路panicstring("growslice: cap out of range"); + capmem = runtime路roundupsize(newcap1*typ->size); + flag = FlagNoZero; + if(typ->kind&KindNoPointers) + flag |= FlagNoScan; + // Here we allocate with FlagNoZero but potentially w/o FlagNoScan, + // GC must not see this blocks until memclr below. + m->locks++; + ret->array = runtime路mallocgc(capmem, (uintptr)typ|TypeInfo_Array, flag); + ret->len = x.len; + ret->cap = capmem/typ->size; + lenmem = x.len*typ->size; + runtime路memmove(ret->array, x.array, lenmem); + runtime路memclr(ret->array+lenmem, capmem-lenmem); + m->locks--; + if(m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack + g->stackguard0 = StackPreempt; } // copy(to any, fr any, wid uintptr) int diff --git a/src/pkg/runtime/string.goc b/src/pkg/runtime/string.goc index 8eff05a843..407188cfe6 100644 --- a/src/pkg/runtime/string.goc +++ b/src/pkg/runtime/string.goc @@ -78,6 +78,7 @@ runtime路gostringn(byte *str, intgo l) return s; } +// used by cmd/cgo Slice runtime路gobytes(byte *p, intgo n) { @@ -278,10 +279,15 @@ func slicebytetostring(b Slice) (s String) { } func stringtoslicebyte(s String) (b Slice) { - b.array = runtime路mallocgc(s.len, 0, FlagNoScan|FlagNoZero); + uintptr cap; + + cap = runtime路roundupsize(s.len); + b.array = runtime路mallocgc(cap, 0, FlagNoScan|FlagNoZero); b.len = s.len; - b.cap = s.len; + b.cap = cap; runtime路memmove(b.array, s.str, s.len); + if(cap != b.len) + runtime路memclr(b.array+b.len, cap-b.len); } func slicerunetostring(b Slice) (s String) { @@ -316,6 +322,7 @@ func stringtoslicerune(s String) (b Slice) { intgo n; int32 dum, *r; uint8 *p, *ep; + uintptr mem; // two passes. // unlike slicerunetostring, no race because strings are immutable. @@ -327,13 +334,16 @@ func stringtoslicerune(s String) (b Slice) { n++; } - b.array = runtime路mallocgc(n*sizeof(r[0]), 0, FlagNoScan|FlagNoZero); + mem = runtime路roundupsize(n*sizeof(r[0])); + b.array = runtime路mallocgc(mem, 0, FlagNoScan|FlagNoZero); b.len = n; - b.cap = n; + b.cap = mem/sizeof(r[0]); p = s.str; r = (int32*)b.array; while(p < ep) p += runtime路charntorune(r++, p, ep-p); + if(b.cap > b.len) + runtime路memclr(b.array+b.len*sizeof(r[0]), (b.cap-b.len)*sizeof(r[0])); } enum -- 2.30.9