Commit 14e59511 authored by Dmitry Vyukov's avatar Dmitry Vyukov

runtime: increase malloc size classes

When we calculate class sizes, in some cases we discard considerable
amounts of memory without an apparent reason. For example, we choose
size 8448 with 6 objects in 7 pages. But we can well use object
size 9472, which is also 6 objects in 7 pages but +1024 bytes (+12.12%).

Increase class sizes to the max value that leads to the same
page count/number of objects. Full list of affected size classes:

class 36: pages: 2 size: 1664->1792 +128 (7.69%)
class 39: pages: 1 size: 2560->2688 +128 (5.0%)
class 40: pages: 3 size: 2816->3072 +256 (9.9%)
class 41: pages: 2 size: 3072->3200 +128 (4.16%)
class 42: pages: 3 size: 3328->3456 +128 (3.84%)
class 44: pages: 3 size: 4608->4864 +256 (5.55%)
class 47: pages: 4 size: 6400->6528 +128 (2.0%)
class 48: pages: 5 size: 6656->6784 +128 (1.92%)
class 51: pages: 7 size: 8448->9472 +1024 (12.12%)
class 52: pages: 6 size: 8704->9728 +1024 (11.76%)
class 53: pages: 5 size: 9472->10240 +768 (8.10%)
class 54: pages: 4 size: 10496->10880 +384 (3.65%)
class 57: pages: 7 size: 14080->14336 +256 (1.81%)
class 59: pages: 9 size: 16640->18432 +1792 (10.76%)
class 60: pages: 7 size: 17664->19072 +1408 (7.97%)
class 62: pages: 8 size: 21248->21760 +512 (2.40%)
class 64: pages: 10 size: 24832->27264 +2432 (9.79%)
class 65: pages: 7 size: 28416->28672 +256 (0.90%)

name                      old time/op    new time/op    delta
BinaryTree17-12              2.59s ± 5%     2.52s ± 4%    ~     (p=0.132 n=6+6)
Fannkuch11-12                2.13s ± 3%     2.17s ± 3%    ~     (p=0.180 n=6+6)
FmtFprintfEmpty-12          47.0ns ± 3%    46.6ns ± 1%    ~     (p=0.355 n=6+5)
FmtFprintfString-12          131ns ± 0%     131ns ± 1%    ~     (p=0.476 n=4+6)
FmtFprintfInt-12             121ns ± 6%     122ns ± 2%    ~     (p=0.511 n=6+6)
FmtFprintfIntInt-12          182ns ± 2%     186ns ± 1%  +2.20%  (p=0.015 n=6+6)
FmtFprintfPrefixedInt-12     184ns ± 5%     181ns ± 2%    ~     (p=0.645 n=6+6)
FmtFprintfFloat-12           272ns ± 7%     265ns ± 1%    ~     (p=1.000 n=6+5)
FmtManyArgs-12               783ns ± 2%     802ns ± 2%  +2.38%  (p=0.017 n=6+6)
GobDecode-12                7.04ms ± 4%    7.00ms ± 2%    ~     (p=1.000 n=6+6)
GobEncode-12                6.36ms ± 6%    6.17ms ± 6%    ~     (p=0.240 n=6+6)
Gzip-12                      242ms ±14%     233ms ± 7%    ~     (p=0.310 n=6+6)
Gunzip-12                   36.6ms ±22%    36.0ms ± 9%    ~     (p=0.841 n=5+5)
HTTPClientServer-12         93.1µs ±29%    88.0µs ±32%    ~     (p=0.240 n=6+6)
JSONEncode-12               27.1ms ±39%    26.2ms ±35%    ~     (p=0.589 n=6+6)
JSONDecode-12               71.7ms ±36%    71.5ms ±36%    ~     (p=0.937 n=6+6)
Mandelbrot200-12            4.78ms ±10%    4.70ms ±16%    ~     (p=0.394 n=6+6)
GoParse-12                  4.86ms ±34%    4.95ms ±36%    ~     (p=1.000 n=6+6)
RegexpMatchEasy0_32-12       110ns ±37%     110ns ±36%    ~     (p=0.660 n=6+6)
RegexpMatchEasy0_1K-12       240ns ±38%     234ns ±47%    ~     (p=0.554 n=6+6)
RegexpMatchEasy1_32-12      77.2ns ± 2%    77.2ns ±10%    ~     (p=0.699 n=6+6)
RegexpMatchEasy1_1K-12       337ns ± 5%     331ns ± 4%    ~     (p=0.552 n=6+6)
RegexpMatchMedium_32-12      125ns ±13%     132ns ±26%    ~     (p=0.561 n=6+6)
RegexpMatchMedium_1K-12     35.9µs ± 3%    36.1µs ± 5%    ~     (p=0.818 n=6+6)
RegexpMatchHard_32-12       1.81µs ± 4%    1.82µs ± 5%    ~     (p=0.452 n=5+5)
RegexpMatchHard_1K-12       52.4µs ± 2%    54.4µs ± 3%  +3.84%  (p=0.002 n=6+6)
Revcomp-12                   401ms ± 2%     390ms ± 1%  -2.82%  (p=0.002 n=6+6)
Template-12                 54.5ms ± 3%    54.6ms ± 1%    ~     (p=0.589 n=6+6)
TimeParse-12                 294ns ± 1%     298ns ± 2%    ~     (p=0.160 n=6+6)
TimeFormat-12                323ns ± 4%     318ns ± 5%    ~     (p=0.297 n=6+6)

name                      old speed      new speed      delta
GobDecode-12               109MB/s ± 4%   110MB/s ± 2%    ~     (p=1.000 n=6+6)
GobEncode-12               121MB/s ± 6%   125MB/s ± 6%    ~     (p=0.240 n=6+6)
Gzip-12                   80.4MB/s ±12%  83.3MB/s ± 7%    ~     (p=0.310 n=6+6)
Gunzip-12                  495MB/s ±41%   541MB/s ± 9%    ~     (p=0.931 n=6+5)
JSONEncode-12             80.7MB/s ±39%  82.8MB/s ±34%    ~     (p=0.589 n=6+6)
JSONDecode-12             30.4MB/s ±40%  31.0MB/s ±37%    ~     (p=0.937 n=6+6)
GoParse-12                13.2MB/s ±33%  13.2MB/s ±35%    ~     (p=1.000 n=6+6)
RegexpMatchEasy0_32-12     321MB/s ±34%   326MB/s ±34%    ~     (p=0.699 n=6+6)
RegexpMatchEasy0_1K-12    4.49GB/s ±31%  4.74GB/s ±37%    ~     (p=0.589 n=6+6)
RegexpMatchEasy1_32-12     414MB/s ± 2%   415MB/s ± 9%    ~     (p=0.699 n=6+6)
RegexpMatchEasy1_1K-12    3.03GB/s ± 5%  3.09GB/s ± 4%    ~     (p=0.699 n=6+6)
RegexpMatchMedium_32-12   7.99MB/s ±12%  7.68MB/s ±22%    ~     (p=0.589 n=6+6)
RegexpMatchMedium_1K-12   28.5MB/s ± 3%  28.4MB/s ± 5%    ~     (p=0.818 n=6+6)
RegexpMatchHard_32-12     17.7MB/s ± 4%  17.0MB/s ±15%    ~     (p=0.351 n=5+6)
RegexpMatchHard_1K-12     19.6MB/s ± 2%  18.8MB/s ± 3%  -3.67%  (p=0.002 n=6+6)
Revcomp-12                 634MB/s ± 2%   653MB/s ± 1%  +2.89%  (p=0.002 n=6+6)
Template-12               35.6MB/s ± 3%  35.5MB/s ± 1%    ~     (p=0.615 n=6+6)

Change-Id: I465a47f74227f316e3abea231444f48c7a30ef85
Reviewed-on: https://go-review.googlesource.com/24493
Run-TryBot: Dmitry Vyukov <dvyukov@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: default avatarKeith Randall <khr@golang.org>
Reviewed-by: default avatarAustin Clements <austin@google.com>
parent ab9137dd
......@@ -491,7 +491,7 @@ func nextFreeFast(s *mspan) gclinkptr {
// weight allocation. If it is a heavy weight allocation the caller must
// determine whether a new GC cycle needs to be started or if the GC is active
// whether this goroutine needs to assist the GC.
func (c *mcache) nextFree(sizeclass int8) (v gclinkptr, s *mspan, shouldhelpgc bool) {
func (c *mcache) nextFree(sizeclass uint8) (v gclinkptr, s *mspan, shouldhelpgc bool) {
s = c.alloc[sizeclass]
shouldhelpgc = false
freeIndex := s.nextFreeIndex()
......@@ -645,11 +645,11 @@ func mallocgc(size uintptr, typ *_type, needzero bool) unsafe.Pointer {
}
size = maxTinySize
} else {
var sizeclass int8
if size <= 1024-8 {
sizeclass = size_to_class8[(size+7)>>3]
var sizeclass uint8
if size <= smallSizeMax-8 {
sizeclass = size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]
} else {
sizeclass = size_to_class128[(size-1024+127)>>7]
sizeclass = size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]
}
size = uintptr(class_to_size[sizeclass])
span := c.alloc[sizeclass]
......
......@@ -46,21 +46,27 @@ package runtime
// size divided by 128 (rounded up). The arrays are filled in
// by InitSizes.
var class_to_size [_NumSizeClasses]int32
var class_to_allocnpages [_NumSizeClasses]int32
const (
smallSizeDiv = 8
smallSizeMax = 1024
largeSizeDiv = 128
)
var class_to_size [_NumSizeClasses]uint32
var class_to_allocnpages [_NumSizeClasses]uint32
var class_to_divmagic [_NumSizeClasses]divMagic
var size_to_class8 [1024/8 + 1]int8
var size_to_class128 [(_MaxSmallSize-1024)/128 + 1]int8
var size_to_class8 [smallSizeMax/smallSizeDiv + 1]uint8
var size_to_class128 [(_MaxSmallSize-smallSizeMax)/largeSizeDiv + 1]uint8
func sizeToClass(size int32) int32 {
func sizeToClass(size uint32) uint32 {
if size > _MaxSmallSize {
throw("invalid size")
}
if size > 1024-8 {
return int32(size_to_class128[(size-1024+127)>>7])
if size > smallSizeMax-8 {
return uint32(size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv])
}
return int32(size_to_class8[(size+7)>>3])
return uint32(size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv])
}
func initSizes() {
......@@ -97,20 +103,41 @@ func initSizes() {
// use just this size instead of having two
// different sizes.
if sizeclass > 1 && npages == int(class_to_allocnpages[sizeclass-1]) && allocsize/size == allocsize/int(class_to_size[sizeclass-1]) {
class_to_size[sizeclass-1] = int32(size)
class_to_size[sizeclass-1] = uint32(size)
continue
}
class_to_allocnpages[sizeclass] = int32(npages)
class_to_size[sizeclass] = int32(size)
class_to_allocnpages[sizeclass] = uint32(npages)
class_to_size[sizeclass] = uint32(size)
sizeclass++
}
if sizeclass != _NumSizeClasses {
print("runtime: sizeclass=", sizeclass, " NumSizeClasses=", _NumSizeClasses, "\n")
throw("bad NumSizeClasses")
}
// Increase object sizes if we can fit the same number of larger objects
// into the same number of pages. For example, we choose size 8448 above
// with 6 objects in 7 pages. But we can well use object size 9472,
// which is also 6 objects in 7 pages but +1024 bytes (+12.12%).
// We need to preserve at least largeSizeDiv alignment otherwise
// sizeToClass won't work.
for i := 1; i < _NumSizeClasses; i++ {
npages := class_to_allocnpages[i]
psize := npages * _PageSize
size := class_to_size[i]
new_size := (psize / (psize / size)) &^ (largeSizeDiv - 1)
if new_size > size {
class_to_size[i] = new_size
}
}
// Check maxObjsPerSpan => number of objects invariant.
for i, size := range class_to_size {
if i != 0 && class_to_size[i-1] >= size {
throw("non-monotonic size classes")
}
if size != 0 && class_to_allocnpages[i]*pageSize/size > maxObjsPerSpan {
throw("span contains too many objects")
}
......@@ -122,18 +149,18 @@ func initSizes() {
nextsize := 0
for sizeclass = 1; sizeclass < _NumSizeClasses; sizeclass++ {
for ; nextsize < 1024 && nextsize <= int(class_to_size[sizeclass]); nextsize += 8 {
size_to_class8[nextsize/8] = int8(sizeclass)
size_to_class8[nextsize/8] = uint8(sizeclass)
}
if nextsize >= 1024 {
for ; nextsize <= int(class_to_size[sizeclass]); nextsize += 128 {
size_to_class128[(nextsize-1024)/128] = int8(sizeclass)
size_to_class128[(nextsize-1024)/128] = uint8(sizeclass)
}
}
}
// Double-check SizeToClass.
if false {
for n := int32(0); n < _MaxSmallSize; n++ {
for n := uint32(0); n < _MaxSmallSize; n++ {
sizeclass := sizeToClass(n)
if sizeclass < 1 || sizeclass >= _NumSizeClasses || class_to_size[sizeclass] < n {
print("runtime: size=", n, " sizeclass=", sizeclass, " runtime·class_to_size=", class_to_size[sizeclass], "\n")
......@@ -186,10 +213,10 @@ dump:
// Returns size of the memory block that mallocgc will allocate if you ask for the size.
func roundupsize(size uintptr) uintptr {
if size < _MaxSmallSize {
if size <= 1024-8 {
return uintptr(class_to_size[size_to_class8[(size+7)>>3]])
if size <= smallSizeMax-8 {
return uintptr(class_to_size[size_to_class8[(size+smallSizeDiv-1)/smallSizeDiv]])
} else {
return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]])
return uintptr(class_to_size[size_to_class128[(size-smallSizeMax+largeSizeDiv-1)/largeSizeDiv]])
}
}
if size+_PageSize < size {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment