Commit 23ad5631 authored by Dmitriy Vyukov's avatar Dmitriy Vyukov

runtime: transfer whole span from MCentral to MCache

Finer-grained transfers were relevant with per-M caches,
with per-P caches they are not relevant and harmful for performance.
For few small size classes where it makes difference,
it's fine to grab the whole span (4K).

benchmark          old ns/op    new ns/op    delta
BenchmarkMalloc           42           40   -4.45%

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/9374043
parent e85e6788
...@@ -267,14 +267,10 @@ extern MStats mstats; ...@@ -267,14 +267,10 @@ extern MStats mstats;
// class_to_size[i] = largest size in class i // class_to_size[i] = largest size in class i
// class_to_allocnpages[i] = number of pages to allocate when // class_to_allocnpages[i] = number of pages to allocate when
// making new objects in class i // making new objects in class i
// class_to_transfercount[i] = number of objects to move when
// taking a bunch of objects out of the central lists
// and putting them in the thread free list.
int32 runtime·SizeToClass(int32); int32 runtime·SizeToClass(int32);
extern int32 runtime·class_to_size[NumSizeClasses]; extern int32 runtime·class_to_size[NumSizeClasses];
extern int32 runtime·class_to_allocnpages[NumSizeClasses]; extern int32 runtime·class_to_allocnpages[NumSizeClasses];
extern int32 runtime·class_to_transfercount[NumSizeClasses];
extern int8 runtime·size_to_class8[1024/8 + 1]; extern int8 runtime·size_to_class8[1024/8 + 1];
extern int8 runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1]; extern int8 runtime·size_to_class128[(MaxSmallSize-1024)/128 + 1];
extern void runtime·InitSizes(void); extern void runtime·InitSizes(void);
...@@ -399,7 +395,7 @@ struct MCentral ...@@ -399,7 +395,7 @@ struct MCentral
}; };
void runtime·MCentral_Init(MCentral *c, int32 sizeclass); void runtime·MCentral_Init(MCentral *c, int32 sizeclass);
int32 runtime·MCentral_AllocList(MCentral *c, int32 n, MLink **first); int32 runtime·MCentral_AllocList(MCentral *c, MLink **first);
void runtime·MCentral_FreeList(MCentral *c, int32 n, MLink *first); void runtime·MCentral_FreeList(MCentral *c, int32 n, MLink *first);
void runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end); void runtime·MCentral_FreeSpan(MCentral *c, MSpan *s, int32 n, MLink *start, MLink *end);
......
...@@ -21,8 +21,7 @@ runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed) ...@@ -21,8 +21,7 @@ runtime·MCache_Alloc(MCache *c, int32 sizeclass, uintptr size, int32 zeroed)
l = &c->list[sizeclass]; l = &c->list[sizeclass];
if(l->list == nil) { if(l->list == nil) {
// Replenish using central lists. // Replenish using central lists.
n = runtime·MCentral_AllocList(&runtime·mheap->central[sizeclass], n = runtime·MCentral_AllocList(&runtime·mheap->central[sizeclass], &first);
runtime·class_to_transfercount[sizeclass], &first);
if(n == 0) if(n == 0)
runtime·throw("out of memory"); runtime·throw("out of memory");
l->list = first; l->list = first;
...@@ -91,7 +90,7 @@ runtime·MCache_Free(MCache *c, void *v, int32 sizeclass, uintptr size) ...@@ -91,7 +90,7 @@ runtime·MCache_Free(MCache *c, void *v, int32 sizeclass, uintptr size)
if(l->nlist >= MaxMCacheListLen) { if(l->nlist >= MaxMCacheListLen) {
// Release a chunk back. // Release a chunk back.
ReleaseN(c, l, runtime·class_to_transfercount[sizeclass], sizeclass); ReleaseN(c, l, l->nlist/2, sizeclass);
} }
if(c->size >= MaxMCacheSize) { if(c->size >= MaxMCacheSize) {
......
...@@ -30,16 +30,15 @@ runtime·MCentral_Init(MCentral *c, int32 sizeclass) ...@@ -30,16 +30,15 @@ runtime·MCentral_Init(MCentral *c, int32 sizeclass)
runtime·MSpanList_Init(&c->empty); runtime·MSpanList_Init(&c->empty);
} }
// Allocate up to n objects from the central free list. // Allocate a list of objects from the central free list.
// Return the number of objects allocated. // Return the number of objects allocated.
// The objects are linked together by their first words. // The objects are linked together by their first words.
// On return, *pstart points at the first object. // On return, *pstart points at the first object.
int32 int32
runtime·MCentral_AllocList(MCentral *c, int32 n, MLink **pfirst) runtime·MCentral_AllocList(MCentral *c, MLink **pfirst)
{ {
MSpan *s; MSpan *s;
MLink *first, *last; int32 cap, n;
int32 cap, avail, i;
runtime·lock(c); runtime·lock(c);
// Replenish central list if empty. // Replenish central list if empty.
...@@ -52,31 +51,14 @@ runtime·MCentral_AllocList(MCentral *c, int32 n, MLink **pfirst) ...@@ -52,31 +51,14 @@ runtime·MCentral_AllocList(MCentral *c, int32 n, MLink **pfirst)
} }
s = c->nonempty.next; s = c->nonempty.next;
cap = (s->npages << PageShift) / s->elemsize; cap = (s->npages << PageShift) / s->elemsize;
avail = cap - s->ref; n = cap - s->ref;
if(avail < n) *pfirst = s->freelist;
n = avail; s->freelist = nil;
// First one is guaranteed to work, because we just grew the list.
first = s->freelist;
last = first;
for(i=1; i<n; i++) {
last = last->next;
}
s->freelist = last->next;
last->next = nil;
s->ref += n; s->ref += n;
c->nfree -= n; c->nfree -= n;
runtime·MSpanList_Remove(s);
if(n == avail) { runtime·MSpanList_Insert(&c->empty, s);
if(s->freelist != nil || s->ref != cap) {
runtime·throw("invalid freelist");
}
runtime·MSpanList_Remove(s);
runtime·MSpanList_Insert(&c->empty, s);
}
runtime·unlock(c); runtime·unlock(c);
*pfirst = first;
return n; return n;
} }
......
...@@ -31,7 +31,6 @@ ...@@ -31,7 +31,6 @@
int32 runtime·class_to_size[NumSizeClasses]; int32 runtime·class_to_size[NumSizeClasses];
int32 runtime·class_to_allocnpages[NumSizeClasses]; int32 runtime·class_to_allocnpages[NumSizeClasses];
int32 runtime·class_to_transfercount[NumSizeClasses];
// The SizeToClass lookup is implemented using two arrays, // The SizeToClass lookup is implemented using two arrays,
// one mapping sizes <= 1024 to their class and one mapping // one mapping sizes <= 1024 to their class and one mapping
...@@ -137,16 +136,6 @@ runtime·InitSizes(void) ...@@ -137,16 +136,6 @@ runtime·InitSizes(void)
// Copy out for statistics table. // Copy out for statistics table.
for(i=0; i<nelem(runtime·class_to_size); i++) for(i=0; i<nelem(runtime·class_to_size); i++)
mstats.by_size[i].size = runtime·class_to_size[i]; mstats.by_size[i].size = runtime·class_to_size[i];
// Initialize the runtime·class_to_transfercount table.
for(sizeclass = 1; sizeclass < NumSizeClasses; sizeclass++) {
n = 64*1024 / runtime·class_to_size[sizeclass];
if(n < 2)
n = 2;
if(n > 32)
n = 32;
runtime·class_to_transfercount[sizeclass] = n;
}
return; return;
dump: dump:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment