Commit 7cd29d79 authored by Chris Toshok

mucho cleanup

parent 1a268096
@@ -35,15 +35,40 @@ namespace gc {
void _doFree(GCAllocation* al);
// these template functions are for both large and huge sections
template <class ListT> inline void unlinkNode(ListT* node) {
// lots of linked lists around here, so let's just use template functions for them all
template <class ListT> inline void nullNextPrev(ListT* node) {
node->next = NULL;
node->prev = NULL;
}
template <class ListT> inline void removeFromLL(ListT* node) {
*node->prev = node->next;
if (node->next)
node->next->prev = node->prev;
}
template <class ListT> inline void removeFromLLAndNull(ListT* node) {
*node->prev = node->next;
if (node->next)
node->next->prev = node->prev;
nullNextPrev(node);
}
template <class ListT> inline void insertIntoLL(ListT** next_pointer, ListT* next) {
assert(next_pointer);
assert(next);
assert(!next->next);
assert(!next->prev);
next->next = *next_pointer;
if (next->next)
next->next->prev = &next->next;
*next_pointer = next;
next->prev = next_pointer;
}
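These helpers implement an intrusive doubly linked list in which `prev` points at the previous link's `next` field (or at the list head itself), so unlinking never needs a head special case. A minimal standalone sketch of how they compose, using a hypothetical `Node` type in place of the real `Block`/`LargeObj`/`HugeObj` nodes:

```cpp
// Hypothetical Node type for illustration only; the real list nodes in this
// file (Block, LargeObj, HugeObj) carry the same next/prev fields.
struct Node {
    Node* next;
    Node** prev; // address of whatever points at us: a head pointer or a next field
    int payload;
};

void listExample() {
    Node* head = NULL;
    Node a{ NULL, NULL, 1 }, b{ NULL, NULL, 2 };

    insertIntoLL(&head, &a);   // head -> a
    insertIntoLL(&head, &b);   // head -> b -> a

    removeFromLLAndNull(&b);   // head -> a; b's links are nulled so it can be reused
    insertIntoLL(&a.next, &b); // head -> a -> b (insertion after an arbitrary node)
}
```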
template <class ListT, typename Free>
inline void sweepHeap(ListT* head, std::function<void(GCAllocation*)> __free, Free free_func) {
template <class ListT, typename Free> inline void sweepList(ListT* head, Free free_func) {
auto cur = head;
while (cur) {
GCAllocation* al = cur->data;
@@ -51,9 +76,9 @@ inline void sweepHeap(ListT* head, std::function<void(GCAllocation*)> __free, Fr
clearMark(al);
cur = cur->next;
} else {
__free(al);
_doFree(al);
unlinkNode(cur);
removeFromLL(cur);
auto to_free = cur;
cur = cur->next;
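The hunk cuts off the rest of sweepList, but its contract is visible: walk a list whose nodes expose a GCAllocation `data` field, clear the mark on surviving nodes, and run `_doFree` plus a caller-supplied deleter on unmarked ones. Both list-based arenas use it the same way later in this commit; the call pattern is repeated here only to make the truncated template easier to read:

```cpp
// Call pattern, as it appears in LargeArena::freeUnmarked and
// HugeArena::freeUnmarked further down in this diff.
void LargeArena::freeUnmarked() {
    sweepList(head, [this](LargeObj* ptr) { _freeLargeObj(ptr); });
}

void HugeArena::freeUnmarked() {
    sweepList(head, [this](HugeObj* ptr) { _freeHugeObj(ptr); });
}
```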
@@ -92,6 +117,121 @@ void registerGCManagedBytes(size_t bytes) {
Heap global_heap;
void _doFree(GCAllocation* al) {
if (VERBOSITY() >= 2)
printf("Freeing %p\n", al->user_data);
#ifndef NVALGRIND
VALGRIND_DISABLE_ERROR_REPORTING;
#endif
GCKind alloc_kind = al->kind_id;
#ifndef NVALGRIND
VALGRIND_ENABLE_ERROR_REPORTING;
#endif
if (alloc_kind == GCKind::PYTHON) {
#ifndef NVALGRIND
VALGRIND_DISABLE_ERROR_REPORTING;
#endif
Box* b = (Box*)al->user_data;
#ifndef NVALGRIND
VALGRIND_ENABLE_ERROR_REPORTING;
#endif
ASSERT(b->cls->tp_dealloc == NULL, "%s", getTypeName(b));
if (b->cls->simple_destructor)
b->cls->simple_destructor(b);
}
}
void Heap::destructContents(GCAllocation* al) {
_doFree(al);
}
struct HeapStatistics {
struct TypeStats {
int64_t nallocs;
int64_t nbytes;
TypeStats() : nallocs(0), nbytes(0) {}
void print(const char* name) const {
if (nbytes > (1 << 20))
printf("%s: %ld allocations for %.1f MB\n", name, nallocs, nbytes * 1.0 / (1 << 20));
else if (nbytes > (1 << 10))
printf("%s: %ld allocations for %.1f KB\n", name, nallocs, nbytes * 1.0 / (1 << 10));
else
printf("%s: %ld allocations for %ld bytes\n", name, nallocs, nbytes);
}
};
std::unordered_map<BoxedClass*, TypeStats> by_cls;
TypeStats conservative, untracked;
TypeStats total;
};
void addStatistic(HeapStatistics* stats, GCAllocation* al, int nbytes) {
stats->total.nallocs++;
stats->total.nbytes += nbytes;
if (al->kind_id == GCKind::PYTHON) {
Box* b = (Box*)al->user_data;
auto& t = stats->by_cls[b->cls];
t.nallocs++;
t.nbytes += nbytes;
} else if (al->kind_id == GCKind::CONSERVATIVE) {
stats->conservative.nallocs++;
stats->conservative.nbytes += nbytes;
} else if (al->kind_id == GCKind::UNTRACKED) {
stats->untracked.nallocs++;
stats->untracked.nbytes += nbytes;
} else {
RELEASE_ASSERT(0, "%d", (int)al->kind_id);
}
}
void Heap::dumpHeapStatistics() {
threading::GLPromoteRegion _lock;
HeapStatistics stats;
small_arena.getStatistics(&stats);
large_arena.getStatistics(&stats);
huge_arena.getStatistics(&stats);
stats.conservative.print("conservative");
stats.untracked.print("untracked");
for (const auto& p : stats.by_cls) {
p.second.print(getFullNameOfClass(p.first).c_str());
}
stats.total.print("Total");
printf("\n");
}
void dumpHeapStatistics() {
global_heap.dumpHeapStatistics();
}
//////
/// Small Arena
GCAllocation* SmallArena::alloc(size_t bytes) {
registerGCManagedBytes(bytes);
if (bytes <= 16)
return _alloc(16, 0);
else if (bytes <= 32)
return _alloc(32, 1);
else {
for (int i = 2; i < NUM_BUCKETS; i++) {
if (sizes[i] >= bytes) {
return _alloc(sizes[i], i);
}
}
return NULL;
}
}
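A short sketch of the bucket lookup above, for tracing by hand; the table mirrors the `sizes[]` array declared in the header (16, 32, 48, ... up to 3584 bytes), truncated here for brevity:

```cpp
#include <cstddef>

// Stand-in for the real sizes[] table; only the prefix visible in this diff.
static const size_t kBucketSizes[] = { 16,  32,  48,  64,  80,  96,  112, 128,
                                       160, 192, 224, 256, 320, 384 /* ... 3584 */ };

// Returns the bucket index SmallArena::alloc would pick, or -1 if the request
// is too large for the SmallArena (Heap::alloc is expected to route those to
// the LargeArena instead).
int bucketFor(size_t bytes) {
    for (size_t i = 0; i < sizeof(kBucketSizes) / sizeof(kBucketSizes[0]); i++) {
        if (kBucketSizes[i] >= bytes)
            return (int)i; // e.g. bytes == 100 -> index 6 (rounded up to 112 bytes)
    }
    return -1;
}
```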
GCAllocation* SmallArena::realloc(GCAllocation* al, size_t bytes) {
Block* b = Block::forPointer(al);
@@ -110,10 +250,25 @@ GCAllocation* SmallArena::realloc(GCAllocation* al, size_t bytes) {
memcpy(rtn, al, std::min(bytes, size));
#endif
_free(al, b);
free(al);
return rtn;
}
void SmallArena::free(GCAllocation* alloc) {
Block* b = Block::forPointer(alloc);
size_t size = b->size;
int offset = (char*)alloc - (char*)b;
assert(offset % size == 0);
int atom_idx = offset / ATOM_SIZE;
assert(!b->isfree.isSet(atom_idx));
b->isfree.set(atom_idx);
#ifndef NVALGRIND
// VALGRIND_MEMPOOL_FREE(b, ptr);
#endif
}
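The index arithmetic in free() is easier to follow with numbers. A sketch of just that math; `kAtomSize` is an assumption standing in for ATOM_SIZE, whose definition is not shown in this hunk:

```cpp
#include <cassert>
#include <cstddef>

static const size_t kAtomSize = 16; // assumption: mirrors ATOM_SIZE from the header

// Which bit in the block's isfree bitmap corresponds to an allocation.
int atomIndexFor(const char* block_start, const char* alloc, size_t bucket_size) {
    size_t offset = (size_t)(alloc - block_start);
    assert(offset % bucket_size == 0); // allocations begin on object boundaries
    return (int)(offset / kAtomSize);  // e.g. offset 336 in a 112-byte bucket -> atom 21
}
```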
GCAllocation* SmallArena::allocationFrom(void* ptr) {
Block* b = Block::forPointer(ptr);
size_t size = b->size;
@@ -131,37 +286,6 @@ GCAllocation* SmallArena::allocationFrom(void* ptr) {
return reinterpret_cast<GCAllocation*>(&b->atoms[atom_idx]);
}
SmallArena::Block** SmallArena::freeChain(Block** head) {
while (Block* b = *head) {
int num_objects = b->numObjects();
int first_obj = b->minObjIndex();
int atoms_per_obj = b->atomsPerObj();
for (int obj_idx = first_obj; obj_idx < num_objects; obj_idx++) {
int atom_idx = obj_idx * atoms_per_obj;
if (b->isfree.isSet(atom_idx))
continue;
void* p = &b->atoms[atom_idx];
GCAllocation* al = reinterpret_cast<GCAllocation*>(p);
if (isMarked(al)) {
clearMark(al);
} else {
_doFree(al);
// assert(p != (void*)0x127000d960); // the main module
b->isfree.set(atom_idx);
}
}
head = &b->next;
}
return head;
}
void SmallArena::freeUnmarked() {
thread_caches.forEachValue([this](ThreadBlockCache* cache) {
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
@@ -179,185 +303,227 @@ void SmallArena::freeUnmarked() {
break;
}
if (h) {
removeFromLL(h);
removeFromLLAndNull(h);
insertIntoLL(&heads[bidx], h);
}
Block** chain_end = freeChain(&cache->cache_free_heads[bidx]);
freeChain(&cache->cache_full_heads[bidx]);
Block** chain_end = _freeChain(&cache->cache_free_heads[bidx]);
_freeChain(&cache->cache_full_heads[bidx]);
while (Block* b = cache->cache_full_heads[bidx]) {
removeFromLL(b);
removeFromLLAndNull(b);
insertIntoLL(chain_end, b);
}
}
});
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
Block** chain_end = freeChain(&heads[bidx]);
freeChain(&full_heads[bidx]);
Block** chain_end = _freeChain(&heads[bidx]);
_freeChain(&full_heads[bidx]);
while (Block* b = full_heads[bidx]) {
removeFromLL(b);
removeFromLLAndNull(b);
insertIntoLL(chain_end, b);
}
}
}
// TODO: copy-pasted from freeUnmarked()
void SmallArena::getStatistics(HeapStatistics* stats) {
thread_caches.forEachValue([this, stats](ThreadBlockCache* cache) {
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
Block* h = cache->cache_free_heads[bidx];
#define LARGE_BLOCK_NUM_CHUNKS ((BLOCK_SIZE >> CHUNK_BITS) - 1)
_getChainStatistics(stats, &cache->cache_free_heads[bidx]);
_getChainStatistics(stats, &cache->cache_full_heads[bidx]);
}
});
#define LARGE_BLOCK_FOR_OBJ(obj) ((LargeBlock*)((int64_t)(obj) & ~(int64_t)(BLOCK_SIZE - 1)))
#define LARGE_CHUNK_INDEX(obj, section) (((char*)(obj) - (char*)(section)) >> CHUNK_BITS)
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
_getChainStatistics(stats, &heads[bidx]);
_getChainStatistics(stats, &full_heads[bidx]);
}
}
int64_t los_memory_usage = 0;
static int64_t large_object_count = 0;
static int large_block_count = 0;
SmallArena::Block** SmallArena::_freeChain(Block** head) {
while (Block* b = *head) {
int num_objects = b->numObjects();
int first_obj = b->minObjIndex();
int atoms_per_obj = b->atomsPerObj();
void LargeArena::add_free_chunk(LargeFreeChunk* free_chunks, size_t size) {
size_t num_chunks = size >> CHUNK_BITS;
for (int obj_idx = first_obj; obj_idx < num_objects; obj_idx++) {
int atom_idx = obj_idx * atoms_per_obj;
free_chunks->size = size;
if (b->isfree.isSet(atom_idx))
continue;
if (num_chunks >= NUM_FREE_LISTS)
num_chunks = 0;
free_chunks->next_size = free_lists[num_chunks];
free_lists[num_chunks] = free_chunks;
}
void* p = &b->atoms[atom_idx];
GCAllocation* al = reinterpret_cast<GCAllocation*>(p);
LargeArena::LargeFreeChunk* LargeArena::get_from_size_list(LargeFreeChunk** list, size_t size) {
LargeFreeChunk* free_chunks = NULL;
LargeBlock* section;
size_t i, num_chunks, start_index;
if (isMarked(al)) {
clearMark(al);
} else {
_doFree(al);
assert((size & (CHUNK_SIZE - 1)) == 0);
// assert(p != (void*)0x127000d960); // the main module
b->isfree.set(atom_idx);
}
}
while (*list) {
free_chunks = *list;
if (free_chunks->size >= size)
break;
list = &(*list)->next_size;
head = &b->next;
}
return head;
}
if (!*list)
return NULL;
*list = free_chunks->next_size;
SmallArena::Block* SmallArena::_allocBlock(uint64_t size, Block** prev) {
Block* rtn = (Block*)doMmap(sizeof(Block));
assert(rtn);
rtn->size = size;
rtn->num_obj = BLOCK_SIZE / size;
rtn->min_obj_index = (BLOCK_HEADER_SIZE + size - 1) / size;
rtn->atoms_per_obj = size / ATOM_SIZE;
rtn->prev = prev;
rtn->next = NULL;
if (free_chunks->size > size)
add_free_chunk((LargeFreeChunk*)((char*)free_chunks + size), free_chunks->size - size);
#ifndef NVALGRIND
// Not sure if this mempool stuff is better than the malloc-like interface:
// VALGRIND_CREATE_MEMPOOL(rtn, 0, true);
#endif
num_chunks = size >> CHUNK_BITS;
// printf("Allocated new block %p\n", rtn);
section = LARGE_BLOCK_FOR_OBJ(free_chunks);
// Don't think I need to do this:
rtn->isfree.setAllZero();
rtn->next_to_check.reset();
start_index = LARGE_CHUNK_INDEX(free_chunks, section);
for (i = start_index; i < start_index + num_chunks; ++i) {
assert(section->free_chunk_map[i]);
section->free_chunk_map[i] = 0;
int num_objects = rtn->numObjects();
int num_lost = rtn->minObjIndex();
int atoms_per_object = rtn->atomsPerObj();
for (int i = num_lost * atoms_per_object; i < num_objects * atoms_per_object; i += atoms_per_object) {
rtn->isfree.set(i);
// printf("%d %d\n", idx, bit);
}
section->num_free_chunks -= size >> CHUNK_BITS;
assert(section->num_free_chunks >= 0);
return free_chunks;
}
LargeArena::LargeObj* LargeArena::_allocInternal(size_t size) {
LargeBlock* section;
LargeFreeChunk* free_chunks;
size_t num_chunks;
size += CHUNK_SIZE - 1;
size &= ~(CHUNK_SIZE - 1);
// printf("%d %d %d\n", num_objects, num_lost, atoms_per_object);
// for (int i =0; i < BITFIELD_ELTS; i++) {
// printf("%d: %lx\n", i, rtn->isfree[i]);
//}
return rtn;
}
num_chunks = size >> CHUNK_BITS;
SmallArena::ThreadBlockCache::~ThreadBlockCache() {
LOCK_REGION(heap->lock);
assert(size > 0 && size - sizeof(LargeObj) <= ALLOC_SIZE_LIMIT);
assert(num_chunks > 0);
for (int i = 0; i < NUM_BUCKETS; i++) {
while (Block* b = cache_free_heads[i]) {
removeFromLLAndNull(b);
insertIntoLL(&small->heads[i], b);
}
retry:
if (num_chunks >= NUM_FREE_LISTS) {
free_chunks = get_from_size_list(&free_lists[0], size);
} else {
size_t i;
for (i = num_chunks; i < NUM_FREE_LISTS; ++i) {
free_chunks = get_from_size_list(&free_lists[i], size);
if (free_chunks)
break;
while (Block* b = cache_full_heads[i]) {
removeFromLLAndNull(b);
insertIntoLL(&small->full_heads[i], b);
}
if (!free_chunks)
free_chunks = get_from_size_list(&free_lists[0], size);
}
}
if (free_chunks)
return (LargeObj*)free_chunks;
GCAllocation* SmallArena::_allocFromBlock(Block* b) {
int idx = b->isfree.scanForNext(b->next_to_check);
if (idx == -1)
return NULL;
section = (LargeBlock*)doMmap(BLOCK_SIZE);
void* rtn = &b->atoms[idx];
return reinterpret_cast<GCAllocation*>(rtn);
}
if (!section)
return NULL;
SmallArena::Block* SmallArena::_claimBlock(size_t rounded_size, Block** free_head) {
Block* free_block = *free_head;
if (free_block) {
removeFromLLAndNull(free_block);
return free_block;
}
free_chunks = (LargeFreeChunk*)((char*)section + CHUNK_SIZE);
free_chunks->size = BLOCK_SIZE - CHUNK_SIZE;
free_chunks->next_size = free_lists[0];
free_lists[0] = free_chunks;
return _allocBlock(rounded_size, NULL);
}
section->num_free_chunks = LARGE_BLOCK_NUM_CHUNKS;
GCAllocation* SmallArena::_alloc(size_t rounded_size, int bucket_idx) {
Block** free_head = &heads[bucket_idx];
Block** full_head = &full_heads[bucket_idx];
section->free_chunk_map = (unsigned char*)section + sizeof(LargeBlock);
assert(sizeof(LargeBlock) + LARGE_BLOCK_NUM_CHUNKS + 1 <= CHUNK_SIZE);
section->free_chunk_map[0] = 0;
memset(section->free_chunk_map + 1, 1, LARGE_BLOCK_NUM_CHUNKS);
ThreadBlockCache* cache = thread_caches.get();
section->next = blocks;
blocks = section;
Block** cache_head = &cache->cache_free_heads[bucket_idx];
++large_block_count;
// static __thread int gc_allocs = 0;
// if (++gc_allocs == 128) {
// static StatCounter sc_total("gc_allocs");
// sc_total.log(128);
// gc_allocs = 0;
//}
goto retry;
}
while (true) {
while (Block* cache_block = *cache_head) {
GCAllocation* rtn = _allocFromBlock(cache_block);
if (rtn)
return rtn;
void LargeArena::_freeInternal(LargeObj* obj, size_t size) {
LargeBlock* section = LARGE_BLOCK_FOR_OBJ(obj);
size_t num_chunks, i, start_index;
removeFromLLAndNull(cache_block);
insertIntoLL(&cache->cache_full_heads[bucket_idx], cache_block);
}
size += CHUNK_SIZE - 1;
size &= ~(CHUNK_SIZE - 1);
// Not very useful to count the cache misses if we don't count the total attempts:
// static StatCounter sc_fallback("gc_allocs_cachemiss");
// sc_fallback.log();
num_chunks = size >> CHUNK_BITS;
LOCK_REGION(heap->lock);
assert(size > 0 && size - sizeof(LargeObj) <= ALLOC_SIZE_LIMIT);
assert(num_chunks > 0);
assert(*cache_head == NULL);
section->num_free_chunks += num_chunks;
assert(section->num_free_chunks <= LARGE_BLOCK_NUM_CHUNKS);
// should probably be called allocBlock:
Block* myblock = _claimBlock(rounded_size, &heads[bucket_idx]);
assert(myblock);
assert(!myblock->next);
assert(!myblock->prev);
/*
* We could free the LOS section here if it's empty, but we
* can't unless we also remove its free chunks from the fast
* free lists. Instead, we do it in los_sweep().
*/
// printf("%d claimed new block %p with %d objects\n", threading::gettid(), myblock, myblock->numObjects());
start_index = LARGE_CHUNK_INDEX(obj, section);
for (i = start_index; i < start_index + num_chunks; ++i) {
assert(!section->free_chunk_map[i]);
section->free_chunk_map[i] = 1;
insertIntoLL(cache_head, myblock);
}
add_free_chunk((LargeFreeChunk*)obj, size);
}
void LargeArena::_free(LargeObj* obj) {
unlinkNode(obj);
_freeInternal(obj, obj->size);
}
// TODO: copy-pasted from _freeChain
void SmallArena::_getChainStatistics(HeapStatistics* stats, Block** head) {
while (Block* b = *head) {
int num_objects = b->numObjects();
int first_obj = b->minObjIndex();
int atoms_per_obj = b->atomsPerObj();
void LargeArena::freeUnmarked() {
sweepHeap(head, _doFree, [this](LargeObj* ptr) { _freeInternal(ptr, ptr->size); });
for (int obj_idx = first_obj; obj_idx < num_objects; obj_idx++) {
int atom_idx = obj_idx * atoms_per_obj;
if (b->isfree.isSet(atom_idx))
continue;
void* p = &b->atoms[atom_idx];
GCAllocation* al = reinterpret_cast<GCAllocation*>(p);
addStatistic(stats, al, b->size);
}
head = &b->next;
}
}
//////
/// Large Arena
#define LARGE_BLOCK_NUM_CHUNKS ((BLOCK_SIZE >> CHUNK_BITS) - 1)
#define LARGE_BLOCK_FOR_OBJ(obj) ((LargeBlock*)((int64_t)(obj) & ~(int64_t)(BLOCK_SIZE - 1)))
#define LARGE_CHUNK_INDEX(obj, section) (((char*)(obj) - (char*)(section)) >> CHUNK_BITS)
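To make those macros concrete: a large-object block is carved into fixed-size chunks, the first chunk holds the block header and free_chunk_map, and a chunk's index is just its byte offset within the block shifted down by CHUNK_BITS. A sketch with made-up constants, since the real BLOCK_SIZE and CHUNK_BITS values are defined elsewhere and not visible in this diff:

```cpp
#include <cstdint>

// kBlockSize / kChunkBits are assumptions standing in for BLOCK_SIZE / CHUNK_BITS.
static const uint64_t kBlockSize = 1 << 20; // assume 1 MB blocks (per the header comment)
static const int      kChunkBits = 12;      // assume 4 KB chunks

uint64_t blockFor(uint64_t obj_addr) {       // analogous to LARGE_BLOCK_FOR_OBJ
    return obj_addr & ~(kBlockSize - 1);     // round down to the block start
}

uint64_t chunkIndex(uint64_t obj_addr) {     // analogous to LARGE_CHUNK_INDEX
    return (obj_addr - blockFor(obj_addr)) >> kChunkBits;
}
// With these assumed values, LARGE_BLOCK_NUM_CHUNKS would be
// (kBlockSize >> kChunkBits) - 1 == 255: every chunk except the header chunk.
```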
GCAllocation* LargeArena::alloc(size_t size) {
registerGCManagedBytes(size);
@@ -365,22 +531,18 @@ GCAllocation* LargeArena::alloc(size_t size) {
// printf ("allocLarge %zu\n", size);
LargeObj* obj = _allocInternal(size + sizeof(GCAllocation) + sizeof(LargeObj));
LargeObj* obj = _alloc(size + sizeof(GCAllocation) + sizeof(LargeObj));
obj->size = size;
obj->next = head;
if (obj->next)
obj->next->prev = &obj->next;
obj->prev = &head;
head = obj;
large_object_count++;
nullNextPrev(obj);
insertIntoLL(&head, obj);
return obj->data;
}
GCAllocation* LargeArena::realloc(GCAllocation* al, size_t bytes) {
LargeObj* obj = (LargeObj*)((char*)al - offsetof(LargeObj, data));
LargeObj* obj = LargeObj::fromAllocation(al);
int size = obj->size;
if (size >= bytes && size < bytes * 2)
return al;
@@ -388,13 +550,12 @@ GCAllocation* LargeArena::realloc(GCAllocation* al, size_t bytes) {
GCAllocation* rtn = heap->alloc(bytes);
memcpy(rtn, al, std::min(bytes, obj->size));
_free(obj);
_freeLargeObj(obj);
return rtn;
}
void LargeArena::free(GCAllocation* al) {
LargeObj* obj = (LargeObj*)((char*)al - offsetof(LargeObj, data));
_free(obj);
_freeLargeObj(LargeObj::fromAllocation(al));
}
GCAllocation* LargeArena::allocationFrom(void* ptr) {
@@ -410,341 +571,206 @@ GCAllocation* LargeArena::allocationFrom(void* ptr) {
return NULL;
}
void HugeArena::freeUnmarked() {
sweepHeap(head, _doFree, [this](HugeObj* ptr) { _freeHugeObj(ptr); });
}
GCAllocation* HugeArena::alloc(size_t size) {
registerGCManagedBytes(size);
LOCK_REGION(heap->lock);
size_t total_size = size + sizeof(HugeObj);
total_size = (total_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
HugeObj* rtn = (HugeObj*)doMmap(total_size);
rtn->obj_size = size;
rtn->next = head;
if (rtn->next)
rtn->next->prev = &rtn->next;
rtn->prev = &head;
head = rtn;
return rtn->data;
}
GCAllocation* HugeArena::realloc(GCAllocation* al, size_t bytes) {
HugeObj* lobj = HugeObj::fromAllocation(al);
int capacity = lobj->capacity();
if (capacity >= bytes && capacity < bytes * 2)
return al;
GCAllocation* rtn = heap->alloc(bytes);
memcpy(rtn, al, std::min(bytes, lobj->obj_size));
_freeHugeObj(lobj);
return rtn;
}
void HugeArena::_freeHugeObj(HugeObj* lobj) {
unlinkNode(lobj);
int r = munmap(lobj, lobj->mmap_size());
assert(r == 0);
}
void HugeArena::free(GCAllocation* al) {
HugeObj* lobj = HugeObj::fromAllocation(al);
_freeHugeObj(lobj);
void LargeArena::freeUnmarked() {
sweepList(head, [this](LargeObj* ptr) { _freeLargeObj(ptr); });
}
GCAllocation* HugeArena::allocationFrom(void* ptr) {
HugeObj* cur = head;
void LargeArena::getStatistics(HeapStatistics* stats) {
LargeObj* cur = head;
while (cur) {
if (ptr >= cur && ptr < &cur->data[cur->obj_size])
return &cur->data[0];
GCAllocation* al = cur->data;
addStatistic(stats, al, cur->size);
cur = cur->next;
}
return NULL;
}
SmallArena::Block* SmallArena::alloc_block(uint64_t size, Block** prev) {
Block* rtn = (Block*)doMmap(sizeof(Block));
assert(rtn);
rtn->size = size;
rtn->num_obj = BLOCK_SIZE / size;
rtn->min_obj_index = (BLOCK_HEADER_SIZE + size - 1) / size;
rtn->atoms_per_obj = size / ATOM_SIZE;
rtn->prev = prev;
rtn->next = NULL;
#ifndef NVALGRIND
// Not sure if this mempool stuff is better than the malloc-like interface:
// VALGRIND_CREATE_MEMPOOL(rtn, 0, true);
#endif
// printf("Allocated new block %p\n", rtn);
// Don't think I need to do this:
rtn->isfree.setAllZero();
rtn->next_to_check.reset();
void LargeArena::add_free_chunk(LargeFreeChunk* free_chunks, size_t size) {
size_t num_chunks = size >> CHUNK_BITS;
int num_objects = rtn->numObjects();
int num_lost = rtn->minObjIndex();
int atoms_per_object = rtn->atomsPerObj();
for (int i = num_lost * atoms_per_object; i < num_objects * atoms_per_object; i += atoms_per_object) {
rtn->isfree.set(i);
// printf("%d %d\n", idx, bit);
}
free_chunks->size = size;
// printf("%d %d %d\n", num_objects, num_lost, atoms_per_object);
// for (int i =0; i < BITFIELD_ELTS; i++) {
// printf("%d: %lx\n", i, rtn->isfree[i]);
//}
return rtn;
if (num_chunks >= NUM_FREE_LISTS)
num_chunks = 0;
free_chunks->next_size = free_lists[num_chunks];
free_lists[num_chunks] = free_chunks;
}
void SmallArena::insertIntoLL(Block** next_pointer, Block* next) {
assert(next_pointer);
assert(next);
assert(!next->next);
assert(!next->prev);
LargeArena::LargeFreeChunk* LargeArena::get_from_size_list(LargeFreeChunk** list, size_t size) {
LargeFreeChunk* free_chunks = NULL;
LargeBlock* section;
size_t i, num_chunks, start_index;
next->next = *next_pointer;
if (next->next)
next->next->prev = &next->next;
*next_pointer = next;
next->prev = next_pointer;
}
assert((size & (CHUNK_SIZE - 1)) == 0);
void SmallArena::removeFromLL(Block* b) {
unlinkNode(b);
b->next = NULL;
b->prev = NULL;
}
while (*list) {
free_chunks = *list;
if (free_chunks->size >= size)
break;
list = &(*list)->next_size;
}
SmallArena::ThreadBlockCache::~ThreadBlockCache() {
LOCK_REGION(heap->lock);
if (!*list)
return NULL;
for (int i = 0; i < NUM_BUCKETS; i++) {
while (Block* b = cache_free_heads[i]) {
small->removeFromLL(b);
small->insertIntoLL(&small->heads[i], b);
}
*list = free_chunks->next_size;
while (Block* b = cache_full_heads[i]) {
small->removeFromLL(b);
small->insertIntoLL(&small->full_heads[i], b);
}
}
}
if (free_chunks->size > size)
add_free_chunk((LargeFreeChunk*)((char*)free_chunks + size), free_chunks->size - size);
GCAllocation* SmallArena::allocFromBlock(Block* b) {
int idx = b->isfree.scanForNext(b->next_to_check);
if (idx == -1)
return NULL;
num_chunks = size >> CHUNK_BITS;
void* rtn = &b->atoms[idx];
return reinterpret_cast<GCAllocation*>(rtn);
}
section = LARGE_BLOCK_FOR_OBJ(free_chunks);
SmallArena::Block* SmallArena::claimBlock(size_t rounded_size, Block** free_head) {
Block* free_block = *free_head;
if (free_block) {
removeFromLL(free_block);
return free_block;
start_index = LARGE_CHUNK_INDEX(free_chunks, section);
for (i = start_index; i < start_index + num_chunks; ++i) {
assert(section->free_chunk_map[i]);
section->free_chunk_map[i] = 0;
}
return alloc_block(rounded_size, NULL);
section->num_free_chunks -= size >> CHUNK_BITS;
assert(section->num_free_chunks >= 0);
return free_chunks;
}
GCAllocation* SmallArena::_alloc(size_t rounded_size, int bucket_idx) {
registerGCManagedBytes(rounded_size);
LargeArena::LargeObj* LargeArena::_alloc(size_t size) {
LargeBlock* section;
LargeFreeChunk* free_chunks;
size_t num_chunks;
Block** free_head = &heads[bucket_idx];
Block** full_head = &full_heads[bucket_idx];
size += CHUNK_SIZE - 1;
size &= ~(CHUNK_SIZE - 1);
ThreadBlockCache* cache = thread_caches.get();
num_chunks = size >> CHUNK_BITS;
Block** cache_head = &cache->cache_free_heads[bucket_idx];
assert(size > 0 && size - sizeof(LargeObj) <= ALLOC_SIZE_LIMIT);
assert(num_chunks > 0);
// static __thread int gc_allocs = 0;
// if (++gc_allocs == 128) {
// static StatCounter sc_total("gc_allocs");
// sc_total.log(128);
// gc_allocs = 0;
//}
retry:
if (num_chunks >= NUM_FREE_LISTS) {
free_chunks = get_from_size_list(&free_lists[0], size);
} else {
size_t i;
for (i = num_chunks; i < NUM_FREE_LISTS; ++i) {
free_chunks = get_from_size_list(&free_lists[i], size);
if (free_chunks)
break;
}
if (!free_chunks)
free_chunks = get_from_size_list(&free_lists[0], size);
}
while (true) {
while (Block* cache_block = *cache_head) {
GCAllocation* rtn = allocFromBlock(cache_block);
if (rtn)
return rtn;
if (free_chunks)
return (LargeObj*)free_chunks;
removeFromLL(cache_block);
insertIntoLL(&cache->cache_full_heads[bucket_idx], cache_block);
}
section = (LargeBlock*)doMmap(BLOCK_SIZE);
// Not very useful to count the cache misses if we don't count the total attempts:
// static StatCounter sc_fallback("gc_allocs_cachemiss");
// sc_fallback.log();
if (!section)
return NULL;
LOCK_REGION(heap->lock);
free_chunks = (LargeFreeChunk*)((char*)section + CHUNK_SIZE);
free_chunks->size = BLOCK_SIZE - CHUNK_SIZE;
free_chunks->next_size = free_lists[0];
free_lists[0] = free_chunks;
assert(*cache_head == NULL);
section->num_free_chunks = LARGE_BLOCK_NUM_CHUNKS;
// should probably be called allocBlock:
Block* myblock = claimBlock(rounded_size, &heads[bucket_idx]);
assert(myblock);
assert(!myblock->next);
assert(!myblock->prev);
section->free_chunk_map = (unsigned char*)section + sizeof(LargeBlock);
assert(sizeof(LargeBlock) + LARGE_BLOCK_NUM_CHUNKS + 1 <= CHUNK_SIZE);
section->free_chunk_map[0] = 0;
memset(section->free_chunk_map + 1, 1, LARGE_BLOCK_NUM_CHUNKS);
// printf("%d claimed new block %p with %d objects\n", threading::gettid(), myblock, myblock->numObjects());
section->next = blocks;
blocks = section;
insertIntoLL(cache_head, myblock);
}
goto retry;
}
void SmallArena::_free(GCAllocation* alloc, Block* b) {
assert(b == Block::forPointer(alloc));
void LargeArena::_freeLargeObj(LargeObj* obj) {
removeFromLL(obj);
size_t size = b->size;
int offset = (char*)alloc - (char*)b;
assert(offset % size == 0);
int atom_idx = offset / ATOM_SIZE;
size_t size = obj->size;
LargeBlock* section = LARGE_BLOCK_FOR_OBJ(obj);
size_t num_chunks, i, start_index;
assert(!b->isfree.isSet(atom_idx));
b->isfree.set(atom_idx);
size += CHUNK_SIZE - 1;
size &= ~(CHUNK_SIZE - 1);
#ifndef NVALGRIND
// VALGRIND_MEMPOOL_FREE(b, ptr);
#endif
}
num_chunks = size >> CHUNK_BITS;
void _doFree(GCAllocation* al) {
if (VERBOSITY() >= 2)
printf("Freeing %p\n", al->user_data);
assert(size > 0 && size - sizeof(LargeObj) <= ALLOC_SIZE_LIMIT);
assert(num_chunks > 0);
#ifndef NVALGRIND
VALGRIND_DISABLE_ERROR_REPORTING;
#endif
GCKind alloc_kind = al->kind_id;
#ifndef NVALGRIND
VALGRIND_ENABLE_ERROR_REPORTING;
#endif
section->num_free_chunks += num_chunks;
assert(section->num_free_chunks <= LARGE_BLOCK_NUM_CHUNKS);
if (alloc_kind == GCKind::PYTHON) {
#ifndef NVALGRIND
VALGRIND_DISABLE_ERROR_REPORTING;
#endif
Box* b = (Box*)al->user_data;
#ifndef NVALGRIND
VALGRIND_ENABLE_ERROR_REPORTING;
#endif
/*
* We could free the LOS section here if it's empty, but we
* can't unless we also remove its free chunks from the fast
* free lists. Instead, we do it in los_sweep().
*/
ASSERT(b->cls->tp_dealloc == NULL, "%s", getTypeName(b));
if (b->cls->simple_destructor)
b->cls->simple_destructor(b);
start_index = LARGE_CHUNK_INDEX(obj, section);
for (i = start_index; i < start_index + num_chunks; ++i) {
assert(!section->free_chunk_map[i]);
section->free_chunk_map[i] = 1;
}
}
void Heap::destroyContents(GCAllocation* al) {
_doFree(al);
add_free_chunk((LargeFreeChunk*)obj, size);
}
void dumpHeapStatistics() {
global_heap.dumpHeapStatistics();
}
//////
/// Huge Arena
struct HeapStatistics {
struct TypeStats {
int64_t nallocs;
int64_t nbytes;
TypeStats() : nallocs(0), nbytes(0) {}
void print(const char* name) const {
if (nbytes > (1 << 20))
printf("%s: %ld allocations for %.1f MB\n", name, nallocs, nbytes * 1.0 / (1 << 20));
else if (nbytes > (1 << 10))
printf("%s: %ld allocations for %.1f KB\n", name, nallocs, nbytes * 1.0 / (1 << 10));
else
printf("%s: %ld allocations for %ld bytes\n", name, nallocs, nbytes);
}
};
std::unordered_map<BoxedClass*, TypeStats> by_cls;
TypeStats conservative, untracked;
TypeStats total;
};
void addStatistic(HeapStatistics* stats, GCAllocation* al, int nbytes) {
stats->total.nallocs++;
stats->total.nbytes += nbytes;
GCAllocation* HugeArena::alloc(size_t size) {
registerGCManagedBytes(size);
if (al->kind_id == GCKind::PYTHON) {
Box* b = (Box*)al->user_data;
auto& t = stats->by_cls[b->cls];
LOCK_REGION(heap->lock);
t.nallocs++;
t.nbytes += nbytes;
} else if (al->kind_id == GCKind::CONSERVATIVE) {
stats->conservative.nallocs++;
stats->conservative.nbytes += nbytes;
} else if (al->kind_id == GCKind::UNTRACKED) {
stats->untracked.nallocs++;
stats->untracked.nbytes += nbytes;
} else {
RELEASE_ASSERT(0, "%d", (int)al->kind_id);
}
}
size_t total_size = size + sizeof(HugeObj);
total_size = (total_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
HugeObj* rtn = (HugeObj*)doMmap(total_size);
rtn->obj_size = size;
// TODO: copy-pasted from freeChain
void SmallArena::getChainStatistics(HeapStatistics* stats, Block** head) {
while (Block* b = *head) {
int num_objects = b->numObjects();
int first_obj = b->minObjIndex();
int atoms_per_obj = b->atomsPerObj();
nullNextPrev(rtn);
insertIntoLL(&head, rtn);
for (int obj_idx = first_obj; obj_idx < num_objects; obj_idx++) {
int atom_idx = obj_idx * atoms_per_obj;
return rtn->data;
}
if (b->isfree.isSet(atom_idx))
continue;
GCAllocation* HugeArena::realloc(GCAllocation* al, size_t bytes) {
HugeObj* obj = HugeObj::fromAllocation(al);
void* p = &b->atoms[atom_idx];
GCAllocation* al = reinterpret_cast<GCAllocation*>(p);
int capacity = obj->capacity();
if (capacity >= bytes && capacity < bytes * 2)
return al;
addStatistic(stats, al, b->size);
}
GCAllocation* rtn = heap->alloc(bytes);
memcpy(rtn, al, std::min(bytes, obj->obj_size));
head = &b->next;
}
_freeHugeObj(obj);
return rtn;
}
// TODO: copy-pasted from freeUnmarked()
void SmallArena::getStatistics(HeapStatistics* stats) {
thread_caches.forEachValue([this, stats](ThreadBlockCache* cache) {
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
Block* h = cache->cache_free_heads[bidx];
getChainStatistics(stats, &cache->cache_free_heads[bidx]);
getChainStatistics(stats, &cache->cache_full_heads[bidx]);
}
});
for (int bidx = 0; bidx < NUM_BUCKETS; bidx++) {
getChainStatistics(stats, &heads[bidx]);
getChainStatistics(stats, &full_heads[bidx]);
}
void HugeArena::free(GCAllocation* al) {
_freeHugeObj(HugeObj::fromAllocation(al));
}
void LargeArena::getStatistics(HeapStatistics* stats) {
LargeObj* cur = head;
GCAllocation* HugeArena::allocationFrom(void* ptr) {
HugeObj* cur = head;
while (cur) {
GCAllocation* al = cur->data;
addStatistic(stats, al, cur->size);
if (ptr >= cur && ptr < &cur->data[cur->obj_size])
return &cur->data[0];
cur = cur->next;
}
return NULL;
}
void HugeArena::freeUnmarked() {
sweepList(head, [this](HugeObj* ptr) { _freeHugeObj(ptr); });
}
void HugeArena::getStatistics(HeapStatistics* stats) {
@@ -757,23 +783,12 @@ void HugeArena::getStatistics(HeapStatistics* stats) {
}
}
void Heap::dumpHeapStatistics() {
threading::GLPromoteRegion _lock;
HeapStatistics stats;
small_arena.getStatistics(&stats);
large_arena.getStatistics(&stats);
huge_arena.getStatistics(&stats);
stats.conservative.print("conservative");
stats.untracked.print("untracked");
for (const auto& p : stats.by_cls) {
p.second.print(getFullNameOfClass(p.first).c_str());
}
stats.total.print("Total");
printf("\n");
void HugeArena::_freeHugeObj(HugeObj* lobj) {
removeFromLL(lobj);
int r = munmap(lobj, lobj->mmap_size());
assert(r == 0);
}
} // namespace gc
} // namespace pyston
@@ -65,17 +65,20 @@ inline void clearMark(GCAllocation* header) {
#define PAGE_SIZE 4096
template <uintptr_t start> class Arena {
template <uintptr_t arena_start, uintptr_t arena_size> class Arena {
private:
void* cur;
void* end;
protected:
Arena() : cur((void*)start) {}
Arena() : cur((void*)arena_start), end((void*)(arena_start + arena_size)) {}
public:
void* doMmap(size_t size) {
assert(size % PAGE_SIZE == 0);
assert(((uint8_t*)cur + size) < end && "arena full");
void* mrtn = mmap(cur, size, PROT_READ | PROT_WRITE, MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
assert((uintptr_t)mrtn != -1 && "failed to allocate memory from OS");
ASSERT(mrtn == cur, "%p %p\n", mrtn, cur);
@@ -83,9 +86,10 @@ public:
return mrtn;
}
bool contains(void* addr) { return (void*)start <= addr && addr < cur; }
bool contains(void* addr) { return (void*)arena_start <= addr && addr < cur; }
};
constexpr uintptr_t ARENA_SIZE = 0x1000000000L;
constexpr uintptr_t SMALL_ARENA_START = 0x1270000000L;
constexpr uintptr_t LARGE_ARENA_START = 0x2270000000L;
constexpr uintptr_t HUGE_ARENA_START = 0x3270000000L;
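The fixed start addresses are what keep pointer classification cheap: each arena owns its own 64 GB window (ARENA_SIZE is 0x1000000000, exactly the spacing between the start constants), so deciding which arena owns an allocation is a pure range check, which is how Heap::free dispatches at the bottom of this diff. A hypothetical helper illustrating the same check:

```cpp
#include <cstdint>

// Illustration only; the real code asks each arena's contains() instead.
const char* whichArena(uintptr_t p) {
    if (p >= HUGE_ARENA_START && p < HUGE_ARENA_START + ARENA_SIZE)
        return "huge";
    if (p >= LARGE_ARENA_START && p < LARGE_ARENA_START + ARENA_SIZE)
        return "large";
    if (p >= SMALL_ARENA_START && p < SMALL_ARENA_START + ARENA_SIZE)
        return "small";
    return "not GC-managed";
}
```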
@@ -94,8 +98,8 @@ constexpr uintptr_t HUGE_ARENA_START = 0x3270000000L;
//
// The SmallArena allocates objects <= 3584 bytes.
//
// it uses segregated-fit allocation, and each block contains free
// bitmap for objects of a given size (assigned to the block)
// It uses segregated-fit allocation, and each block contains a free
// bitmap for objects of a given size (constant for the block)
//
static const size_t sizes[] = {
16, 32, 48, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384,
@@ -103,8 +107,20 @@ static const size_t sizes[] = {
};
static constexpr size_t NUM_BUCKETS = sizeof(sizes) / sizeof(sizes[0]);
class SmallArena : public Arena<SMALL_ARENA_START> {
class SmallArena : public Arena<SMALL_ARENA_START, ARENA_SIZE> {
public:
SmallArena(Heap* heap) : Arena(), heap(heap), thread_caches(heap, this) {}
GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
void free(GCAllocation* al);
GCAllocation* allocationFrom(void* ptr);
void freeUnmarked();
void getStatistics(HeapStatistics* stats);
private:
template <int N> class Bitmap {
static_assert(N % 64 == 0, "");
@@ -205,8 +221,7 @@ private:
static_assert(offsetof(Block, _header_end) >= BLOCK_HEADER_SIZE, "bad header size");
static_assert(offsetof(Block, _header_end) <= BLOCK_HEADER_SIZE, "bad header size");
// forward (public) definition of ThreadBlockCache so we can reference it both in this class (privately) and in Heap
// (for a friend ref).
struct ThreadBlockCache {
Heap* heap;
SmallArena* small;
@@ -221,7 +236,6 @@ private:
};
Block* heads[NUM_BUCKETS];
Block* full_heads[NUM_BUCKETS];
@@ -231,71 +245,46 @@ private:
// TODO only use thread caches if we're in GRWL mode?
threading::PerThreadSet<ThreadBlockCache, Heap*, SmallArena*> thread_caches;
Block* alloc_block(uint64_t size, Block** prev);
GCAllocation* allocFromBlock(Block* b);
Block* claimBlock(size_t rounded_size, Block** free_head);
void insertIntoLL(Block** next_pointer, Block* next);
void removeFromLL(Block* b);
Block** freeChain(Block** head);
void getChainStatistics(HeapStatistics* stats, Block** head);
Block* _allocBlock(uint64_t size, Block** prev);
GCAllocation* _allocFromBlock(Block* b);
Block* _claimBlock(size_t rounded_size, Block** free_head);
Block** _freeChain(Block** head);
void _getChainStatistics(HeapStatistics* stats, Block** head);
GCAllocation* __attribute__((__malloc__)) _alloc(size_t bytes, int bucket_idx);
void _free(GCAllocation* al, Block* b);
public:
SmallArena(Heap* heap) : Arena(), heap(heap), thread_caches(heap, this) {}
GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes) {
if (bytes <= 16)
return _alloc(16, 0);
else if (bytes <= 32)
return _alloc(32, 1);
else {
for (int i = 2; i < NUM_BUCKETS; i++) {
if (sizes[i] >= bytes) {
return _alloc(sizes[i], i);
}
}
return NULL;
}
}
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
void free(GCAllocation* al) {
Block* b = Block::forPointer(al);
_free(al, b);
}
void getStatistics(HeapStatistics* stats);
GCAllocation* allocationFrom(void* ptr);
void freeUnmarked();
};
//
// The LargeArena allocates objects where 3584 < size <1024*1024 bytes.
// The LargeArena allocates objects where 3584 < size < 1024*1024 - CHUNK_SIZE - sizeof(LargeObj) bytes.
//
// it maintains a set of size-segregated free lists, and a special
// free list for larger objects. If the free list specific to a given
// size has no entries, we search the large free list.
//
class LargeArena : public Arena<LARGE_ARENA_START> {
struct LargeFreeChunk {
LargeFreeChunk* next_size;
size_t size;
};
// Blocks of 1meg are mmap'ed individually, and carved up as needed.
//
class LargeArena : public Arena<LARGE_ARENA_START, ARENA_SIZE> {
private:
struct LargeBlock {
LargeBlock* next;
size_t num_free_chunks;
unsigned char* free_chunk_map;
};
struct LargeFreeChunk {
LargeFreeChunk* next_size;
size_t size;
};
struct LargeObj {
LargeObj* next, **prev;
size_t size;
GCAllocation data[0];
static LargeObj* fromAllocation(GCAllocation* alloc) {
char* rtn = (char*)alloc - offsetof(LargeObj, data);
return reinterpret_cast<LargeObj*>(rtn);
}
};
/*
@@ -311,20 +300,18 @@ class LargeArena : public Arena<LARGE_ARENA_START> {
static constexpr int NUM_FREE_LISTS = 32;
void add_free_chunk(LargeFreeChunk* free_chunks, size_t size);
LargeFreeChunk* get_from_size_list(LargeFreeChunk** list, size_t size);
LargeObj* _allocInternal(size_t size);
void _freeInternal(LargeObj* obj, size_t size);
void _free(LargeObj* obj);
Heap* heap;
LargeObj* head;
LargeBlock* blocks;
LargeFreeChunk* free_lists[NUM_FREE_LISTS]; /* 0 is for larger sizes */
Heap* heap;
void add_free_chunk(LargeFreeChunk* free_chunks, size_t size);
LargeFreeChunk* get_from_size_list(LargeFreeChunk** list, size_t size);
LargeObj* _alloc(size_t size);
void _freeLargeObj(LargeObj* obj);
public:
LargeArena(Heap* heap) : head(NULL), blocks(NULL), heap(heap) {}
LargeArena(Heap* heap) : heap(heap), head(NULL), blocks(NULL) {}
/* Largest object that can be allocated in a large block. */
static constexpr size_t ALLOC_SIZE_LIMIT = BLOCK_SIZE - CHUNK_SIZE - sizeof(LargeObj);
@@ -333,9 +320,9 @@ public:
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
void free(GCAllocation* alloc);
GCAllocation* allocationFrom(void* ptr);
void freeUnmarked();
GCAllocation* allocationFrom(void* ptr);
void getStatistics(HeapStatistics* stats);
};
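One note on the free-list layout above: free_lists[i] holds free runs exactly i chunks long, while free_lists[0] doubles as the overflow list for runs of NUM_FREE_LISTS chunks or more (and as the fallback when no exact-size list has an entry). A small sketch of the index selection, with a stand-in for CHUNK_BITS since its value is not shown in this diff:

```cpp
#include <cstddef>

static const size_t kChunkBits    = 12; // assumption: mirrors CHUNK_BITS
static const size_t kNumFreeLists = 32; // mirrors NUM_FREE_LISTS above

// Which free_lists[] slot add_free_chunk would file a free run of `size` bytes under.
size_t freeListIndex(size_t size) {
    size_t num_chunks = size >> kChunkBits;
    return num_chunks >= kNumFreeLists ? 0 : num_chunks; // slot 0 is the "large" list
}
```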
@@ -343,7 +330,20 @@ public:
//
// Objects are allocated with individual mmap() calls, and kept in a
// linked list. They are not reused.
class HugeArena : public Arena<HUGE_ARENA_START> {
class HugeArena : public Arena<HUGE_ARENA_START, ARENA_SIZE> {
public:
HugeArena(Heap* heap) : heap(heap) {}
GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
void free(GCAllocation* alloc);
GCAllocation* allocationFrom(void* ptr);
void freeUnmarked();
void getStatistics(HeapStatistics* stats);
private:
struct HugeObj {
HugeObj* next, **prev;
size_t obj_size;
@@ -369,18 +369,6 @@ class HugeArena : public Arena<HUGE_ARENA_START> {
HugeObj* head;
Heap* heap;
public:
HugeArena(Heap* heap) : heap(heap) {}
GCAllocation* __attribute__((__malloc__)) alloc(size_t bytes);
GCAllocation* realloc(GCAllocation* alloc, size_t bytes);
void free(GCAllocation* alloc);
void freeUnmarked();
GCAllocation* allocationFrom(void* ptr);
void getStatistics(HeapStatistics* stats);
};
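The huge path has no size classes at all: every allocation gets its own mmap of the requested size plus the HugeObj header, rounded up to whole pages (PAGE_SIZE is 4096 earlier in this header). A worked example of that rounding; the header size used in the comments is an assumption:

```cpp
#include <cstddef>

// Same rounding as HugeArena::alloc. Assuming sizeof(HugeObj) were 32 bytes:
//   size 5000 -> total 5032 -> rounded to 8192  (two pages mmap'ed)
//   size 8160 -> total 8192 -> rounded to 8192  (fits exactly)
//   size 8200 -> total 8232 -> rounded to 12288 (three pages)
size_t roundUpToPages(size_t total_size) {
    const size_t kPageSize = 4096; // matches PAGE_SIZE in this header
    return (total_size + kPageSize - 1) & ~(kPageSize - 1);
}
```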
@@ -420,10 +408,10 @@ public:
return small_arena.alloc(bytes);
}
void destroyContents(GCAllocation* alloc);
void destructContents(GCAllocation* alloc);
void free(GCAllocation* alloc) {
destroyContents(alloc);
destructContents(alloc);
if (large_arena.contains(alloc)) {
large_arena.free(alloc);
......