Commit 3e8a2988 authored by John Esmet's avatar John Esmet

FT-300 Add 'heat' to the block allocator API, which is a hint for how likely
the allocation will need to move again at the next checkpoint (we pass the
node height for this value). The new heat zone allocation strategy uses the
heat value to put nonleaf nodes towards the end of the file and leaf nodes
towards the beginning.
parent a1680150
...@@ -89,8 +89,9 @@ PATENT RIGHTS GRANT: ...@@ -89,8 +89,9 @@ PATENT RIGHTS GRANT:
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
#ident "$Id$" #ident "$Id$"
#include <string> #include <algorithm>
#include <cstring>
#include <string.h>
#include "portability/memory.h" #include "portability/memory.h"
#include "portability/toku_assert.h" #include "portability/toku_assert.h"
...@@ -186,18 +187,6 @@ void block_allocator::grow_blocks_array() { ...@@ -186,18 +187,6 @@ void block_allocator::grow_blocks_array() {
grow_blocks_array_by(1); grow_blocks_array_by(1);
} }
// qsort-style comparator for two blockpairs, ordering them by offset.
// Returns -1 if a's offset is lower, 1 if it is higher, 0 if they are equal.
int block_allocator::compare_blockpairs(const void *av, const void *bv) {
    const struct blockpair *lhs = (const struct blockpair *) av;
    const struct blockpair *rhs = (const struct blockpair *) bv;
    if (lhs->offset == rhs->offset) {
        return 0;
    }
    return lhs->offset < rhs->offset ? -1 : 1;
}
void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment, void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint64_t alignment,
struct blockpair *pairs, uint64_t n_blocks) { struct blockpair *pairs, uint64_t n_blocks) {
_create_internal(reserve_at_beginning, alignment); _create_internal(reserve_at_beginning, alignment);
...@@ -205,7 +194,7 @@ void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint ...@@ -205,7 +194,7 @@ void block_allocator::create_from_blockpairs(uint64_t reserve_at_beginning, uint
_n_blocks = n_blocks; _n_blocks = n_blocks;
grow_blocks_array_by(_n_blocks); grow_blocks_array_by(_n_blocks);
memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair)); memcpy(_blocks_array, pairs, _n_blocks * sizeof(struct blockpair));
qsort(_blocks_array, _n_blocks, sizeof(struct blockpair), compare_blockpairs); std::sort(_blocks_array, _blocks_array + _n_blocks);
for (uint64_t i = 0; i < _n_blocks; i++) { for (uint64_t i = 0; i < _n_blocks; i++) {
// Allocator does not support size 0 blocks. See block_allocator_free_block. // Allocator does not support size 0 blocks. See block_allocator_free_block.
invariant(_blocks_array[i].size > 0); invariant(_blocks_array[i].size > 0);
...@@ -224,19 +213,21 @@ static inline uint64_t align(uint64_t value, uint64_t ba_alignment) { ...@@ -224,19 +213,21 @@ static inline uint64_t align(uint64_t value, uint64_t ba_alignment) {
} }
struct block_allocator::blockpair * struct block_allocator::blockpair *
block_allocator::choose_block_to_alloc_after(size_t size) { block_allocator::choose_block_to_alloc_after(size_t size, uint64_t heat) {
switch (_strategy) { switch (_strategy) {
case BA_STRATEGY_FIRST_FIT: case BA_STRATEGY_FIRST_FIT:
return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment); return block_allocator_strategy::first_fit(_blocks_array, _n_blocks, size, _alignment);
case BA_STRATEGY_BEST_FIT: case BA_STRATEGY_BEST_FIT:
return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment); return block_allocator_strategy::best_fit(_blocks_array, _n_blocks, size, _alignment);
case BA_STRATEGY_HEAT_ZONE:
return block_allocator_strategy::heat_zone(_blocks_array, _n_blocks, size, _alignment, heat);
default: default:
abort(); abort();
} }
} }
// Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512). // Effect: Allocate a block. The resulting block must be aligned on the ba->alignment (which to make direct_io happy must be a positive multiple of 512).
void block_allocator::alloc_block(uint64_t size, uint64_t *offset) { void block_allocator::alloc_block(uint64_t size, uint64_t heat, uint64_t *offset) {
struct blockpair *bp; struct blockpair *bp;
// Allocator does not support size 0 blocks. See block_allocator_free_block. // Allocator does not support size 0 blocks. See block_allocator_free_block.
...@@ -264,7 +255,7 @@ void block_allocator::alloc_block(uint64_t size, uint64_t *offset) { ...@@ -264,7 +255,7 @@ void block_allocator::alloc_block(uint64_t size, uint64_t *offset) {
goto done; goto done;
} }
bp = choose_block_to_alloc_after(size); bp = choose_block_to_alloc_after(size, heat);
if (bp != nullptr) { if (bp != nullptr) {
// our allocation strategy chose the space after `bp' to fit the new block // our allocation strategy chose the space after `bp' to fit the new block
uint64_t answer_offset = align(bp->offset + bp->size, _alignment); uint64_t answer_offset = align(bp->offset + bp->size, _alignment);
...@@ -289,8 +280,10 @@ void block_allocator::alloc_block(uint64_t size, uint64_t *offset) { ...@@ -289,8 +280,10 @@ void block_allocator::alloc_block(uint64_t size, uint64_t *offset) {
VALIDATE(); VALIDATE();
if (ba_trace_file != nullptr) { if (ba_trace_file != nullptr) {
fprintf(ba_trace_file, "ba_trace_alloc %p %lu %lu\n", fprintf(ba_trace_file, "ba_trace_alloc %p %lu %lu %lu\n",
this, static_cast<unsigned long>(size), static_cast<unsigned long>(*offset)); this, static_cast<unsigned long>(size),
static_cast<unsigned long>(heat),
static_cast<unsigned long>(*offset));
fflush(ba_trace_file); fflush(ba_trace_file);
} }
} }
......
...@@ -126,7 +126,8 @@ class block_allocator { ...@@ -126,7 +126,8 @@ class block_allocator {
enum allocation_strategy { enum allocation_strategy {
BA_STRATEGY_FIRST_FIT = 1, BA_STRATEGY_FIRST_FIT = 1,
BA_STRATEGY_BEST_FIT BA_STRATEGY_BEST_FIT,
BA_STRATEGY_HEAT_ZONE
}; };
struct blockpair { struct blockpair {
...@@ -135,6 +136,12 @@ class block_allocator { ...@@ -135,6 +136,12 @@ class block_allocator {
blockpair(uint64_t o, uint64_t s) : blockpair(uint64_t o, uint64_t s) :
offset(o), size(s) { offset(o), size(s) {
} }
int operator<(const struct blockpair &rhs) {
return offset < rhs.offset;
}
int operator<(const uint64_t &o) {
return offset < o;
}
}; };
// Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block. // Effect: Create a block allocator, in which the first RESERVE_AT_BEGINNING bytes are not put into a block.
...@@ -172,7 +179,9 @@ class block_allocator { ...@@ -172,7 +179,9 @@ class block_allocator {
// Parameters: // Parameters:
// size (IN): The size of the block. (The size does not have to be aligned.) // size (IN): The size of the block. (The size does not have to be aligned.)
// offset (OUT): The location of the block. // offset (OUT): The location of the block.
void alloc_block(uint64_t size, uint64_t *offset); // heat (IN): A higher heat means we should be prepared to free this block soon (perhaps in the next checkpoint)
// Heat values are lexicographically ordered (like integers), but their specific values are arbitrary
void alloc_block(uint64_t size, uint64_t heat, uint64_t *offset);
// Effect: Free the block at offset. // Effect: Free the block at offset.
// Requires: There must be a block currently allocated at that offset. // Requires: There must be a block currently allocated at that offset.
...@@ -229,9 +238,7 @@ class block_allocator { ...@@ -229,9 +238,7 @@ class block_allocator {
void grow_blocks_array_by(uint64_t n_to_add); void grow_blocks_array_by(uint64_t n_to_add);
void grow_blocks_array(); void grow_blocks_array();
int64_t find_block(uint64_t offset); int64_t find_block(uint64_t offset);
struct blockpair *choose_block_to_alloc_after(size_t size); struct blockpair *choose_block_to_alloc_after(size_t size, uint64_t heat);
static int compare_blockpairs(const void *av, const void *bv);
// How much to reserve at the beginning // How much to reserve at the beginning
uint64_t _reserve_at_beginning; uint64_t _reserve_at_beginning;
......
...@@ -86,13 +86,31 @@ PATENT RIGHTS GRANT: ...@@ -86,13 +86,31 @@ PATENT RIGHTS GRANT:
under this License. under this License.
*/ */
#include <algorithm>
#include "portability/toku_assert.h"
#include "ft/serialize/block_allocator_strategy.h" #include "ft/serialize/block_allocator_strategy.h"
// Effect: align a value by rounding it up to the next multiple of ba_alignment.
//  A value that is already a multiple is returned unchanged.
// Requires: ba_alignment is nonzero (it is used as a divisor).
static uint64_t _align(uint64_t value, uint64_t ba_alignment) {
    const uint64_t bumped = value + ba_alignment - 1;
    return bumped - (bumped % ba_alignment);
}
// First fit block allocation // First fit block allocation
struct block_allocator::blockpair * static struct block_allocator::blockpair *
block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array, _first_fit(struct block_allocator::blockpair *blocks_array,
uint64_t n_blocks, uint64_t size, uint64_t alignment) { uint64_t n_blocks, uint64_t size, uint64_t alignment,
for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) { bool forward) {
if (n_blocks == 1) {
// won't enter loop, can't underflow the direction < 0 case
return nullptr;
}
for (uint64_t n_spaces_to_check = n_blocks - 1,
blocknum = forward ? 0 : n_blocks - 2;
n_spaces_to_check > 0;
n_spaces_to_check--, forward ? blocknum++ : blocknum--) {
invariant(blocknum < n_blocks);
// Consider the space after blocknum // Consider the space after blocknum
struct block_allocator::blockpair *bp = &blocks_array[blocknum]; struct block_allocator::blockpair *bp = &blocks_array[blocknum];
uint64_t possible_offset = _align(bp->offset + bp->size, alignment); uint64_t possible_offset = _align(bp->offset + bp->size, alignment);
...@@ -103,19 +121,26 @@ block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_ar ...@@ -103,19 +121,26 @@ block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_ar
return nullptr; return nullptr;
} }
// First fit block allocation: delegates to _first_fit with the scan
// direction fixed to forward, i.e. starting from the first block.
struct block_allocator::blockpair *
block_allocator_strategy::first_fit(struct block_allocator::blockpair *blocks_array,
                                    uint64_t n_blocks, uint64_t size, uint64_t alignment) {
    const bool search_forward = true;
    struct block_allocator::blockpair *chosen_bp =
        _first_fit(blocks_array, n_blocks, size, alignment, search_forward);
    return chosen_bp;
}
// Best fit block allocation // Best fit block allocation
struct block_allocator::blockpair * struct block_allocator::blockpair *
block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array, block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_array,
uint64_t n_blocks, uint64_t size, uint64_t alignment) { uint64_t n_blocks, uint64_t size, uint64_t alignment) {
struct block_allocator::blockpair *best_bp = nullptr; struct block_allocator::blockpair *best_bp = nullptr;
uint64_t best_hole_size = 0; uint64_t best_hole_size = 0;
for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) { for (uint64_t blocknum = 0; blocknum + 1 < n_blocks; blocknum++) {
// Consider the space after blocknum // Consider the space after blocknum
struct block_allocator::blockpair *bp = &blocks_array[blocknum]; struct block_allocator::blockpair *bp = &blocks_array[blocknum];
uint64_t possible_offset = _align(bp->offset + bp->size, alignment); uint64_t possible_offset = _align(bp->offset + bp->size, alignment);
if (possible_offset + size <= bp[1].offset) { uint64_t possible_end_offset = possible_offset + size;
if (possible_end_offset <= bp[1].offset) {
// It fits here. Is it the best fit? // It fits here. Is it the best fit?
uint64_t hole_size = (bp[1].offset - possible_offset) + size; uint64_t hole_size = bp[1].offset - possible_end_offset;
if (best_bp == nullptr || hole_size < best_hole_size) { if (best_bp == nullptr || hole_size < best_hole_size) {
best_hole_size = hole_size; best_hole_size = hole_size;
best_bp = bp; best_bp = bp;
...@@ -124,3 +149,41 @@ block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_arr ...@@ -124,3 +149,41 @@ block_allocator_strategy::best_fit(struct block_allocator::blockpair *blocks_arr
} }
return best_bp; return best_bp;
} }
// Heat zone block allocation
//
// Effect: Choose the block after which a new allocation of `size' bytes
//  should be placed, using `heat' to pick a region of the file.
//  - heat == 0 (cold): plain first fit, scanning forward from the first block.
//  - heat > 0 (hot): the hot zone begins at 85% of the aligned end offset of
//    the last block. Try first fit going forward inside the hot zone; if
//    nothing fits there, fall back to the blocks below the zone, scanning
//    backwards from the zone boundary.
// Requires: blocks_array is sorted by offset (it is searched with std::lower_bound).
// Returns: the chosen block, or nullptr if no gap between blocks fits.
struct block_allocator::blockpair *
block_allocator_strategy::heat_zone(struct block_allocator::blockpair *blocks_array,
                                    uint64_t n_blocks, uint64_t size, uint64_t alignment,
                                    uint64_t heat) {
    if (n_blocks == 0) {
        // No blocks means no gaps between blocks. This also keeps
        // blocks_array[n_blocks - 1] below from underflowing.
        return nullptr;
    }
    if (heat == 0) {
        // Cold allocations are simply first-fit from the beginning.
        return _first_fit(blocks_array, n_blocks, size, alignment, true);
    }

    const double hot_zone_threshold = 0.85;

    // Hot allocation. Find the beginning of the hot zone.
    struct block_allocator::blockpair *last_bp = &blocks_array[n_blocks - 1];
    uint64_t highest_offset = _align(last_bp->offset + last_bp->size, alignment);
    uint64_t hot_zone_offset = static_cast<uint64_t>(hot_zone_threshold * highest_offset);

    // First block whose offset is >= hot_zone_offset, or one-past-the-end if
    // there is none. It is kept in its own variable because the search
    // results below must not overwrite it: reusing a single pointer for both
    // made the cold zone search start from nullptr (after a hot zone miss) or
    // from the end of the array (when the hot zone was empty).
    struct block_allocator::blockpair *boundary_bp =
        std::lower_bound(blocks_array, blocks_array + n_blocks, hot_zone_offset);
    uint64_t blocks_in_zone = (blocks_array + n_blocks) - boundary_bp;
    uint64_t blocks_outside_zone = boundary_bp - blocks_array;
    invariant(blocks_in_zone + blocks_outside_zone == n_blocks);

    if (blocks_in_zone > 0) {
        // Find the first fit in the hot zone, going forward.
        struct block_allocator::blockpair *bp =
            _first_fit(boundary_bp, blocks_in_zone, size, alignment, true);
        if (bp != nullptr) {
            return bp;
        }
    }
    if (blocks_outside_zone > 0) {
        // Find the first fit in the cold zone, going backwards. The cold zone
        // starts at blocks_array. When the hot zone is non-empty, include its
        // first block as the right-hand neighbor so that the gap straddling
        // the zone boundary is considered too.
        uint64_t blocks_to_scan = blocks_outside_zone + (blocks_in_zone > 0 ? 1 : 0);
        struct block_allocator::blockpair *bp =
            _first_fit(blocks_array, blocks_to_scan, size, alignment, false);
        if (bp != nullptr) {
            return bp;
        }
    }
    return nullptr;
}
...@@ -102,11 +102,10 @@ class block_allocator_strategy { ...@@ -102,11 +102,10 @@ class block_allocator_strategy {
static struct block_allocator::blockpair * static struct block_allocator::blockpair *
best_fit(struct block_allocator::blockpair *blocks_array, best_fit(struct block_allocator::blockpair *blocks_array,
uint64_t n_blocks, uint64_t size, uint64_t alignment); uint64_t n_blocks, uint64_t size, uint64_t alignment);
private: static struct block_allocator::blockpair *
// Effect: align a value by rounding up. heat_zone(struct block_allocator::blockpair *blocks_array,
static inline uint64_t _align(uint64_t value, uint64_t ba_alignment) { uint64_t n_blocks, uint64_t size, uint64_t alignment,
return ((value + ba_alignment - 1) / ba_alignment) * ba_alignment; uint64_t heat);
}
}; };
...@@ -447,7 +447,7 @@ bool block_table::_translation_prevents_freeing(struct translation *t, BLOCKNUM ...@@ -447,7 +447,7 @@ bool block_table::_translation_prevents_freeing(struct translation *t, BLOCKNUM
old_pair->u.diskoff == t->block_translation[b.b].u.diskoff; old_pair->u.diskoff == t->block_translation[b.b].u.diskoff;
} }
void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint) { void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, bool for_checkpoint, uint64_t heat) {
toku_mutex_assert_locked(&_mutex); toku_mutex_assert_locked(&_mutex);
ft_set_dirty(ft, for_checkpoint); ft_set_dirty(ft, for_checkpoint);
...@@ -466,7 +466,7 @@ void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *o ...@@ -466,7 +466,7 @@ void block_table::_realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *o
if (size > 0) { if (size > 0) {
// Allocate a new block if the size is greater than 0, // Allocate a new block if the size is greater than 0,
// if the size is just 0, offset will be set to diskoff_unused // if the size is just 0, offset will be set to diskoff_unused
_bt_block_allocator.alloc_block(size, &allocator_offset); _bt_block_allocator.alloc_block(size, heat, &allocator_offset);
} }
t->block_translation[b.b].u.diskoff = allocator_offset; t->block_translation[b.b].u.diskoff = allocator_offset;
*offset = allocator_offset; *offset = allocator_offset;
...@@ -497,11 +497,11 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF ...@@ -497,11 +497,11 @@ void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, DISKOF
} }
} }
void block_table::realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint) { void block_table::realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, FT ft, int fd, bool for_checkpoint, uint64_t heat) {
_mutex_lock(); _mutex_lock();
struct translation *t = &_current; struct translation *t = &_current;
_verify_valid_freeable_blocknum(t, b); _verify_valid_freeable_blocknum(t, b);
_realloc_on_disk_internal(b, size, offset, ft, for_checkpoint); _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint, heat);
_ensure_safe_write_unlocked(fd, size, *offset); _ensure_safe_write_unlocked(fd, size, *offset);
_mutex_unlock(); _mutex_unlock();
...@@ -526,7 +526,7 @@ void block_table::_alloc_inprogress_translation_on_disk_unlocked() { ...@@ -526,7 +526,7 @@ void block_table::_alloc_inprogress_translation_on_disk_unlocked() {
//Allocate a new block //Allocate a new block
int64_t size = _calculate_size_on_disk(t); int64_t size = _calculate_size_on_disk(t);
uint64_t offset; uint64_t offset;
_bt_block_allocator.alloc_block(size, &offset); _bt_block_allocator.alloc_block(size, 0, &offset);
t->block_translation[b.b].u.diskoff = offset; t->block_translation[b.b].u.diskoff = offset;
t->block_translation[b.b].size = size; t->block_translation[b.b].size = size;
} }
...@@ -930,7 +930,7 @@ void block_table::internal_fragmentation(int64_t *total_sizep, int64_t *used_siz ...@@ -930,7 +930,7 @@ void block_table::internal_fragmentation(int64_t *total_sizep, int64_t *used_siz
void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, FT ft) { void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, FT ft) {
toku_mutex_assert_locked(&_mutex); toku_mutex_assert_locked(&_mutex);
BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR); BLOCKNUM b = make_blocknum(RESERVED_BLOCKNUM_DESCRIPTOR);
_realloc_on_disk_internal(b, size, offset, ft, false); _realloc_on_disk_internal(b, size, offset, ft, false, 0);
} }
void block_table::realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, FT ft, int fd) { void block_table::realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, FT ft, int fd) {
......
...@@ -167,7 +167,7 @@ class block_table { ...@@ -167,7 +167,7 @@ class block_table {
// Blocknums // Blocknums
void allocate_blocknum(BLOCKNUM *res, struct ft *ft); void allocate_blocknum(BLOCKNUM *res, struct ft *ft);
void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint); void realloc_on_disk(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, int fd, bool for_checkpoint, uint64_t heat);
void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint); void free_blocknum(BLOCKNUM *b, struct ft *ft, bool for_checkpoint);
void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); void translate_blocknum_to_offset_size(BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
void free_unused_blocknums(BLOCKNUM root); void free_unused_blocknums(BLOCKNUM root);
...@@ -258,7 +258,7 @@ class block_table { ...@@ -258,7 +258,7 @@ class block_table {
void _allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft); void _allocate_blocknum_unlocked(BLOCKNUM *res, struct ft *ft);
void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft, bool for_checkpoint); void _free_blocknum_unlocked(BLOCKNUM *bp, struct ft *ft, bool for_checkpoint);
void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, struct ft *ft); void _realloc_descriptor_on_disk_unlocked(DISKOFF size, DISKOFF *offset, struct ft *ft);
void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint); void _realloc_on_disk_internal(BLOCKNUM b, DISKOFF size, DISKOFF *offset, struct ft *ft, bool for_checkpoint, uint64_t heat);
void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size); void _translate_blocknum_to_offset_size_unlocked(BLOCKNUM b, DISKOFF *offset, DISKOFF *size);
// File management // File management
......
...@@ -847,8 +847,12 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA ...@@ -847,8 +847,12 @@ toku_serialize_ftnode_to (int fd, BLOCKNUM blocknum, FTNODE node, FTNODE_DISK_DA
invariant(blocknum.b>=0); invariant(blocknum.b>=0);
DISKOFF offset; DISKOFF offset;
// Dirties the ft
ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset,
ft, fd, for_checkpoint); //dirties h ft, fd, for_checkpoint,
// Allocations for nodes high in the tree are considered 'hot',
// as they are likely to move again in the next checkpoint.
node->height);
tokutime_t t0 = toku_time_now(); tokutime_t t0 = toku_time_now();
toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset);
...@@ -2542,7 +2546,11 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA ...@@ -2542,7 +2546,11 @@ toku_serialize_rollback_log_to (int fd, ROLLBACK_LOG_NODE log, SERIALIZED_ROLLBA
// Dirties the ft // Dirties the ft
DISKOFF offset; DISKOFF offset;
ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset, ft->blocktable.realloc_on_disk(blocknum, n_to_write, &offset,
ft, fd, for_checkpoint); ft, fd, for_checkpoint,
// We consider rollback log flushing the hottest possible allocation,
// since rollback logs are short-lived compared to FT nodes.
INT_MAX);
toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset);
toku_free(compressed_buf); toku_free(compressed_buf);
if (!is_serialized) { if (!is_serialized) {
......
...@@ -93,7 +93,7 @@ PATENT RIGHTS GRANT: ...@@ -93,7 +93,7 @@ PATENT RIGHTS GRANT:
static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) { static void ba_alloc(block_allocator *ba, uint64_t size, uint64_t *answer) {
ba->validate(); ba->validate();
uint64_t actual_answer; uint64_t actual_answer;
ba->alloc_block(512 * size, &actual_answer); ba->alloc_block(512 * size, 0, &actual_answer);
ba->validate(); ba->validate();
assert(actual_answer%512==0); assert(actual_answer%512==0);
......
...@@ -434,7 +434,7 @@ test_prefetching(void) { ...@@ -434,7 +434,7 @@ test_prefetching(void) {
{ {
DISKOFF offset; DISKOFF offset;
DISKOFF size; DISKOFF size;
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
......
...@@ -370,7 +370,7 @@ test_serialize_nonleaf(void) { ...@@ -370,7 +370,7 @@ test_serialize_nonleaf(void) {
{ {
DISKOFF offset; DISKOFF offset;
DISKOFF size; DISKOFF size;
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
...@@ -450,7 +450,7 @@ test_serialize_leaf(void) { ...@@ -450,7 +450,7 @@ test_serialize_leaf(void) {
{ {
DISKOFF offset; DISKOFF offset;
DISKOFF size; DISKOFF size;
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
......
...@@ -210,7 +210,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de ...@@ -210,7 +210,7 @@ test_serialize_leaf(int valsize, int nelts, double entropy, int ser_runs, int de
{ {
DISKOFF offset; DISKOFF offset;
DISKOFF size; DISKOFF size;
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
...@@ -373,7 +373,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int ...@@ -373,7 +373,7 @@ test_serialize_nonleaf(int valsize, int nelts, double entropy, int ser_runs, int
{ {
DISKOFF offset; DISKOFF offset;
DISKOFF size; DISKOFF size;
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
......
...@@ -314,7 +314,7 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) { ...@@ -314,7 +314,7 @@ test_serialize_leaf_check_msn(enum ftnode_verify_type bft, bool do_clone) {
{ {
DISKOFF offset; DISKOFF offset;
DISKOFF size; DISKOFF size;
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
...@@ -447,7 +447,7 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone ...@@ -447,7 +447,7 @@ test_serialize_leaf_with_large_pivots(enum ftnode_verify_type bft, bool do_clone
{ {
DISKOFF offset; DISKOFF offset;
DISKOFF size; DISKOFF size;
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
...@@ -573,7 +573,7 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) { ...@@ -573,7 +573,7 @@ test_serialize_leaf_with_many_rows(enum ftnode_verify_type bft, bool do_clone) {
{ {
DISKOFF offset; DISKOFF offset;
DISKOFF size; DISKOFF size;
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
...@@ -708,7 +708,7 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone) ...@@ -708,7 +708,7 @@ test_serialize_leaf_with_large_rows(enum ftnode_verify_type bft, bool do_clone)
{ {
DISKOFF offset; DISKOFF offset;
DISKOFF size; DISKOFF size;
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
...@@ -844,7 +844,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool ...@@ -844,7 +844,7 @@ test_serialize_leaf_with_empty_basement_nodes(enum ftnode_verify_type bft, bool
{ {
DISKOFF offset; DISKOFF offset;
DISKOFF size; DISKOFF size;
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
...@@ -964,7 +964,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b ...@@ -964,7 +964,7 @@ test_serialize_leaf_with_multiple_empty_basement_nodes(enum ftnode_verify_type b
{ {
DISKOFF offset; DISKOFF offset;
DISKOFF size; DISKOFF size;
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
...@@ -1087,7 +1087,7 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) { ...@@ -1087,7 +1087,7 @@ test_serialize_nonleaf(enum ftnode_verify_type bft, bool do_clone) {
{ {
DISKOFF offset; DISKOFF offset;
DISKOFF size; DISKOFF size;
ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false); ft_h->blocktable.realloc_on_disk(b, 100, &offset, ft_h, fd, false, 0);
assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE); assert(offset==(DISKOFF)block_allocator::BLOCK_ALLOCATOR_TOTAL_HEADER_RESERVE);
ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size); ft_h->blocktable.translate_blocknum_to_offset_size(b, &offset, &size);
......
...@@ -126,7 +126,7 @@ toku_rollback_flush_unused_log( ...@@ -126,7 +126,7 @@ toku_rollback_flush_unused_log(
{ {
if (write_me) { if (write_me) {
DISKOFF offset; DISKOFF offset;
ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint); ft->blocktable.realloc_on_disk(logname, 0, &offset, ft, fd, for_checkpoint, INT_MAX);
} }
if (!keep_me && !is_clone) { if (!keep_me && !is_clone) {
toku_free(log); toku_free(log);
......
...@@ -208,6 +208,7 @@ static vector<string> canonicalize_trace_from(FILE *file) { ...@@ -208,6 +208,7 @@ static vector<string> canonicalize_trace_from(FILE *file) {
if (fn == "ba_trace_alloc") { if (fn == "ba_trace_alloc") {
const uint64_t size = parse_uint64(&ptr, line_num); const uint64_t size = parse_uint64(&ptr, line_num);
const uint64_t heat = parse_uint64(&ptr, line_num);
const uint64_t offset = parse_uint64(&ptr, line_num); const uint64_t offset = parse_uint64(&ptr, line_num);
ba_replay_assert(map->count(offset) == 0, "corrupted trace: double alloc", line, line_num); ba_replay_assert(map->count(offset) == 0, "corrupted trace: double alloc", line, line_num);
...@@ -215,7 +216,7 @@ static vector<string> canonicalize_trace_from(FILE *file) { ...@@ -215,7 +216,7 @@ static vector<string> canonicalize_trace_from(FILE *file) {
(*map)[offset] = allocation_seq_num; (*map)[offset] = allocation_seq_num;
// translate `offset = alloc(size)' to `asn = alloc(size)' // translate `offset = alloc(size)' to `asn = alloc(size)'
ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << allocation_seq_num << std::endl; ss << fn << ' ' << canonical_allocator_id << ' ' << size << ' ' << heat << ' ' << allocation_seq_num << std::endl;
allocation_seq_num++; allocation_seq_num++;
} else if (fn == "ba_trace_free") { } else if (fn == "ba_trace_free") {
const uint64_t offset = parse_uint64(&ptr, line_num); const uint64_t offset = parse_uint64(&ptr, line_num);
...@@ -282,12 +283,13 @@ static void replay_canonicalized_trace(const vector<string> &canonicalized_trace ...@@ -282,12 +283,13 @@ static void replay_canonicalized_trace(const vector<string> &canonicalized_trace
block_allocator *ba = (*allocator_map)[allocator_id]; block_allocator *ba = (*allocator_map)[allocator_id];
if (fn == "ba_trace_alloc") { if (fn == "ba_trace_alloc") {
const uint64_t size = parse_uint64(&ptr, line_num); const uint64_t size = parse_uint64(&ptr, line_num);
const uint64_t heat = parse_uint64(&ptr, line_num);
const uint64_t asn = parse_uint64(&ptr, line_num); const uint64_t asn = parse_uint64(&ptr, line_num);
ba_replay_assert(seq_num_to_offset.count(asn) == 0, ba_replay_assert(seq_num_to_offset.count(asn) == 0,
"corrupted canonical trace: double alloc (asn in use)", line, line_num); "corrupted canonical trace: double alloc (asn in use)", line, line_num);
uint64_t offset; uint64_t offset;
ba->alloc_block(size, &offset); ba->alloc_block(size, heat, &offset);
seq_num_to_offset[asn] = offset; seq_num_to_offset[asn] = offset;
} else if (fn == "ba_trace_free") { } else if (fn == "ba_trace_free") {
const uint64_t asn = parse_uint64(&ptr, line_num); const uint64_t asn = parse_uint64(&ptr, line_num);
...@@ -318,6 +320,8 @@ static const char *strategy_str(block_allocator::allocation_strategy strategy) { ...@@ -318,6 +320,8 @@ static const char *strategy_str(block_allocator::allocation_strategy strategy) {
return "first-fit"; return "first-fit";
case block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT: case block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT:
return "best-fit"; return "best-fit";
case block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE:
return "heat-zone";
default: default:
abort(); abort();
} }
...@@ -361,6 +365,8 @@ int main(void) { ...@@ -361,6 +365,8 @@ int main(void) {
vector<enum block_allocator::allocation_strategy> candidate_strategies; vector<enum block_allocator::allocation_strategy> candidate_strategies;
candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT); candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_FIRST_FIT);
candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_BEST_FIT);
candidate_strategies.push_back(block_allocator::allocation_strategy::BA_STRATEGY_HEAT_ZONE);
for (vector<enum block_allocator::allocation_strategy>::const_iterator it = candidate_strategies.begin(); for (vector<enum block_allocator::allocation_strategy>::const_iterator it = candidate_strategies.begin();
it != candidate_strategies.end(); it++) { it != candidate_strategies.end(); it++) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment