Commit d33db3f8 authored by Yoni Fogel

Addresses #1463

Abstracted the block translation table and the block allocator out
into BLOCK_TABLE.
All use now goes through accessor functions, each surrounded by locks.


git-svn-id: file:///svn/toku/tokudb@9360 c7de825b-a66e-492c-adef-691d508d4ae1
parent 7cbba1ab
......@@ -37,6 +37,7 @@ build default: bins libs $(TEST_NEWBRT)
BRT_SOURCES = \
block_allocator \
block_table \
bread \
brt-serialize \
brt-verify \
......
......@@ -6,6 +6,12 @@
#include "brttypes.h"
#define BLOCK_ALLOCATOR_ALIGNMENT 4096
// How much must be reserved at the beginning for the block?
// The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root.
// So 4096 should be enough.
#define BLOCK_ALLOCATOR_HEADER_RESERVE 4096
// A block allocator manages the allocation of variable-sized blocks.
// The translation of block numbers to addresses is handled elsewhere.
// The allocation of block numbers is handled elsewhere.
......
This diff is collapsed.
/* -*- mode: C; c-basic-offset: 4 -*- */
#ifndef BLOCKTABLE_H
#define BLOCKTABLE_H
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved."
// Interface to the block table: one opaque object that holds both the
// blocknum -> disk offset translation table and the block allocator.
// Callers reach that state only through the accessors declared here.
//
// NOTE: this header includes nothing itself.  DISKOFF, BLOCKNUM, TOKULOGGER,
// FILE and struct wbuf must already be declared by whoever includes it.

// Opaque handle; struct block_table is not defined in this header.
typedef struct block_table *BLOCK_TABLE;
// One entry of the translation table.  The layout is exposed only because
// the tests and brtdump read or build raw tables.
struct block_translation_pair {
DISKOFF diskoff; // Disk offset of the block.  When the entry is in the free list it holds the next free block instead, so it is really a BLOCKNUM.
DISKOFF size; // Size of the block.  Set to 0xFFFFFFFFFFFFFFFF for a free entry (brtdump tests this as size == -1).
};
// Set up / tear down the lock used by the accessors.
// NOTE(review): these take no BLOCK_TABLE, so presumably there is one lock shared by all tables -- confirm.
void toku_blocktable_lock_init(void);
void toku_blocktable_lock_destroy(void);
// Presumably (re)allocates `size` bytes of disk space for block number b and reports the
// new location through *offset -- confirm against the implementation.
void toku_block_realloc(BLOCK_TABLE bt, BLOCKNUM b, u_int64_t size, u_int64_t *offset);
// Allocates `size` bytes of disk space and stores the resulting disk offset in *offset.
// The serialize test expects the first allocation on a fresh table to land at offset 4096.
void toku_block_alloc(BLOCK_TABLE bt, u_int64_t size, u_int64_t *offset);
// Releases the disk block that starts at `offset` (as previously returned by toku_block_alloc).
void toku_block_free(BLOCK_TABLE bt, u_int64_t offset);
// Look up the translation entry for block number b: its disk offset / its size.
DISKOFF toku_block_get_offset(BLOCK_TABLE bt, BLOCKNUM b);
DISKOFF toku_block_get_size(BLOCK_TABLE bt, BLOCKNUM b);
// Allocate / free a block *number* (not disk space).
// NOTE(review): return value is presumably 0 on success, and *dirty is presumably set when
// the header needs rewriting -- neither is visible from the declaration; confirm.
int toku_allocate_diskblocknumber(BLOCK_TABLE bt, BLOCKNUM *res, int *dirty, TOKULOGGER logger);
int toku_free_diskblocknumber(BLOCK_TABLE bt, BLOCKNUM *b, int *dirty, TOKULOGGER logger);
// Consistency checks (names suggest they assert rather than return a status; both return void).
void toku_verify_diskblocknumber_allocated(BLOCK_TABLE bt, BLOCKNUM b);
void toku_block_verify_no_free_blocks(BLOCK_TABLE bt);
u_int64_t toku_block_allocator_allocated_limit(BLOCK_TABLE bt);
// Debug output.  brtdump uses toku_block_dump_translation in place of code that printed
// "<offset>: <diskoff> <size>" for one entry when offset was below the translated blocknum limit.
void toku_block_dump_translation_table(FILE *f, BLOCK_TABLE bt);
void toku_block_dump_translation(BLOCK_TABLE bt, u_int64_t offset);
// Destroys the table created by toku_blocktable_create / toku_blocktable_create_new.
void toku_blocktable_destroy(BLOCK_TABLE *btp);
// Test hook: installs `table` with `limit` as the translated blocknum limit.
// NOTE(review): the serialize test keeps writing to `table` after this call and no longer frees it
// itself, so the pointer is presumably adopted rather than copied -- confirm.
void toku_blocktable_debug_set_translation(BLOCK_TABLE bt,
u_int64_t limit,
struct block_translation_pair *table);
// Creates a table from previously stored state.
// NOTE(review): `buffer` presumably holds the serialized translation table read from disk -- confirm.
void toku_blocktable_create(BLOCK_TABLE *btp,
BLOCKNUM free_blocks,
BLOCKNUM unused_blocks,
u_int64_t translated_blocknum_limit,
u_int64_t block_translation_address_on_disk,
u_int64_t block_translation_size_on_disk,
unsigned char *buffer);
// Creates a fresh, empty table (used by the serialize test before its first toku_block_alloc).
void toku_blocktable_create_new(BLOCK_TABLE *bt);
// Setters used by log recovery in place of assigning the header fields directly.
void toku_block_recovery_set_unused_blocks(BLOCK_TABLE bt, BLOCKNUM newunused);
void toku_block_recovery_set_free_blocks(BLOCK_TABLE bt, BLOCKNUM newfree);
// Getters for values that previously lived directly in struct brt_header:
// unused_blocks (first unused block) and free_blocks (free list head, -1 when there are no free blocks).
BLOCKNUM toku_block_get_unused_blocks(BLOCK_TABLE bt);
BLOCKNUM toku_block_get_free_blocks(BLOCK_TABLE bt);
u_int64_t toku_block_get_translated_blocknum_limit(BLOCK_TABLE bt);
// Copies the translation table into p.  brtdump passes n as the destination size in bytes:
// translated_blocknum_limit * sizeof(struct block_translation_pair).
void toku_block_memcpy_translation_table(BLOCK_TABLE bt, size_t n, void *p);
// Unlocked / multi-operation interface.
// NOTE(review): presumably the *_unlocked functions below do not lock on their own and are meant to be
// called between toku_block_lock_for_multiple_operations() and
// toku_block_unlock_for_multiple_operations() -- confirm.
void toku_block_lock_for_multiple_operations(void);
void toku_block_unlock_for_multiple_operations(void);
void toku_block_realloc_translation_unlocked(BLOCK_TABLE bt);
void toku_block_wbuf_free_blocks_unlocked(BLOCK_TABLE bt, struct wbuf *wbuf);
void toku_block_wbuf_unused_blocks_unlocked(BLOCK_TABLE bt, struct wbuf *wbuf);
void toku_block_wbuf_translated_blocknum_limit_unlocked(BLOCK_TABLE bt, struct wbuf *wbuf);
void toku_block_wbuf_block_translation_address_on_disk_unlocked(BLOCK_TABLE bt, struct wbuf *wbuf);
void toku_block_wbuf_init_and_fill_unlocked(BLOCK_TABLE bt, struct wbuf *w,
u_int64_t *size, u_int64_t *address);
#endif
......@@ -14,6 +14,7 @@
typedef void *OMTVALUE;
#include "omt.h"
#include "leafentry.h"
#include "block_table.h"
#ifndef BRT_FANOUT
#define BRT_FANOUT 16
......@@ -113,11 +114,6 @@ struct remembered_hash {
u_int32_t fullhash; // fullhash is the hashed value of fnum and root.
};
struct block_translation_pair {
DISKOFF diskoff; // When in free list, set to the next free block. In this case it's really a BLOCKNUM.
DISKOFF size; // set to 0xFFFFFFFFFFFFFFFF for free
};
// The brt_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata.
struct brt_header {
......@@ -137,23 +133,7 @@ struct brt_header {
u_int64_t root_put_counter; // the generation number of the brt
// This is the map from block numbers to offsets
//int n_blocks, n_blocks_array_size;
//struct block_descriptor *blocks;
BLOCKNUM free_blocks; // free list for blocks. Use -1 to indicate that there are no free blocks
BLOCKNUM unused_blocks; // first unused block
u_int64_t translated_blocknum_limit;
struct block_translation_pair *block_translation;
// Where and how big is the block translation vector stored on disk.
// The size of the on_disk buffer may no longer match the max_blocknum_translated field, since blocks may have been allocated or freed.
// We need to remember this old information so we can free it properly.
u_int64_t block_translation_size_on_disk; // the size of the block containing the translation (i.e. 8 times the number of entries)
u_int64_t block_translation_address_on_disk; // 0 if there is no memory allocated
// The in-memory data structure for block allocation
BLOCK_ALLOCATOR block_allocator;
BLOCK_TABLE blocktable;
};
struct brt {
......@@ -292,12 +272,6 @@ void toku_brtheader_free (struct brt_header *h);
int toku_brtheader_close (CACHEFILE cachefile, void *header_v, char **error_string);
int toku_brtheader_checkpoint (CACHEFILE cachefile, void *header_v);
#define BLOCK_ALLOCATOR_ALIGNMENT 4096
// How much must be reserved at the beginning for the block?
// The actual header is 8+4+4+8+8_4+8+ the length of the db names + 1 pointer for each root.
// So 4096 should be enough.
#define BLOCK_ALLOCATOR_HEADER_RESERVE 4096
int toku_db_badformat(void);
#endif
This diff is collapsed.
This diff is collapsed.
......@@ -111,8 +111,6 @@ enum brt_header_flags {
int toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less, u_int64_t *equal, u_int64_t *greater);
void extend_block_translation (BLOCKNUM blocknum, struct brt_header *h);
void toku_brt_init(void);
void toku_brt_destroy(void);
void toku_pwrite_lock_init(void);
......
......@@ -30,8 +30,10 @@ dump_header (int f, struct brt_header **header) {
else printf(" layout_version=%d\n", h->layout_version);
printf(" dirty=%d\n", h->dirty);
printf(" nodesize=%u\n", h->nodesize);
printf(" free_blocks=%" PRId64 "\n", h->free_blocks.b);
printf(" unused_memory=%" PRId64 "\n", h->unused_blocks.b);
BLOCKNUM free_blocks = toku_block_get_free_blocks(h->blocktable);
BLOCKNUM unused_blocks = toku_block_get_unused_blocks(h->blocktable);
printf(" free_blocks=%" PRId64 "\n", free_blocks.b);
printf(" unused_memory=%" PRId64 "\n", unused_blocks.b);
if (h->n_named_roots==-1) {
printf(" unnamed_root=%" PRId64 "\n", h->roots[0].b);
printf(" flags=%u\n", h->flags_array[0]);
......@@ -165,10 +167,7 @@ dump_node (int f, BLOCKNUM blocknum, struct brt_header *h) {
static void
dump_block_translation(struct brt_header *h, u_int64_t offset) {
if (offset < h->translated_blocknum_limit) {
struct block_translation_pair *bx = &h->block_translation[offset];
printf("%" PRIu64 ": %" PRId64 " %" PRId64 "\n", offset, bx->diskoff, bx->size);
}
toku_block_dump_translation(h->blocktable, offset);
}
static int
......@@ -187,28 +186,31 @@ dump_fragmentation(int f, struct brt_header *h) {
u_int64_t leafblocks = 0;
u_int64_t fragsizes = 0;
u_int64_t i;
for (i = 0; i < h->translated_blocknum_limit; i++) {
u_int64_t limit = toku_block_get_translated_blocknum_limit(h->blocktable);
for (i = 0; i < limit; i++) {
BRTNODE n;
BLOCKNUM blocknum = make_blocknum(i);
int r = toku_deserialize_brtnode_from (f, blocknum, 0 /*pass zero for hash, it doesn't matter*/, &n, h);
if (r != 0) continue;
blocksizes += h->block_translation[i].size;
DISKOFF size = toku_block_get_size(h->blocktable, blocknum);
blocksizes += size;
if (n->height == 0) {
leafsizes += h->block_translation[i].size;
leafsizes += size;
leafblocks += 1;
}
toku_brtnode_free(&n);
}
size_t n = h->translated_blocknum_limit * sizeof (struct block_translation_pair);
size_t n = limit * sizeof (struct block_translation_pair);
struct block_translation_pair *bx = toku_malloc(n);
memcpy(bx, h->block_translation, n);
qsort(bx, h->translated_blocknum_limit, sizeof (struct block_translation_pair), bxpcmp);
for (i = 0; i < h->translated_blocknum_limit - 1; i++) {
toku_block_memcpy_translation_table(h->blocktable, n, bx);
qsort(bx, limit, sizeof (struct block_translation_pair), bxpcmp);
for (i = 0; i < limit - 1; i++) {
// printf("%lu %lu %lu\n", i, bx[i].diskoff, bx[i].size);
fragsizes += bx[i+1].diskoff - (bx[i].diskoff + bx[i].size);
}
toku_free(bx);
printf("translated_blocknum_limit: %" PRIu64 "\n", h->translated_blocknum_limit);
printf("translated_blocknum_limit: %" PRIu64 "\n", limit);
printf("leafblocks: %" PRIu64 "\n", leafblocks);
printf("blocksizes: %" PRIu64 "\n", blocksizes);
printf("leafsizes: %" PRIu64 "\n", leafsizes);
......@@ -299,15 +301,24 @@ main (int argc, const char *argv[]) {
} else {
BLOCKNUM blocknum;
printf("Block translation:");
for (blocknum.b=0; blocknum.b<h->unused_blocks.b; blocknum.b++) {
u_int64_t limit = toku_block_get_translated_blocknum_limit(h->blocktable);
BLOCKNUM unused_blocks = toku_block_get_unused_blocks(h->blocktable);
size_t bx_size = limit * sizeof (struct block_translation_pair);
struct block_translation_pair *bx = toku_malloc(bx_size);
toku_block_memcpy_translation_table(h->blocktable, bx_size, bx);
for (blocknum.b=0; blocknum.b< unused_blocks.b; blocknum.b++) {
printf(" %" PRId64 ":", blocknum.b);
if (h->block_translation[blocknum.b].size == -1) printf("free");
else printf("%" PRId64 ":%" PRId64, h->block_translation[blocknum.b].diskoff, h->block_translation[blocknum.b].size);
if (bx[blocknum.b].size == -1) printf("free");
else printf("%" PRId64 ":%" PRId64, bx[blocknum.b].diskoff, bx[blocknum.b].size);
}
for (blocknum.b=1; blocknum.b<h->unused_blocks.b; blocknum.b++) {
if (h->block_translation[blocknum.b].size != -1)
for (blocknum.b=1; blocknum.b<unused_blocks.b; blocknum.b++) {
if (bx[blocknum.b].size != -1)
dump_node(f, blocknum, h);
}
toku_free(bx);
}
toku_brtheader_free(h);
toku_malloc_cleanup();
......
......@@ -137,8 +137,9 @@ static void toku_recover_fheader (LSN UU(lsn), TXNID UU(txnid),FILENUM filenum,L
XMALLOC(h->flags_array);
h->flags_array[0] = header.flags;
h->nodesize = header.nodesize;
h->free_blocks = header.free_blocks;
h->unused_blocks = header.unused_blocks;
assert(h->blocktable /* Not initialized. Is this used? */);
toku_block_recovery_set_free_blocks(h->blocktable, header.free_blocks);
toku_block_recovery_set_unused_blocks(h->blocktable, header.unused_blocks);
h->n_named_roots = header.n_named_roots;
r=toku_fifo_create(&h->fifo);
assert(r==0);
......@@ -687,7 +688,7 @@ toku_recover_changeunusedmemory (LSN UU(lsn), FILENUM filenum, BLOCKNUM UU(oldun
assert(r==0);
assert(pair->brt);
assert(pair->brt->h);
pair->brt->h->unused_blocks = newunused;
toku_block_recovery_set_unused_blocks(pair->brt->h->blocktable, newunused);
}
static int toku_recover_checkpoint (LSN UU(lsn)) {
......
......@@ -84,8 +84,6 @@ REGRESSION_TESTS_RAW = \
omt-cursor-test \
omt-test \
shortcut \
test1305 \
test1308a \
test-assert \
test-brt-delete-both \
test-brt-overflow \
......
......@@ -53,14 +53,13 @@ static void test_serialize(void) {
memset(btps, 0, sizeof(btps));
brt->h = brt_h;
brt_h->panic = 0; brt_h->panic_string = 0;
brt_h->translated_blocknum_limit = 1;
brt_h->block_translation = btps;
brt_h->block_translation[20].diskoff = 4096;
brt_h->block_translation[20].size = 100;
create_block_allocator(&brt_h->block_allocator, 4096, BLOCK_ALLOCATOR_ALIGNMENT);
toku_blocktable_create_new(&brt_h->blocktable);
toku_blocktable_debug_set_translation(brt_h->blocktable, 1, btps);
btps[20].diskoff = 4096;
btps[20].size = 100;
{
u_int64_t b;
block_allocator_alloc_block(brt_h->block_allocator, 100, &b);
toku_block_alloc(brt_h->blocktable, 100, &b);
assert(b==4096);
}
......@@ -120,9 +119,8 @@ static void test_serialize(void) {
toku_free(sn.u.n.childinfos);
toku_free(sn.u.n.childkeys);
block_allocator_free_block(brt_h->block_allocator, 4096);
destroy_block_allocator(&brt_h->block_allocator);
toku_free(brt_h->block_translation);
toku_block_free(brt_h->blocktable, 4096);
toku_blocktable_destroy(&brt_h->blocktable);
toku_free(brt_h);
toku_free(brt);
}
......
......@@ -15,6 +15,7 @@
/* This code requires that the buffer be big enough to hold whatever you put into it. */
/* This abstraction doesn't do a good job of hiding its internals.
* Why? The performance of this code is important, and we want to inline stuff */
//Why is size here an int instead of DISKOFF like in the initializer?
struct wbuf {
unsigned char *buf;
unsigned int size;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment