Commit 4232a577, authored by Bradley C. Kuszmaul, committed by Yoni Fogel

free diskblocks. Addresses #1195.

git-svn-id: file:///svn/toku/tokudb.1195@7679 c7de825b-a66e-492c-adef-691d508d4ae1
parent de600712
...@@ -115,8 +115,8 @@ struct remembered_hash { ...@@ -115,8 +115,8 @@ struct remembered_hash {
}; };
struct block_translation_pair { struct block_translation_pair {
DISKOFF diskoff; DISKOFF diskoff; // When in free list, set to the next free block. In this case it's really a BLOCKNUM.
DISKOFF size; DISKOFF size; // set to 0xFFFFFFFFFFFFFFFF for free
}; };
// The brt_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata. // The brt_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata.
......
...@@ -294,7 +294,7 @@ void toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct ...@@ -294,7 +294,7 @@ void toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct
assert(r==Z_OK); assert(r==Z_OK);
} }
if (0) printf("Size before compressing %u, after compression %lu\n", calculated_size-uncompressed_magic_len, compressed_len); if (0) printf("Block %" PRId64 " Size before compressing %u, after compression %lu\n", blocknum.b, calculated_size-uncompressed_magic_len, compressed_len);
((int32_t*)(compressed_buf+uncompressed_magic_len))[0] = htonl(compressed_len); ((int32_t*)(compressed_buf+uncompressed_magic_len))[0] = htonl(compressed_len);
((int32_t*)(compressed_buf+uncompressed_magic_len))[1] = htonl(uncompressed_len); ((int32_t*)(compressed_buf+uncompressed_magic_len))[1] = htonl(uncompressed_len);
...@@ -308,18 +308,7 @@ void toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct ...@@ -308,18 +308,7 @@ void toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct
//printf("%s:%d translated_blocknum_limit=%lu blocknum.b=%lu\n", __FILE__, __LINE__, h->translated_blocknum_limit, blocknum.b); //printf("%s:%d translated_blocknum_limit=%lu blocknum.b=%lu\n", __FILE__, __LINE__, h->translated_blocknum_limit, blocknum.b);
//printf("%s:%d allocator=%p\n", __FILE__, __LINE__, h->block_allocator); //printf("%s:%d allocator=%p\n", __FILE__, __LINE__, h->block_allocator);
//printf("%s:%d bt=%p\n", __FILE__, __LINE__, h->block_translation); //printf("%s:%d bt=%p\n", __FILE__, __LINE__, h->block_translation);
if (h->translated_blocknum_limit <= (u_int64_t)blocknum.b) { extend_block_translation(blocknum, h);
if (h->block_translation == 0) assert(h->translated_blocknum_limit==0);
u_int64_t new_limit = blocknum.b + 1;
u_int64_t old_limit = h->translated_blocknum_limit;
u_int64_t j;
XREALLOC_N(new_limit, h->block_translation);
for (j=old_limit; j<new_limit; j++) {
h->block_translation[j].diskoff = 0;
h->block_translation[j].size = 0;
}
h->translated_blocknum_limit = new_limit;
}
if (h->block_translation[blocknum.b].size > 0) { if (h->block_translation[blocknum.b].size > 0) {
block_allocator_free_block(h->block_allocator, h->block_translation[blocknum.b].diskoff); block_allocator_free_block(h->block_allocator, h->block_translation[blocknum.b].diskoff);
h->block_translation[blocknum.b].diskoff = 0; h->block_translation[blocknum.b].diskoff = 0;
...@@ -344,6 +333,7 @@ void toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct ...@@ -344,6 +333,7 @@ void toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, struct
} }
int toku_deserialize_brtnode_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *brtnode, struct brt_header *h) { int toku_deserialize_brtnode_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash, BRTNODE *brtnode, struct brt_header *h) {
if (0) printf("Deserializing Block %" PRId64 "\n", blocknum.b);
assert(0 <= blocknum.b && (u_int64_t)blocknum.b < h->translated_blocknum_limit); assert(0 <= blocknum.b && (u_int64_t)blocknum.b < h->translated_blocknum_limit);
DISKOFF offset = h->block_translation[blocknum.b].diskoff; DISKOFF offset = h->block_translation[blocknum.b].diskoff;
TAGMALLOC(BRTNODE, result); TAGMALLOC(BRTNODE, result);
...@@ -371,8 +361,8 @@ int toku_deserialize_brtnode_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash ...@@ -371,8 +361,8 @@ int toku_deserialize_brtnode_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash
compressed_size = ntohl(*(u_int32_t*)(&uncompressed_header[uncompressed_magic_len])); compressed_size = ntohl(*(u_int32_t*)(&uncompressed_header[uncompressed_magic_len]));
if (compressed_size<=0 || compressed_size>(1<<30)) { r = DB_BADFORMAT; goto died0; } if (compressed_size<=0 || compressed_size>(1<<30)) { r = DB_BADFORMAT; goto died0; }
uncompressed_size = ntohl(*(u_int32_t*)(&uncompressed_header[uncompressed_magic_len+4])); uncompressed_size = ntohl(*(u_int32_t*)(&uncompressed_header[uncompressed_magic_len+4]));
if (0) printf("Block %" PRId64 " Compressed size = %u, uncompressed size=%u\n", blocknum.b, compressed_size, uncompressed_size);
if (uncompressed_size<=0 || uncompressed_size>(1<<30)) { r = DB_BADFORMAT; goto died0; } if (uncompressed_size<=0 || uncompressed_size>(1<<30)) { r = DB_BADFORMAT; goto died0; }
if (0) printf("Compressed size = %u, uncompressed size=%u\n", compressed_size, uncompressed_size);
} }
//printf("%s:%d serializing %" PRIu64 " size=%d\n", __FILE__, __LINE__, blocknum.b, uncompressed_size); //printf("%s:%d serializing %" PRIu64 " size=%d\n", __FILE__, __LINE__, blocknum.b, uncompressed_size);
......
...@@ -546,13 +546,59 @@ void toku_brtheader_free (struct brt_header *h) { ...@@ -546,13 +546,59 @@ void toku_brtheader_free (struct brt_header *h) {
toku_free(h); toku_free(h);
} }
void
extend_block_translation (BLOCKNUM blocknum, struct brt_header *h)
// Effect: Ensure that h->block_translation has an entry for blocknum.
//  If blocknum is already below h->translated_blocknum_limit, nothing changes.
//  Otherwise the table is reallocated to hold blocknum.b+1 entries, every newly
//  added entry gets diskoff==0 and size==0, and the limit is raised to blocknum.b+1.
{
    // Fast path: the table already covers this block number.
    if ((u_int64_t)blocknum.b < h->translated_blocknum_limit) return;

    // A table that has never been allocated must also report a limit of zero.
    assert(h->block_translation != 0 || h->translated_blocknum_limit == 0);

    u_int64_t first_new_slot = h->translated_blocknum_limit;
    u_int64_t grown_limit    = blocknum.b + 1;
    XREALLOC_N(grown_limit, h->block_translation);

    // Zero the slots between the old limit and the new one.
    u_int64_t slot = first_new_slot;
    while (slot < grown_limit) {
        h->block_translation[slot].diskoff = 0;
        h->block_translation[slot].size = 0;
        slot++;
    }
    h->translated_blocknum_limit = grown_limit;
}
// Sentinel for free-list links: the head of the free list (free_blocks.b) is compared against this value to detect an empty list.
const DISKOFF diskoff_is_null = (DISKOFF)-1; // in a freelist, this indicates end of list
// Sentinel stored in a block_translation entry's size field to mark that entry as being on the free list.
const DISKOFF size_is_free = (DISKOFF)-1;
// Effect: Hand out a block number in *res.
//  If the header's free list is empty (free_blocks.b == diskoff_is_null), take the next
//  never-used number from unused_blocks and advance it.  Otherwise pop the head of the
//  free list: the popped entry's diskoff field holds the next free block number, and its
//  size field is reset from size_is_free to 0.
//  The header is marked dirty in both cases.
// Returns: 0 always.
// Requires: the resulting block number is > 0 (asserted).
static int
allocate_diskblocknumber (BLOCKNUM *res, BRT brt, TOKULOGGER logger __attribute__((__unused__))) {
    BLOCKNUM result;
    if (brt->h->free_blocks.b == diskoff_is_null) {
        // no blocks in the free list
        result = brt->h->unused_blocks;
        brt->h->unused_blocks.b++;
    } else {
        result = brt->h->free_blocks;
        // The head of the free list must really be marked free.  This must be a comparison:
        // an assignment here would always pass and would have a side effect only when
        // assertions are compiled in.
        assert(brt->h->block_translation[result.b].size == size_is_free);
        brt->h->block_translation[result.b].size = 0;
        brt->h->free_blocks.b = brt->h->block_translation[result.b].diskoff; // pop the freelist
    }
    assert(result.b>0);
    *res = result;
    brt->h->dirty = 1;
    return 0;
}
static int
free_diskblocknumber (BLOCKNUM *b, struct brt_header *h, TOKULOGGER logger __attribute__((__unused__)))
// Effect: Free a diskblock number by pushing it onto the header's free list.
//  The entry's size is set to size_is_free and its diskoff field is reused as the link to
//  the previous head of the free list.  *b is set to 0 and the header is marked dirty.
// Watch out for the case where the disk block was never yet written to disk and is beyond the translated_blocknum_limit.
//  (That case is handled by extending the translation table first.)
// Returns: 0 always.
// NOTE(review): the entry's previous diskoff is overwritten without being released to the
//  block allocator; if the block had already been written (size > 0) its disk space may
//  leak -- confirm against the allocator's ownership rules.
{
    extend_block_translation(*b, h);
    // After extending, the block number must be a valid index, i.e. strictly below the limit.
    assert((u_int64_t)b->b < h->translated_blocknum_limit);
    // Freeing a block that is already on the free list would corrupt the list.
    assert(h->block_translation[b->b].size != size_is_free);
    h->block_translation[b->b].size = size_is_free;
    h->block_translation[b->b].diskoff = h->free_blocks.b; // link to the old head
    h->free_blocks.b = b->b;                               // this block is the new head
    b->b = 0;
    h->dirty = 1;
    return 0;
}
...@@ -2080,14 +2126,18 @@ brt_merge_child (BRT t, BRTNODE node, int childnum_to_merge, BOOL *did_io, TOKUL ...@@ -2080,14 +2126,18 @@ brt_merge_child (BRT t, BRTNODE node, int childnum_to_merge, BOOL *did_io, TOKUL
// Unpin both, and return the first nonzero error code that is found // Unpin both, and return the first nonzero error code that is found
assert(node->dirty); assert(node->dirty);
{ {
int rrb1 = 0;
int rra = toku_unpin_brtnode(t, childa); int rra = toku_unpin_brtnode(t, childa);
int rrb; int rrb;
if (did_merge) { if (did_merge) {
rrb = toku_cachetable_unpin_and_remove(t->cf, childb->thisnodename); BLOCKNUM bn = childb->thisnodename;
rrb = toku_cachetable_unpin_and_remove(t->cf, bn);
rrb1 = free_diskblocknumber(&bn, t->h, logger);
} else { } else {
rrb = toku_unpin_brtnode(t, childb); rrb = toku_unpin_brtnode(t, childb);
} }
if (rrb1) return rrb1;
if (rra) return rra; if (rra) return rra;
if (rrb) return rrb; if (rrb) return rrb;
} }
...@@ -4125,6 +4175,12 @@ int toku_dump_brt (FILE *f, BRT brt) { ...@@ -4125,6 +4175,12 @@ int toku_dump_brt (FILE *f, BRT brt) {
CACHEKEY *rootp; CACHEKEY *rootp;
assert(brt->h); assert(brt->h);
u_int32_t fullhash; u_int32_t fullhash;
u_int64_t i;
fprintf(f, "Block translation:");
for (i=0; i<brt->h->translated_blocknum_limit; i++) {
fprintf(f, " %"PRIu64": %"PRId64" %"PRId64"", i, brt->h->block_translation[i].diskoff, brt->h->block_translation[i].size);
}
fprintf(f, "\n");
rootp = toku_calculate_root_offset_pointer(brt, &fullhash); rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
return toku_dump_brtnode(f, brt, *rootp, 0, 0, 0, 0, 0); return toku_dump_brtnode(f, brt, *rootp, 0, 0, 0, 0, 0);
} }
...@@ -113,4 +113,6 @@ enum brt_header_flags { ...@@ -113,4 +113,6 @@ enum brt_header_flags {
int toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less, u_int64_t *equal, u_int64_t *greater); int toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less, u_int64_t *equal, u_int64_t *greater);
void extend_block_translation (BLOCKNUM blocknum, struct brt_header *h);
#endif #endif
...@@ -291,7 +291,14 @@ main (int argc, const char *argv[]) { ...@@ -291,7 +291,14 @@ main (int argc, const char *argv[]) {
} }
} else { } else {
BLOCKNUM blocknum; BLOCKNUM blocknum;
printf("Block translation:");
for (blocknum.b=0; blocknum.b<h->unused_blocks.b; blocknum.b++) {
printf(" %" PRIu64 ":", blocknum.b);
if (h->block_translation[blocknum.b].size == -1) printf("free");
else printf("%" PRIu64 ":%" PRIu64, h->block_translation[blocknum.b].diskoff, h->block_translation[blocknum.b].size);
}
for (blocknum.b=1; blocknum.b<h->unused_blocks.b; blocknum.b++) { for (blocknum.b=1; blocknum.b<h->unused_blocks.b; blocknum.b++) {
if (h->block_translation[blocknum.b].size != -1)
dump_node(f, blocknum, h); dump_node(f, blocknum, h);
} }
} }
......
...@@ -1089,6 +1089,7 @@ static void test_new_brt_cursor_last(int n, int dup_mode) { ...@@ -1089,6 +1089,7 @@ static void test_new_brt_cursor_last(int n, int dup_mode) {
memcpy(&vv, val.data, val.size); memcpy(&vv, val.data, val.size);
assert(vv == (int) htonl(i)); assert(vv == (int) htonl(i));
//if (n==512 && i<=360) { printf("i=%d\n", i); toku_dump_brt(stdout, t); }
r = toku_brt_cursor_delete(cursor, 0, null_txn); assert(r == 0); r = toku_brt_cursor_delete(cursor, 0, null_txn); assert(r == 0);
} }
assert(i == -1); assert(i == -1);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment