Commit d33980af authored by Bradley C. Kuszmaul, committed by Yoni Fogel

Manage the header not in the cachetable. This will help with #1054. ...

Manage the header not in the cachetable.  This will help with #1054.   Addresses #1000, #1054, #1080, #1131.

git-svn-id: file:///svn/tokudb.1131b+1080a@6128 c7de825b-a66e-492c-adef-691d508d4ae1
parent 4103a85c
...@@ -119,9 +119,11 @@ struct block_translation_pair { ...@@ -119,9 +119,11 @@ struct block_translation_pair {
DISKOFF size; DISKOFF size;
}; };
// The brt_header is not managed by the cachetable. Instead, it hangs off the cachefile as userdata.
struct brt_header { struct brt_header {
int refcount;
int dirty; int dirty;
u_int32_t fullhash;
int layout_version; int layout_version;
unsigned int nodesize; unsigned int nodesize;
int n_named_roots; /* -1 if the only one is unnamed */ int n_named_roots; /* -1 if the only one is unnamed */
...@@ -172,6 +174,7 @@ struct brt { ...@@ -172,6 +174,7 @@ struct brt {
OMT txns; // transactions that are using this OMT (note that the transaction checks the cf also) OMT txns; // transactions that are using this OMT (note that the transaction checks the cf also)
u_int64_t txn_that_created; // which txn created it. Use 0 if no such txn. u_int64_t txn_that_created; // which txn created it. Use 0 if no such txn.
u_int64_t root_put_counter; u_int64_t root_put_counter;
}; };
/* serialization code */ /* serialization code */
...@@ -185,7 +188,7 @@ void toku_verify_counts(BRTNODE); ...@@ -185,7 +188,7 @@ void toku_verify_counts(BRTNODE);
int toku_serialize_brt_header_size (struct brt_header *h); int toku_serialize_brt_header_size (struct brt_header *h);
int toku_serialize_brt_header_to (int fd, struct brt_header *h); int toku_serialize_brt_header_to (int fd, struct brt_header *h);
int toku_serialize_brt_header_to_wbuf (struct wbuf *, struct brt_header *h); int toku_serialize_brt_header_to_wbuf (struct wbuf *, struct brt_header *h);
int toku_deserialize_brtheader_from (int fd, BLOCKNUM off, u_int32_t fullhash, struct brt_header **brth); int toku_deserialize_brtheader_from (int fd, BLOCKNUM off, struct brt_header **brth);
int toku_serialize_fifo_at (int fd, off_t freeoff, FIFO fifo); // Write a fifo into a disk, without worrying about fitting it into a block. This write is done at the end of the file. int toku_serialize_fifo_at (int fd, off_t freeoff, FIFO fifo); // Write a fifo into a disk, without worrying about fitting it into a block. This write is done at the end of the file.
...@@ -204,10 +207,9 @@ struct brtenv { ...@@ -204,10 +207,9 @@ struct brtenv {
// SPINLOCK checkpointing; // SPINLOCK checkpointing;
}; };
extern void toku_brtnode_flush_callback(), toku_brtheader_flush_callback(); extern void toku_brtnode_flush_callback (CACHEFILE cachefile, BLOCKNUM nodename, void *brtnode_v, void *extraargs, long size, BOOL write_me, BOOL keep_me, LSN modified_lsn, BOOL rename_p);
extern int toku_brtnode_fetch_callback(), toku_brtheader_fetch_callback(); extern int toku_brtnode_fetch_callback (CACHEFILE cachefile, BLOCKNUM nodename, u_int32_t fullhash, void **brtnode_pv, long *sizep, void*extraargs, LSN *written_lsn);
extern int toku_read_and_pin_brt_header (CACHEFILE cf, struct brt_header **header); extern int toku_read_brt_header_and_store_in_cachefile (CACHEFILE cf, struct brt_header **header);
extern int toku_unpin_brt_header (BRT brt);
extern CACHEKEY* toku_calculate_root_offset_pointer (BRT brt, u_int32_t *root_hash); extern CACHEKEY* toku_calculate_root_offset_pointer (BRT brt, u_int32_t *root_hash);
static const BRTNODE null_brtnode=0; static const BRTNODE null_brtnode=0;
...@@ -277,5 +279,6 @@ enum brt_layout_version_e { ...@@ -277,5 +279,6 @@ enum brt_layout_version_e {
}; };
void toku_brtheader_free (struct brt_header *h); void toku_brtheader_free (struct brt_header *h);
int toku_brtheader_close (CACHEFILE cachefile, void *header_v);
#endif #endif
...@@ -255,18 +255,26 @@ void toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, BRT brt ...@@ -255,18 +255,26 @@ void toku_serialize_brtnode_to (int fd, BLOCKNUM blocknum, BRTNODE node, BRT brt
{ {
// If the node has never been written, then write the whole buffer, including the zeros // If the node has never been written, then write the whole buffer, including the zeros
assert(blocknum.b>=0); assert(blocknum.b>=0);
printf("%s:%d trans=%lu\n", __FILE__, __LINE__, brt->h->translated_blocknum_limit); printf("%s:%d brt=%p\n", __FILE__, __LINE__, brt);
if (brt->h->translated_blocknum_limit > (u_int64_t)blocknum.b) { printf("%s:%d translated_blocknum_limit=%lu blocknum.b=%lu\n", __FILE__, __LINE__, brt->h->translated_blocknum_limit, blocknum.b);
printf("%s:%d allocator=%p\n", __FILE__, __LINE__, brt->h->block_allocator);
printf("%s:%d bt=%p\n", __FILE__, __LINE__, brt->h->block_translation);
if (brt->h->translated_blocknum_limit <= (u_int64_t)blocknum.b) {
if (brt->h->block_translation == 0) assert(brt->h->translated_blocknum_limit==0);
u_int64_t new_limit = blocknum.b + 1; u_int64_t new_limit = blocknum.b + 1;
u_int64_t old_limit = brt->h->translated_blocknum_limit; u_int64_t old_limit = brt->h->translated_blocknum_limit;
u_int64_t j;
XREALLOC_N(new_limit, brt->h->block_translation); XREALLOC_N(new_limit, brt->h->block_translation);
while (++old_limit < new_limit) { for (j=old_limit; j<new_limit; j++) {
brt->h->block_translation[old_limit].diskoff = 0; brt->h->block_translation[j].diskoff = 0;
brt->h->block_translation[old_limit].size = 0; brt->h->block_translation[j].size = 0;
} }
brt->h->translated_blocknum_limit = new_limit; brt->h->translated_blocknum_limit = new_limit;
} else { }
if (brt->h->block_translation[blocknum.b].size > 0) {
block_allocator_free_block(brt->h->block_allocator, brt->h->block_translation[blocknum.b].diskoff); block_allocator_free_block(brt->h->block_allocator, brt->h->block_translation[blocknum.b].diskoff);
brt->h->block_translation[blocknum.b].diskoff = 0;
brt->h->block_translation[blocknum.b].size = 0;
} }
size_t n_to_write = uncompressed_magic_len + compression_header_len + compressed_len; size_t n_to_write = uncompressed_magic_len + compression_header_len + compressed_len;
u_int64_t offset; u_int64_t offset;
...@@ -646,16 +654,17 @@ int toku_serialize_brt_header_to (int fd, struct brt_header *h) { ...@@ -646,16 +654,17 @@ int toku_serialize_brt_header_to (int fd, struct brt_header *h) {
} }
{ {
struct wbuf w; struct wbuf w;
u_int64_t size = 4 + h->translated_blocknum_limit * 8; // 4 for the checksum u_int64_t size = 4 + h->translated_blocknum_limit * 16; // 4 for the checksum
printf("%s:%d writing translation table of size %ld\n", __FILE__, __LINE__, size); printf("%s:%d writing translation table of size %ld\n", __FILE__, __LINE__, size);
wbuf_init(&w, toku_malloc(size), size); wbuf_init(&w, toku_malloc(size), size);
u_int64_t i; u_int64_t i;
for (i=0; i<h->translated_blocknum_limit; i++) { for (i=0; i<h->translated_blocknum_limit; i++) {
printf("%s:%d %ld,%ld\n", __FILE__, __LINE__, h->block_translation[i].diskoff, h->block_translation[i].size);
wbuf_ulonglong(&w, h->block_translation[i].diskoff); wbuf_ulonglong(&w, h->block_translation[i].diskoff);
wbuf_ulonglong(&w, h->block_translation[i].size); wbuf_ulonglong(&w, h->block_translation[i].size);
} }
u_int32_t checksum = x1764_finish(&w.checksum); u_int32_t checksum = x1764_finish(&w.checksum);
printf("%s:%d writing to %ld, checksum=%d offset=%d size=%ld\n", __FILE__, __LINE__, h->block_translation_address_on_disk, checksum, w.ndone, size); printf("%s:%d writing to %d\n", __FILE__, __LINE__, checksum);
wbuf_int(&w, checksum); wbuf_int(&w, checksum);
ssize_t nwrote = pwrite(fd, w.buf, size, h->block_translation_address_on_disk); ssize_t nwrote = pwrite(fd, w.buf, size, h->block_translation_address_on_disk);
assert(nwrote==(ssize_t)size); assert(nwrote==(ssize_t)size);
...@@ -664,12 +673,12 @@ int toku_serialize_brt_header_to (int fd, struct brt_header *h) { ...@@ -664,12 +673,12 @@ int toku_serialize_brt_header_to (int fd, struct brt_header *h) {
return 0; return 0;
} }
int deserialize_brtheader (u_int32_t size, int fd, DISKOFF off, struct brt_header **brth, u_int32_t fullhash) { // We only deserialize brt header once and then share everything with all the brts.
int deserialize_brtheader (u_int32_t size, int fd, DISKOFF off, struct brt_header **brth) {
// We already know the first 8 bytes are "tokudata", and we read in the size. // We already know the first 8 bytes are "tokudata", and we read in the size.
struct brt_header *MALLOC(h); struct brt_header *MALLOC(h);
if (h==0) return errno; if (h==0) return errno;
int ret=-1; int ret=-1;
h->fullhash = fullhash;
if (0) { died0: toku_free(h); return ret; } if (0) { died0: toku_free(h); return ret; }
struct rbuf rc; struct rbuf rc;
rc.buf = toku_malloc(size-12); // we can skip the first 12 bytes. rc.buf = toku_malloc(size-12); // we can skip the first 12 bytes.
...@@ -689,10 +698,11 @@ int deserialize_brtheader (u_int32_t size, int fd, DISKOFF off, struct brt_heade ...@@ -689,10 +698,11 @@ int deserialize_brtheader (u_int32_t size, int fd, DISKOFF off, struct brt_heade
h->unused_blocks = rbuf_blocknum(&rc); h->unused_blocks = rbuf_blocknum(&rc);
h->n_named_roots = rbuf_int(&rc); h->n_named_roots = rbuf_int(&rc);
h->translated_blocknum_limit = rbuf_diskoff(&rc); h->translated_blocknum_limit = rbuf_diskoff(&rc);
h->block_translation_size_on_disk = 4 + 8 * h->translated_blocknum_limit; h->block_translation_size_on_disk = 4 + 16 * h->translated_blocknum_limit;
h->block_translation_address_on_disk = rbuf_diskoff(&rc); h->block_translation_address_on_disk = rbuf_diskoff(&rc);
// Set up the block translation buffer. // Set up the block translation buffer.
create_block_allocator(&h->block_allocator, h->nodesize); create_block_allocator(&h->block_allocator, h->nodesize);
printf("%s:%d translated_blocknum_limit=%ld, block_translation_address_on_disk=%ld\n", __FILE__, __LINE__, h->translated_blocknum_limit, h->block_translation_address_on_disk);
if (h->block_translation_address_on_disk == 0) { if (h->block_translation_address_on_disk == 0) {
h->block_translation = 0; h->block_translation = 0;
} else { } else {
...@@ -763,7 +773,7 @@ int deserialize_brtheader (u_int32_t size, int fd, DISKOFF off, struct brt_heade ...@@ -763,7 +773,7 @@ int deserialize_brtheader (u_int32_t size, int fd, DISKOFF off, struct brt_heade
return 0; return 0;
} }
int toku_deserialize_brtheader_from (int fd, BLOCKNUM blocknum, u_int32_t fullhash, struct brt_header **brth) { int toku_deserialize_brtheader_from (int fd, BLOCKNUM blocknum, struct brt_header **brth) {
//printf("%s:%d calling MALLOC\n", __FILE__, __LINE__); //printf("%s:%d calling MALLOC\n", __FILE__, __LINE__);
assert(blocknum.b==0); assert(blocknum.b==0);
DISKOFF offset = 0; DISKOFF offset = 0;
...@@ -776,7 +786,7 @@ int toku_deserialize_brtheader_from (int fd, BLOCKNUM blocknum, u_int32_t fullha ...@@ -776,7 +786,7 @@ int toku_deserialize_brtheader_from (int fd, BLOCKNUM blocknum, u_int32_t fullha
if (r!=12) return EINVAL; if (r!=12) return EINVAL;
assert(memcmp(magic,"tokudata",8)==0); assert(memcmp(magic,"tokudata",8)==0);
// It's version 7 or later, and the magic looks OK // It's version 7 or later, and the magic looks OK
return deserialize_brtheader(ntohl(*(int*)(&magic[8])), fd, offset, brth, fullhash); return deserialize_brtheader(ntohl(*(int*)(&magic[8])), fd, offset, brth);
} }
unsigned int toku_brt_pivot_key_len (BRT brt, struct kv_pair *pk) { unsigned int toku_brt_pivot_key_len (BRT brt, struct kv_pair *pk) {
......
...@@ -3,15 +3,13 @@ ...@@ -3,15 +3,13 @@
int toku_testsetup_leaf(BRT brt, BLOCKNUM *blocknum) { int toku_testsetup_leaf(BRT brt, BLOCKNUM *blocknum) {
BRTNODE node; BRTNODE node;
int r = toku_read_and_pin_brt_header(brt->cf, &brt->h); int r = toku_read_brt_header_and_store_in_cachefile(brt->cf, &brt->h);
if (r!=0) return r; if (r!=0) return r;
toku_create_new_brtnode(brt, &node, 0, (TOKULOGGER)0); toku_create_new_brtnode(brt, &node, 0, (TOKULOGGER)0);
*blocknum = node->thisnodename; *blocknum = node->thisnodename;
r = toku_unpin_brtnode(brt, node); r = toku_unpin_brtnode(brt, node);
if (r!=0) return r; if (r!=0) return r;
r = toku_unpin_brt_header(brt);
if (r!=0) return r;
return 0; return 0;
} }
...@@ -19,7 +17,7 @@ int toku_testsetup_leaf(BRT brt, BLOCKNUM *blocknum) { ...@@ -19,7 +17,7 @@ int toku_testsetup_leaf(BRT brt, BLOCKNUM *blocknum) {
int toku_testsetup_nonleaf (BRT brt, int height, BLOCKNUM *blocknum, int n_children, BLOCKNUM *children, u_int32_t *subtree_fingerprints, char **keys, int *keylens) { int toku_testsetup_nonleaf (BRT brt, int height, BLOCKNUM *blocknum, int n_children, BLOCKNUM *children, u_int32_t *subtree_fingerprints, char **keys, int *keylens) {
BRTNODE node; BRTNODE node;
assert(n_children<=BRT_FANOUT); assert(n_children<=BRT_FANOUT);
int r = toku_read_and_pin_brt_header(brt->cf, &brt->h); int r = toku_read_brt_header_and_store_in_cachefile(brt->cf, &brt->h);
if (r!=0) return r; if (r!=0) return r;
toku_create_new_brtnode(brt, &node, height, (TOKULOGGER)0); toku_create_new_brtnode(brt, &node, height, (TOKULOGGER)0);
node->u.n.n_children=n_children; node->u.n.n_children=n_children;
...@@ -40,20 +38,15 @@ int toku_testsetup_nonleaf (BRT brt, int height, BLOCKNUM *blocknum, int n_child ...@@ -40,20 +38,15 @@ int toku_testsetup_nonleaf (BRT brt, int height, BLOCKNUM *blocknum, int n_child
node->u.n.totalchildkeylens += keylens[i]; node->u.n.totalchildkeylens += keylens[i];
} }
*blocknum = node->thisnodename; *blocknum = node->thisnodename;
r = toku_unpin_brtnode(brt, node); return toku_unpin_brtnode(brt, node);
if (r!=0) return r;
r = toku_unpin_brt_header(brt);
if (r!=0) return r;
return 0;
} }
int toku_testsetup_root(BRT brt, BLOCKNUM blocknum) { int toku_testsetup_root(BRT brt, BLOCKNUM blocknum) {
int r = toku_read_and_pin_brt_header(brt->cf, &brt->h); int r = toku_read_brt_header_and_store_in_cachefile(brt->cf, &brt->h);
if (r!=0) return r; if (r!=0) return r;
brt->h->roots[0] = blocknum; brt->h->roots[0] = blocknum;
brt->h->root_hashes[0].valid = FALSE; brt->h->root_hashes[0].valid = FALSE;
r = toku_unpin_brt_header(brt); return 0;
return r;
} }
int toku_testsetup_get_sersize(BRT brt, BLOCKNUM diskoff) // Return the size on disk int toku_testsetup_get_sersize(BRT brt, BLOCKNUM diskoff) // Return the size on disk
......
...@@ -154,15 +154,9 @@ int toku_verify_brtnode (BRT brt, BLOCKNUM blocknum, bytevec lorange, ITEMLEN lo ...@@ -154,15 +154,9 @@ int toku_verify_brtnode (BRT brt, BLOCKNUM blocknum, bytevec lorange, ITEMLEN lo
} }
int toku_verify_brt (BRT brt) { int toku_verify_brt (BRT brt) {
int r;
CACHEKEY *rootp; CACHEKEY *rootp;
if ((r = toku_read_and_pin_brt_header(brt->cf, &brt->h))) { assert(brt->h);
if (0) { died0: toku_unpin_brt_header(brt); }
return r;
}
u_int32_t root_hash; u_int32_t root_hash;
rootp = toku_calculate_root_offset_pointer(brt, &root_hash); rootp = toku_calculate_root_offset_pointer(brt, &root_hash);
if ((r=toku_verify_brtnode(brt, *rootp, 0, 0, 0, 0, 1))) goto died0; return toku_verify_brtnode(brt, *rootp, 0, 0, 0, 0, 1);
if ((r = toku_unpin_brt_header(brt))!=0) return r;
return 0;
} }
...@@ -214,28 +214,18 @@ void toku_brtheader_free (struct brt_header *h) { ...@@ -214,28 +214,18 @@ void toku_brtheader_free (struct brt_header *h) {
toku_free(h); toku_free(h);
} }
void toku_brtheader_flush_callback (CACHEFILE cachefile, int toku_brtheader_close (CACHEFILE cachefile, void *header_v) {
BLOCKNUM nodename,
void *header_v,
void *extra_args __attribute__((__unused__)),
long size __attribute__((unused)),
BOOL write_me,
BOOL keep_me,
LSN lsn __attribute__((__unused__)),
BOOL rename_p __attribute__((__unused__))) {
struct brt_header *h = header_v; struct brt_header *h = header_v;
assert(nodename.b==0); if (h->dirty) {
assert(!h->dirty); // shouldn't be dirty once it is unpinned.
if (write_me) {
toku_serialize_brt_header_to(toku_cachefile_fd(cachefile), h); toku_serialize_brt_header_to(toku_cachefile_fd(cachefile), h);
toku_serialize_fifo_at(toku_cachefile_fd(cachefile), h->unused_blocks.b*h->nodesize, h->fifo); toku_serialize_fifo_at(toku_cachefile_fd(cachefile), h->unused_blocks.b*h->nodesize, h->fifo);
} }
if (!keep_me) {
toku_brtheader_free(h); toku_brtheader_free(h);
} return 0;
} }
int toku_brtheader_fetch_callback (CACHEFILE cachefile, BLOCKNUM nodename, u_int32_t fullhash, void **headerp_v, long *sizep __attribute__((unused)), void*extraargs __attribute__((__unused__)), LSN *written_lsn) { #if 0
static int toku_brtheader_fetch_callback (CACHEFILE cachefile, BLOCKNUM nodename, u_int32_t fullhash, void **headerp_v, long *sizep __attribute__((unused)), void*extraargs __attribute__((__unused__)), LSN *written_lsn) {
int r; int r;
struct brt_header **h = (struct brt_header **)headerp_v; struct brt_header **h = (struct brt_header **)headerp_v;
assert(nodename.b==0); assert(nodename.b==0);
...@@ -245,30 +235,27 @@ int toku_brtheader_fetch_callback (CACHEFILE cachefile, BLOCKNUM nodename, u_int ...@@ -245,30 +235,27 @@ int toku_brtheader_fetch_callback (CACHEFILE cachefile, BLOCKNUM nodename, u_int
assert((*h)->free_blocks.b==-1); assert((*h)->free_blocks.b==-1);
return 0; return 0;
} }
#endif
int toku_read_and_pin_brt_header (CACHEFILE cf, struct brt_header **header) { int toku_read_brt_header_and_store_in_cachefile (CACHEFILE cf, struct brt_header **header)
void *header_p; // If the cachefile already has the header, then just get it.
//fprintf(stderr, "%s:%d read_and_pin_brt_header(...)\n", __FILE__, __LINE__); // If the cachefile has not been initialized, then don't modify anything.
u_int32_t fullhash = toku_cachefile_fullhash_of_header(cf); {
BLOCKNUM blocknum = make_blocknum(0); {
int r = toku_cachetable_get_and_pin(cf, blocknum, fullhash, &header_p, NULL, struct brt_header *h;
toku_brtheader_flush_callback, toku_brtheader_fetch_callback, 0); if ((h=toku_cachefile_get_userdata(cf))!=0) {
*header = h;
return 0;
}
}
struct brt_header *h;
int r = toku_deserialize_brtheader_from(toku_cachefile_fd(cf), make_blocknum(0), &h);
if (r!=0) return r; if (r!=0) return r;
struct brt_header *bheader = header_p; toku_cachefile_set_userdata(cf, (void*)h, toku_brtheader_close);
assert(bheader->fullhash==fullhash); *header = h;
*header = bheader;
assert((*header)->free_blocks.b==-1);
return 0; return 0;
} }
int toku_unpin_brt_header (BRT brt) {
int dirty = brt->h->dirty;
brt->h->dirty=0; // Unpinning it may make it go way.
BLOCKNUM blocknum = make_blocknum(0);
int r = toku_cachetable_unpin(brt->cf, blocknum, brt->h->fullhash, dirty, 0);
brt->h=0;
return r;
}
int toku_unpin_brtnode (BRT brt, BRTNODE node) { int toku_unpin_brtnode (BRT brt, BRTNODE node) {
// if (node->dirty && txn) { // if (node->dirty && txn) {
// // For now just update the log_lsn. Later we'll have to deal with the checksums. // // For now just update the log_lsn. Later we'll have to deal with the checksums.
...@@ -2147,6 +2134,7 @@ static int brt_open_file(BRT brt, const char *fname, const char *fname_in_env, i ...@@ -2147,6 +2134,7 @@ static int brt_open_file(BRT brt, const char *fname, const char *fname_in_env, i
} }
// allocate and initialize a brt header. // allocate and initialize a brt header.
// t->cf is not set to anything.
static int brt_alloc_init_header(BRT t, const char *dbname, TOKUTXN txn) { static int brt_alloc_init_header(BRT t, const char *dbname, TOKUTXN txn) {
int r; int r;
BLOCKNUM root = make_blocknum(1); BLOCKNUM root = make_blocknum(1);
...@@ -2169,6 +2157,7 @@ static int brt_alloc_init_header(BRT t, const char *dbname, TOKUTXN txn) { ...@@ -2169,6 +2157,7 @@ static int brt_alloc_init_header(BRT t, const char *dbname, TOKUTXN txn) {
t->h->block_translation = 0; t->h->block_translation = 0;
t->h->block_translation_size_on_disk = 0; t->h->block_translation_size_on_disk = 0;
t->h->block_translation_address_on_disk = 0; t->h->block_translation_address_on_disk = 0;
printf("%s:%d translated_blocknum_limit=%ld, block_translation_address_on_disk=%ld\n", __FILE__, __LINE__, t->h->translated_blocknum_limit, t->h->block_translation_address_on_disk);
create_block_allocator(&t->h->block_allocator, t->nodesize); create_block_allocator(&t->h->block_allocator, t->nodesize);
toku_fifo_create(&t->h->fifo); toku_fifo_create(&t->h->fifo);
t->root_put_counter = global_root_put_counter++; t->root_put_counter = global_root_put_counter++;
...@@ -2206,10 +2195,8 @@ static int brt_alloc_init_header(BRT t, const char *dbname, TOKUTXN txn) { ...@@ -2206,10 +2195,8 @@ static int brt_alloc_init_header(BRT t, const char *dbname, TOKUTXN txn) {
} }
if ((r=setup_initial_brt_root_node(t, root, toku_txn_logger(txn)))!=0) { goto died7; } if ((r=setup_initial_brt_root_node(t, root, toku_txn_logger(txn)))!=0) { goto died7; }
//printf("%s:%d putting %p (%d)\n", __FILE__, __LINE__, t->h, 0); //printf("%s:%d putting %p (%d)\n", __FILE__, __LINE__, t->h, 0);
u_int32_t fullhash = toku_cachefile_fullhash_of_header(t->cf);
t->h->fullhash = fullhash;
assert(t->h->free_blocks.b==-1); assert(t->h->free_blocks.b==-1);
if ((r=toku_cachetable_put(t->cf, header_blocknum, fullhash, t->h, 0, toku_brtheader_flush_callback, toku_brtheader_fetch_callback, 0))) { goto died7; } toku_cachefile_set_userdata(t->cf, t->h, toku_brtheader_close);
return r; return r;
} }
...@@ -2255,7 +2242,7 @@ int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, const char ...@@ -2255,7 +2242,7 @@ int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, const char
toku_logger_log_fopen(txn, fname_in_env, toku_cachefile_filenum(t->cf)); toku_logger_log_fopen(txn, fname_in_env, toku_cachefile_filenum(t->cf));
} }
if (r!=0) { if (r!=0) {
if (0) { died1: toku_cachefile_close(&t->cf, toku_txn_logger(txn)); } if (0) { died_after_open: toku_cachefile_close(&t->cf, toku_txn_logger(txn)); }
t->database_name = 0; t->database_name = 0;
goto died0a; goto died0a;
} }
...@@ -2263,11 +2250,10 @@ int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, const char ...@@ -2263,11 +2250,10 @@ int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, const char
//printf("%s:%d %d alloced\n", __FILE__, __LINE__, get_n_items_malloced()); toku_print_malloced_items(); //printf("%s:%d %d alloced\n", __FILE__, __LINE__, get_n_items_malloced()); toku_print_malloced_items();
if (0) { if (0) {
died_after_read_and_pin: died_after_read_and_pin:
toku_cachetable_unpin(t->cf, header_blocknum, toku_cachefile_fullhash_of_header(t->cf), 0, 0); // unpin the header goto died_after_open;
goto died1;
} }
if (is_create) { if (is_create) {
r = toku_read_and_pin_brt_header(t->cf, &t->h); r = toku_read_brt_header_and_store_in_cachefile(t->cf, &t->h);
if (r==-1) { if (r==-1) {
r = brt_alloc_init_header(t, dbname, txn); r = brt_alloc_init_header(t, dbname, txn);
if (r != 0) goto died_after_read_and_pin; if (r != 0) goto died_after_read_and_pin;
...@@ -2308,7 +2294,7 @@ int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, const char ...@@ -2308,7 +2294,7 @@ int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, const char
if ((r=setup_initial_brt_root_node(t, t->h->roots[t->h->n_named_roots-1], toku_txn_logger(txn)))!=0) goto died_after_read_and_pin; if ((r=setup_initial_brt_root_node(t, t->h->roots[t->h->n_named_roots-1], toku_txn_logger(txn)))!=0) goto died_after_read_and_pin;
} }
} else { } else {
if ((r = toku_read_and_pin_brt_header(t->cf, &t->h))!=0) goto died1; if ((r = toku_read_brt_header_and_store_in_cachefile(t->cf, &t->h))!=0) goto died_after_open;
if (!dbname) { if (!dbname) {
if (t->h->n_named_roots!=-1) { r = EINVAL; goto died_after_read_and_pin; } // requires a subdb if (t->h->n_named_roots!=-1) { r = EINVAL; goto died_after_read_and_pin; } // requires a subdb
db_index=0; db_index=0;
...@@ -2337,8 +2323,6 @@ int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, const char ...@@ -2337,8 +2323,6 @@ int toku_brt_open(BRT t, const char *fname, const char *fname_in_env, const char
} }
} }
assert(t->h); assert(t->h);
if ((r = toku_unpin_brt_header(t)) !=0) goto died1; // it's unpinned
assert(t->h==0);
WHEN_BRTTRACE(fprintf(stderr, "BRTTRACE -> %p\n", t)); WHEN_BRTTRACE(fprintf(stderr, "BRTTRACE -> %p\n", t));
return 0; return 0;
} }
...@@ -2358,26 +2342,19 @@ int toku_brt_reopen(BRT brt, const char *fname, const char *fname_in_env, TOKUTX ...@@ -2358,26 +2342,19 @@ int toku_brt_reopen(BRT brt, const char *fname, const char *fname_in_env, TOKUTX
// init the tree header // init the tree header
assert(brt->h == 0); assert(brt->h == 0);
r = toku_read_and_pin_brt_header(brt->cf, &brt->h); r = toku_read_brt_header_and_store_in_cachefile(brt->cf, &brt->h);
if (r == -1) { if (r == -1) {
r = brt_alloc_init_header(brt, NULL, txn); r = brt_alloc_init_header(brt, NULL, txn);
assert(r == 0);
r = toku_unpin_brt_header(brt);
} }
return r; return r;
} }
int toku_brt_remove_subdb(BRT brt, const char *dbname, u_int32_t flags) { int toku_brt_remove_subdb(BRT brt, const char *dbname, u_int32_t flags) {
int r;
int r2 = 0;
int i; int i;
int found = -1; int found = -1;
assert(flags == 0); assert(flags == 0);
r = toku_read_and_pin_brt_header(brt->cf, &brt->h); assert(brt->h);
//TODO: What if r != 0? Is this possible?
// We just called toku_brt_open, so it should exist...
assert(r==0);
assert(brt->h->n_named_roots>=0); assert(brt->h->n_named_roots>=0);
for (i = 0; i < brt->h->n_named_roots; i++) { for (i = 0; i < brt->h->n_named_roots; i++) {
...@@ -2388,8 +2365,7 @@ int toku_brt_remove_subdb(BRT brt, const char *dbname, u_int32_t flags) { ...@@ -2388,8 +2365,7 @@ int toku_brt_remove_subdb(BRT brt, const char *dbname, u_int32_t flags) {
} }
if (found == -1) { if (found == -1) {
//Should not be possible. //Should not be possible.
r = ENOENT; return ENOENT;
goto error;
} }
//Free old db name //Free old db name
toku_free(brt->h->names[found]); toku_free(brt->h->names[found]);
...@@ -2403,15 +2379,11 @@ int toku_brt_remove_subdb(BRT brt, const char *dbname, u_int32_t flags) { ...@@ -2403,15 +2379,11 @@ int toku_brt_remove_subdb(BRT brt, const char *dbname, u_int32_t flags) {
brt->h->n_named_roots--; brt->h->n_named_roots--;
brt->h->dirty = 1; brt->h->dirty = 1;
// Q: What if n_named_roots becomes 0? A: Don't do anything. an empty list of named roots is OK. // Q: What if n_named_roots becomes 0? A: Don't do anything. an empty list of named roots is OK.
if ((brt->h->names = toku_realloc(brt->h->names, (brt->h->n_named_roots)*sizeof(*brt->h->names))) == 0) { assert(errno==ENOMEM); r=ENOMEM; goto error; } XREALLOC_N(brt->h->n_named_roots, brt->h->names);
if ((brt->h->roots = toku_realloc(brt->h->roots, (brt->h->n_named_roots)*sizeof(*brt->h->roots))) == 0) { assert(errno==ENOMEM); r=ENOMEM; goto error; } XREALLOC_N(brt->h->n_named_roots, brt->h->roots);
if ((brt->h->root_hashes = toku_realloc(brt->h->root_hashes, (brt->h->n_named_roots)*sizeof(*brt->h->root_hashes))) == 0) { assert(errno==ENOMEM); r=ENOMEM; goto error; } XREALLOC_N(brt->h->n_named_roots, brt->h->root_hashes);
return 0;
error:
r2 = toku_unpin_brt_header(brt);
assert(r2==0);//TODO: Can r2 be non 0?
assert(brt->h==0);
return r ? r : r2;
} }
// This one has no env // This one has no env
...@@ -2459,6 +2431,7 @@ int toku_close_brt (BRT brt, TOKULOGGER logger) { ...@@ -2459,6 +2431,7 @@ int toku_close_brt (BRT brt, TOKULOGGER logger) {
} }
assert(0==toku_cachefile_count_pinned(brt->cf, 1)); // For the brt, the pinned count should be zero. assert(0==toku_cachefile_count_pinned(brt->cf, 1)); // For the brt, the pinned count should be zero.
//printf("%s:%d closing cachetable\n", __FILE__, __LINE__); //printf("%s:%d closing cachetable\n", __FILE__, __LINE__);
printf("%s:%d brt=%p ,brt->h=%p\n", __FILE__, __LINE__, brt, brt->h);
if ((r = toku_cachefile_close(&brt->cf, logger))!=0) return r; if ((r = toku_cachefile_close(&brt->cf, logger))!=0) return r;
} }
if (brt->database_name) toku_free(brt->database_name); if (brt->database_name) toku_free(brt->database_name);
...@@ -2563,9 +2536,8 @@ static int brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk, ...@@ -2563,9 +2536,8 @@ static int brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk,
int toku_cachefile_root_put_cmd (CACHEFILE cf, BRT_CMD cmd, TOKULOGGER logger) { int toku_cachefile_root_put_cmd (CACHEFILE cf, BRT_CMD cmd, TOKULOGGER logger) {
int r; int r;
struct brt_header *h; struct brt_header *h = toku_cachefile_get_userdata(cf);
r = toku_read_and_pin_brt_header(cf, &h); assert(h);
if (r!=0) return r;
r = toku_fifo_enq_cmdstruct(h->fifo, cmd); r = toku_fifo_enq_cmdstruct(h->fifo, cmd);
if (r!=0) return r; if (r!=0) return r;
{ {
...@@ -2574,8 +2546,6 @@ int toku_cachefile_root_put_cmd (CACHEFILE cf, BRT_CMD cmd, TOKULOGGER logger) { ...@@ -2574,8 +2546,6 @@ int toku_cachefile_root_put_cmd (CACHEFILE cf, BRT_CMD cmd, TOKULOGGER logger) {
r = toku_log_enqrootentry(logger, (LSN*)0, 0, toku_cachefile_filenum(cf), cmd->xid, cmd->type, keybs, valbs); r = toku_log_enqrootentry(logger, (LSN*)0, 0, toku_cachefile_filenum(cf), cmd->xid, cmd->type, keybs, valbs);
if (r!=0) return r; if (r!=0) return r;
} }
h->dirty = 0;
r = toku_cachetable_unpin(cf, header_blocknum, h->fullhash, 1, 0);
return 0; return 0;
} }
...@@ -2611,10 +2581,7 @@ int toku_brt_root_put_cmd(BRT brt, BRT_CMD cmd, TOKULOGGER logger) { ...@@ -2611,10 +2581,7 @@ int toku_brt_root_put_cmd(BRT brt, BRT_CMD cmd, TOKULOGGER logger) {
CACHEKEY *rootp; CACHEKEY *rootp;
int r; int r;
//assert(0==toku_cachetable_assert_all_unpinned(brt->cachetable)); //assert(0==toku_cachetable_assert_all_unpinned(brt->cachetable));
if ((r = toku_read_and_pin_brt_header(brt->cf, &brt->h))) { assert(brt->h);
if (0) { died0: toku_unpin_brt_header(brt); }
return r;
}
brt->root_put_counter = global_root_put_counter++; brt->root_put_counter = global_root_put_counter++;
u_int32_t fullhash; u_int32_t fullhash;
...@@ -2622,7 +2589,7 @@ int toku_brt_root_put_cmd(BRT brt, BRT_CMD cmd, TOKULOGGER logger) { ...@@ -2622,7 +2589,7 @@ int toku_brt_root_put_cmd(BRT brt, BRT_CMD cmd, TOKULOGGER logger) {
//assert(fullhash==toku_cachetable_hash(brt->cf, *rootp)); //assert(fullhash==toku_cachetable_hash(brt->cf, *rootp));
if ((r=toku_cachetable_get_and_pin(brt->cf, *rootp, fullhash, &node_v, NULL, if ((r=toku_cachetable_get_and_pin(brt->cf, *rootp, fullhash, &node_v, NULL,
toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt))) { toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt))) {
goto died0; return r;
} }
//printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v); //printf("%s:%d pin %p\n", __FILE__, __LINE__, node_v);
node=node_v; node=node_v;
...@@ -2642,8 +2609,6 @@ int toku_brt_root_put_cmd(BRT brt, BRT_CMD cmd, TOKULOGGER logger) { ...@@ -2642,8 +2609,6 @@ int toku_brt_root_put_cmd(BRT brt, BRT_CMD cmd, TOKULOGGER logger) {
if ((r = push_something(brt, &node, rootp, cmd, logger))) return r; if ((r = push_something(brt, &node, rootp, cmd, logger))) return r;
r = toku_unpin_brtnode(brt, node); r = toku_unpin_brtnode(brt, node);
assert(r == 0); assert(r == 0);
r = toku_unpin_brt_header(brt);
assert(r == 0);
return 0; return 0;
} }
...@@ -2768,20 +2733,12 @@ int toku_dump_brtnode (BRT brt, BLOCKNUM blocknum, int depth, bytevec lorange, I ...@@ -2768,20 +2733,12 @@ int toku_dump_brtnode (BRT brt, BLOCKNUM blocknum, int depth, bytevec lorange, I
} }
int toku_dump_brt (BRT brt) { int toku_dump_brt (BRT brt) {
int r;
CACHEKEY *rootp; CACHEKEY *rootp;
struct brt_header *prev_header = brt->h; assert(brt->h);
if ((r = toku_read_and_pin_brt_header(brt->cf, &brt->h))) {
if (0) { died0: toku_unpin_brt_header(brt); }
return r;
}
u_int32_t fullhash; u_int32_t fullhash;
rootp = toku_calculate_root_offset_pointer(brt, &fullhash); rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
printf("split_count=%d\n", split_count); printf("split_count=%d\n", split_count);
if ((r = toku_dump_brtnode(brt, *rootp, 0, 0, 0, 0, 0))) goto died0; return toku_dump_brtnode(brt, *rootp, 0, 0, 0, 0, 0);
if ((r = toku_unpin_brt_header(brt))!=0) return r;
brt->h = prev_header;
return 0;
} }
#if 0 #if 0
...@@ -3040,8 +2997,7 @@ int toku_brt_search(BRT brt, brt_search_t *search, DBT *newkey, DBT *newval, TOK ...@@ -3040,8 +2997,7 @@ int toku_brt_search(BRT brt, brt_search_t *search, DBT *newkey, DBT *newval, TOK
{ {
int r, rr; int r, rr;
rr = toku_read_and_pin_brt_header(brt->cf, &brt->h); assert(brt->h);
assert(rr == 0);
*root_put_counter = brt->root_put_counter; *root_put_counter = brt->root_put_counter;
...@@ -3084,9 +3040,6 @@ int toku_brt_search(BRT brt, brt_search_t *search, DBT *newkey, DBT *newval, TOK ...@@ -3084,9 +3040,6 @@ int toku_brt_search(BRT brt, brt_search_t *search, DBT *newkey, DBT *newval, TOK
rr = toku_unpin_brtnode(brt, node); rr = toku_unpin_brtnode(brt, node);
assert(rr == 0); assert(rr == 0);
rr = toku_unpin_brt_header(brt);
assert(rr == 0);
return r; return r;
} }
...@@ -3446,11 +3399,8 @@ static int brt_cursor_next_shortcut (BRT_CURSOR cursor, DBT *outkey, DBT *outval ...@@ -3446,11 +3399,8 @@ static int brt_cursor_next_shortcut (BRT_CURSOR cursor, DBT *outkey, DBT *outval
int toku_brt_cursor_peek_prev(BRT_CURSOR cursor, DBT *outkey, DBT *outval) { int toku_brt_cursor_peek_prev(BRT_CURSOR cursor, DBT *outkey, DBT *outval) {
if (toku_omt_cursor_is_valid(cursor->omtcursor)) { if (toku_omt_cursor_is_valid(cursor->omtcursor)) {
{ {
int rr = toku_read_and_pin_brt_header(cursor->brt->cf, &cursor->brt->h); assert(cursor->brt->h);
if (rr!=0) return rr;
u_int64_t h_counter = cursor->brt->root_put_counter; u_int64_t h_counter = cursor->brt->root_put_counter;
rr = toku_unpin_brt_header(cursor->brt);
assert(rr==0);
if (h_counter != cursor->root_put_counter) return -1; if (h_counter != cursor->root_put_counter) return -1;
} }
OMTVALUE le; OMTVALUE le;
...@@ -3475,11 +3425,8 @@ get_prev:; ...@@ -3475,11 +3425,8 @@ get_prev:;
int toku_brt_cursor_peek_next(BRT_CURSOR cursor, DBT *outkey, DBT *outval) { int toku_brt_cursor_peek_next(BRT_CURSOR cursor, DBT *outkey, DBT *outval) {
if (toku_omt_cursor_is_valid(cursor->omtcursor)) { if (toku_omt_cursor_is_valid(cursor->omtcursor)) {
{ {
int rr = toku_read_and_pin_brt_header(cursor->brt->cf, &cursor->brt->h); assert(cursor->brt->h);
if (rr!=0) return rr;
u_int64_t h_counter = cursor->brt->root_put_counter; u_int64_t h_counter = cursor->brt->root_put_counter;
rr = toku_unpin_brt_header(cursor->brt);
assert(rr==0);
if (h_counter != cursor->root_put_counter) return -1; if (h_counter != cursor->root_put_counter) return -1;
} }
OMTVALUE le; OMTVALUE le;
...@@ -3836,19 +3783,12 @@ static void toku_brt_keyrange_internal (BRT brt, CACHEKEY nodename, u_int32_t fu ...@@ -3836,19 +3783,12 @@ static void toku_brt_keyrange_internal (BRT brt, CACHEKEY nodename, u_int32_t fu
} }
int toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less, u_int64_t *equal, u_int64_t *greater) { int toku_brt_keyrange (BRT brt, DBT *key, u_int64_t *less, u_int64_t *equal, u_int64_t *greater) {
{ assert(brt->h);
int rr = toku_read_and_pin_brt_header(brt->cf, &brt->h);
assert(rr == 0);
}
u_int32_t fullhash; u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt, &fullhash); CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
*less = *equal = *greater = 0; *less = *equal = *greater = 0;
toku_brt_keyrange_internal (brt, *rootp, fullhash, key, less, equal, greater); toku_brt_keyrange_internal (brt, *rootp, fullhash, key, less, equal, greater);
{
int rr = toku_unpin_brt_header(brt);
assert(rr == 0);
}
return 0; return 0;
} }
...@@ -3868,22 +3808,18 @@ int toku_brt_cursor_delete(BRT_CURSOR cursor, int flags, TOKUTXN txn) { ...@@ -3868,22 +3808,18 @@ int toku_brt_cursor_delete(BRT_CURSOR cursor, int flags, TOKUTXN txn) {
int toku_brt_height_of_root(BRT brt, int *height) { int toku_brt_height_of_root(BRT brt, int *height) {
// for an open brt, return the current height. // for an open brt, return the current height.
int r; int r;
if ((r = toku_read_and_pin_brt_header(brt->cf, &brt->h))) { assert(brt->h);
if (0) { died0: toku_unpin_brt_header(brt); }
return r;
}
u_int32_t fullhash; u_int32_t fullhash;
CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt, &fullhash); CACHEKEY *rootp = toku_calculate_root_offset_pointer(brt, &fullhash);
void *node_v; void *node_v;
//assert(fullhash == toku_cachetable_hash(brt->cf, *rootp)); //assert(fullhash == toku_cachetable_hash(brt->cf, *rootp));
if ((r=toku_cachetable_get_and_pin(brt->cf, *rootp, fullhash, &node_v, NULL, if ((r=toku_cachetable_get_and_pin(brt->cf, *rootp, fullhash, &node_v, NULL,
toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt))) { toku_brtnode_flush_callback, toku_brtnode_fetch_callback, brt))) {
goto died0; return r;
} }
BRTNODE node = node_v; BRTNODE node = node_v;
*height = node->height; *height = node->height;
r = toku_unpin_brtnode(brt, node); assert(r==0); r = toku_unpin_brtnode(brt, node); assert(r==0);
r = toku_unpin_brt_header(brt); assert(r==0);
return 0; return 0;
} }
......
...@@ -26,7 +26,7 @@ void print_item (bytevec val, ITEMLEN len) { ...@@ -26,7 +26,7 @@ void print_item (bytevec val, ITEMLEN len) {
void dump_header (int f, struct brt_header **header) { void dump_header (int f, struct brt_header **header) {
struct brt_header *h; struct brt_header *h;
int r; int r;
r = toku_deserialize_brtheader_from (f, header_blocknum, 0/*pass 0 for hash. It doesn't matter.*/, &h); assert(r==0); r = toku_deserialize_brtheader_from (f, header_blocknum, &h); assert(r==0);
printf("brtheader:\n"); printf("brtheader:\n");
if (h->layout_version==BRT_LAYOUT_VERSION_6) printf(" layout_version<=6\n"); if (h->layout_version==BRT_LAYOUT_VERSION_6) printf(" layout_version<=6\n");
else printf(" layout_version=%d\n", h->layout_version); else printf(" layout_version=%d\n", h->layout_version);
......
...@@ -124,7 +124,6 @@ struct fileid { ...@@ -124,7 +124,6 @@ struct fileid {
struct cachefile { struct cachefile {
CACHEFILE next; CACHEFILE next;
u_int32_t header_fullhash;
u_int64_t refcount; /* CACHEFILEs are shared. Use a refcount to decide when to really close it. u_int64_t refcount; /* CACHEFILEs are shared. Use a refcount to decide when to really close it.
* The reference count is one for every open DB. * The reference count is one for every open DB.
* Plus one for every commit/rollback record. (It would be harder to keep a count for every open transaction, * Plus one for every commit/rollback record. (It would be harder to keep a count for every open transaction,
...@@ -136,6 +135,9 @@ struct cachefile { ...@@ -136,6 +135,9 @@ struct cachefile {
struct fileid fileid; struct fileid fileid;
FILENUM filenum; FILENUM filenum;
char *fname; char *fname;
void *userdata;
int (*close_userdata)(CACHEFILE cf, void *userdata); // when closing the last reference to a cachefile, first call this function.
}; };
int toku_create_cachetable(CACHETABLE *result, long size_limit, LSN initial_lsn, TOKULOGGER logger) { int toku_create_cachetable(CACHETABLE *result, long size_limit, LSN initial_lsn, TOKULOGGER logger) {
...@@ -163,6 +165,7 @@ int toku_create_cachetable(CACHETABLE *result, long size_limit, LSN initial_lsn, ...@@ -163,6 +165,7 @@ int toku_create_cachetable(CACHETABLE *result, long size_limit, LSN initial_lsn,
t->size_writing = 0; t->size_writing = 0;
t->lsn_of_checkpoint = initial_lsn; t->lsn_of_checkpoint = initial_lsn;
t->logger = logger; t->logger = logger;
int r; int r;
writequeue_init(&t->wq); writequeue_init(&t->wq);
r = pthread_mutex_init(&t->mutex, 0); assert(r == 0); r = pthread_mutex_init(&t->mutex, 0); assert(r == 0);
...@@ -233,9 +236,12 @@ int toku_cachetable_openfd (CACHEFILE *cf, CACHETABLE t, int fd, const char *fna ...@@ -233,9 +236,12 @@ int toku_cachetable_openfd (CACHEFILE *cf, CACHETABLE t, int fd, const char *fna
newcf->filenum.fileid = next_filenum_to_use.fileid++; newcf->filenum.fileid = next_filenum_to_use.fileid++;
cachefile_init_filenum(newcf, fd, fname, fileid); cachefile_init_filenum(newcf, fd, fname, fileid);
newcf->refcount = 1; newcf->refcount = 1;
newcf->header_fullhash = toku_cachetable_hash(newcf, header_blocknum);
newcf->next = t->cachefiles; newcf->next = t->cachefiles;
t->cachefiles = newcf; t->cachefiles = newcf;
newcf->userdata = 0;
newcf->close_userdata = 0;
*cf = newcf; *cf = newcf;
return 0; return 0;
} }
...@@ -301,6 +307,12 @@ int toku_cachefile_close (CACHEFILE *cfp, TOKULOGGER logger) { ...@@ -301,6 +307,12 @@ int toku_cachefile_close (CACHEFILE *cfp, TOKULOGGER logger) {
cachetable_unlock(ct); cachetable_unlock(ct);
return r; return r;
} }
if (cf->close_userdata && (r = cf->close_userdata(cf, cf->userdata))) {
cachetable_unlock(ct);
return r;
}
cf->close_userdata = NULL;
cf->userdata = NULL;
cf->cachetable->cachefiles = remove_cf_from_list(cf, cf->cachetable->cachefiles); cf->cachetable->cachefiles = remove_cf_from_list(cf, cf->cachetable->cachefiles);
cachetable_unlock(ct); cachetable_unlock(ct);
r = close(cf->fd); r = close(cf->fd);
...@@ -1117,11 +1129,6 @@ FILENUM toku_cachefile_filenum (CACHEFILE cf) { ...@@ -1117,11 +1129,6 @@ FILENUM toku_cachefile_filenum (CACHEFILE cf) {
return cf->filenum; return cf->filenum;
} }
u_int32_t toku_cachefile_fullhash_of_header (CACHEFILE cachefile) {
return cachefile->header_fullhash;
}
#if DO_WRITER_THREAD #if DO_WRITER_THREAD
// The writer thread waits for work in the write queue and writes the pair // The writer thread waits for work in the write queue and writes the pair
...@@ -1205,3 +1212,11 @@ int toku_cachetable_get_key_state (CACHETABLE ct, CACHEKEY key, CACHEFILE cf, vo ...@@ -1205,3 +1212,11 @@ int toku_cachetable_get_key_state (CACHETABLE ct, CACHEKEY key, CACHEFILE cf, vo
note_hash_count(count); note_hash_count(count);
return r; return r;
} }
// Attach caller-supplied data to a cachefile, together with the hook that
// toku_cachefile_close invokes (when the hook is non-NULL) before the
// cachefile is removed from the cachetable's list.
//   cf             - the cachefile to annotate.
//   userdata       - opaque pointer stored on the cachefile; later passed to the hook.
//   close_userdata - hook called as close_userdata(cf, userdata); may be NULL.
// Any previously stored userdata/hook pair is simply overwritten.
void toku_cachefile_set_userdata (CACHEFILE cf, void *userdata, int (*close_userdata)(CACHEFILE, void*)) {
    cf->close_userdata = close_userdata;
    cf->userdata       = userdata;
}
// Return the opaque pointer currently stored in the cachefile's userdata
// field (0 right after toku_cachetable_openfd creates the cachefile, or
// whatever toku_cachefile_set_userdata last stored).
void *toku_cachefile_get_userdata(CACHEFILE cf) {
    void *stored = cf->userdata;
    return stored;
}
...@@ -46,14 +46,18 @@ typedef void (*CACHETABLE_FLUSH_CALLBACK)(CACHEFILE, CACHEKEY key, void *value, ...@@ -46,14 +46,18 @@ typedef void (*CACHETABLE_FLUSH_CALLBACK)(CACHEFILE, CACHEKEY key, void *value,
// the fetch callback // the fetch callback
typedef int (*CACHETABLE_FETCH_CALLBACK)(CACHEFILE, CACHEKEY key, u_int32_t fullhash, void **value, long *sizep, void *extraargs, LSN *written_lsn); typedef int (*CACHETABLE_FETCH_CALLBACK)(CACHEFILE, CACHEKEY key, u_int32_t fullhash, void **value, long *sizep, void *extraargs, LSN *written_lsn);
// Put a key and value pair into the cachetable void toku_cachefile_set_userdata(CACHEFILE cf, void *userdata, int (*close_userdata)(CACHEFILE, void*));
// effects: if the key,cachefile is not in the cachetable, then insert the pair and pin it. // Effect: Store some cachefile-specific data. When the last reference to a cachefile is closed, we call close_userdata.
// returns: 0 if success, otherwise an error // If userdata is already non-NULL, then we simply overwrite it.
void *toku_cachefile_get_userdata(CACHEFILE);
int toku_cachetable_put(CACHEFILE cf, CACHEKEY key, u_int32_t fullhash, int toku_cachetable_put(CACHEFILE cf, CACHEKEY key, u_int32_t fullhash,
void *value, long size, void *value, long size,
CACHETABLE_FLUSH_CALLBACK flush_callback, CACHETABLE_FLUSH_CALLBACK flush_callback,
CACHETABLE_FETCH_CALLBACK fetch_callback, void *extraargs); CACHETABLE_FETCH_CALLBACK fetch_callback, void *extraargs);
// Effect: Put a key and value pair into the cachetable
// If the key,cachefile is not in the cachetable, then insert the pair and pin it.
// returns: 0 if success, otherwise an error
int toku_cachetable_get_and_pin(CACHEFILE, CACHEKEY, u_int32_t /*fullhash*/, int toku_cachetable_get_and_pin(CACHEFILE, CACHEKEY, u_int32_t /*fullhash*/,
void **/*value*/, long *sizep, void **/*value*/, long *sizep,
......
...@@ -149,9 +149,7 @@ static void toku_recover_fheader (LSN UU(lsn), TXNID UU(txnid),FILENUM filenum,L ...@@ -149,9 +149,7 @@ static void toku_recover_fheader (LSN UU(lsn), TXNID UU(txnid),FILENUM filenum,L
} else { } else {
assert(0); assert(0);
} }
u_int32_t fullhash = toku_cachetable_hash(pair->cf, header_blocknum); //toku_cachetable_put(pair->cf, header_blocknum, fullhash, h, 0, toku_brtheader_flush_callback, toku_brtheader_fetch_callback, 0);
h->fullhash = fullhash;
toku_cachetable_put(pair->cf, header_blocknum, fullhash, h, 0, toku_brtheader_flush_callback, toku_brtheader_fetch_callback, 0);
if (pair->brt) { if (pair->brt) {
toku_free(pair->brt->h); toku_free(pair->brt->h);
} else { } else {
...@@ -168,8 +166,7 @@ static void toku_recover_fheader (LSN UU(lsn), TXNID UU(txnid),FILENUM filenum,L ...@@ -168,8 +166,7 @@ static void toku_recover_fheader (LSN UU(lsn), TXNID UU(txnid),FILENUM filenum,L
pair->brt->h = h; pair->brt->h = h;
pair->brt->nodesize = h->nodesize; pair->brt->nodesize = h->nodesize;
pair->brt->flags = h->nodesize; pair->brt->flags = h->nodesize;
r = toku_unpin_brt_header(pair->brt); toku_cachefile_set_userdata(pair->cf, pair->brt->h, toku_brtheader_close);
assert(r==0);
} }
void toku_recover_newbrtnode (LSN lsn, FILENUM filenum, BLOCKNUM blocknum,u_int32_t height,u_int32_t nodesize,u_int8_t is_dup_sort,u_int32_t rand4fingerprint) { void toku_recover_newbrtnode (LSN lsn, FILENUM filenum, BLOCKNUM blocknum,u_int32_t height,u_int32_t nodesize,u_int8_t is_dup_sort,u_int32_t rand4fingerprint) {
...@@ -238,12 +235,10 @@ static void toku_recover_deqrootentry (LSN lsn __attribute__((__unused__)), FILE ...@@ -238,12 +235,10 @@ static void toku_recover_deqrootentry (LSN lsn __attribute__((__unused__)), FILE
struct cf_pair *pair = NULL; struct cf_pair *pair = NULL;
int r = find_cachefile(filenum, &pair); int r = find_cachefile(filenum, &pair);
assert(r==0); assert(r==0);
void *h_v; //void *h_v;
u_int32_t fullhash = toku_cachetable_hash(pair->cf, header_blocknum); //r = toku_cachetable_get_and_pin(pair->cf, header_blocknum, fullhash,
r = toku_cachetable_get_and_pin(pair->cf, header_blocknum, fullhash, // &h_v, NULL, toku_brtheader_flush_callback, toku_brtheader_fetch_callback, 0);
&h_v, NULL, toku_brtheader_flush_callback, toku_brtheader_fetch_callback, 0); struct brt_header *h=0;
assert(r==0);
struct brt_header *h=h_v;
bytevec storedkey,storeddata; bytevec storedkey,storeddata;
ITEMLEN storedkeylen, storeddatalen; ITEMLEN storedkeylen, storeddatalen;
TXNID storedxid; TXNID storedxid;
...@@ -252,8 +247,8 @@ static void toku_recover_deqrootentry (LSN lsn __attribute__((__unused__)), FILE ...@@ -252,8 +247,8 @@ static void toku_recover_deqrootentry (LSN lsn __attribute__((__unused__)), FILE
assert(r==0); assert(r==0);
r = toku_fifo_deq(h->fifo); r = toku_fifo_deq(h->fifo);
assert(r==0); assert(r==0);
r = toku_cachetable_unpin(pair->cf, header_blocknum, fullhash, 1, 0); //r = toku_cachetable_unpin(pair->cf, header_blocknum, fullhash, 1, 0);
assert(r==0); //assert(r==0);
} }
void toku_recover_enqrootentry (LSN lsn __attribute__((__unused__)), FILENUM filenum, TXNID xid, u_int32_t typ, BYTESTRING key, BYTESTRING val) { void toku_recover_enqrootentry (LSN lsn __attribute__((__unused__)), FILENUM filenum, TXNID xid, u_int32_t typ, BYTESTRING key, BYTESTRING val) {
...@@ -262,7 +257,12 @@ void toku_recover_enqrootentry (LSN lsn __attribute__((__unused__)), FILENUM fil ...@@ -262,7 +257,12 @@ void toku_recover_enqrootentry (LSN lsn __attribute__((__unused__)), FILENUM fil
assert(r==0); assert(r==0);
void *h_v; void *h_v;
u_int32_t fullhash = toku_cachetable_hash(pair->cf, header_blocknum); u_int32_t fullhash = toku_cachetable_hash(pair->cf, header_blocknum);
r = toku_cachetable_get_and_pin(pair->cf, header_blocknum, fullhash, &h_v, NULL, toku_brtheader_flush_callback, toku_brtheader_fetch_callback, 0); if (0) {
//r = toku_cachetable_get_and_pin(pair->cf, header_blocknum, fullhash, &h_v, NULL, toku_brtheader_flush_callback, toku_brtheader_fetch_callback, 0);
} else {
h_v=0;
assert(0);
}
assert(r==0); assert(r==0);
struct brt_header *h=h_v; struct brt_header *h=h_v;
r = toku_fifo_enq(h->fifo, key.data, key.len, val.data, val.len, typ, xid); r = toku_fifo_enq(h->fifo, key.data, key.len, val.data, val.len, typ, xid);
...@@ -655,11 +655,9 @@ void toku_recover_changeunnamedroot (LSN UU(lsn), FILENUM filenum, BLOCKNUM UU(o ...@@ -655,11 +655,9 @@ void toku_recover_changeunnamedroot (LSN UU(lsn), FILENUM filenum, BLOCKNUM UU(o
int r = find_cachefile(filenum, &pair); int r = find_cachefile(filenum, &pair);
assert(r==0); assert(r==0);
assert(pair->brt); assert(pair->brt);
r = toku_read_and_pin_brt_header(pair->cf, &pair->brt->h); assert(pair->brt->h);
assert(r==0);
pair->brt->h->roots[0] = newroot; pair->brt->h->roots[0] = newroot;
pair->brt->h->root_hashes[0].valid = FALSE; pair->brt->h->root_hashes[0].valid = FALSE;
r = toku_unpin_brt_header(pair->brt);
} }
void toku_recover_changenamedroot (LSN UU(lsn), FILENUM UU(filenum), BYTESTRING UU(name), BLOCKNUM UU(oldroot), BLOCKNUM UU(newroot)) { assert(0); } void toku_recover_changenamedroot (LSN UU(lsn), FILENUM UU(filenum), BYTESTRING UU(name), BLOCKNUM UU(oldroot), BLOCKNUM UU(newroot)) { assert(0); }
...@@ -668,10 +666,8 @@ void toku_recover_changeunusedmemory (LSN UU(lsn), FILENUM filenum, BLOCKNUM UU( ...@@ -668,10 +666,8 @@ void toku_recover_changeunusedmemory (LSN UU(lsn), FILENUM filenum, BLOCKNUM UU(
int r = find_cachefile(filenum, &pair); int r = find_cachefile(filenum, &pair);
assert(r==0); assert(r==0);
assert(pair->brt); assert(pair->brt);
r = toku_read_and_pin_brt_header(pair->cf, &pair->brt->h); assert(pair->brt->h);
assert(r==0);
pair->brt->h->unused_blocks = newunused; pair->brt->h->unused_blocks = newunused;
r = toku_unpin_brt_header(pair->brt);
} }
static int toku_recover_checkpoint (LSN UU(lsn)) { static int toku_recover_checkpoint (LSN UU(lsn)) {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment