Commit 0fd43d47 authored by Bradley C. Kuszmaul

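Fix the hashit() problem: flushing can change the result of hashit(), so recompute the hash bucket at the point of use instead of caching it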

git-svn-id: file:///svn/tokudb@491 c7de825b-a66e-492c-adef-691d508d4ae1
parent d7408933
......@@ -45,6 +45,7 @@ BINS = $(REGRESSION_TESTS) \
libs: log.o
bins: $(BINS)
check: bins
./benchmark-test --valsize 256 --verify 1
$(DTOOL) ./ybt-test
$(DTOOL) ./pma-test
$(DTOOL) ./cachetable-test
......
......@@ -21,6 +21,8 @@ enum { NODE_SIZE = 1<<20 };
int nodesize = NODE_SIZE;
int keysize = sizeof (long long);
int valsize = sizeof (long long);
int do_verify =0; /* Do a slow verify after every insert. */
CACHETABLE ct;
BRT t;
......@@ -51,6 +53,7 @@ void insert (long long v) {
memset(vc, 0, sizeof vc);
long_long_to_array(vc, v);
brt_insert(t, fill_dbt(&kt, kc, keysize), fill_dbt(&vt, vc, valsize), 0, 0);
if (do_verify) cachetable_verify(ct);
}
void serial_insert_from (long long from) {
......@@ -94,7 +97,7 @@ void biginsert (long long n_elements, struct timeval *starttime) {
}
void usage() {
printf("benchmark-test [--nodesize NODESIZE] [--keysize KEYSIZE] [--valsize VALSIZE] [TOTALITEMS]\n");
printf("benchmark-test [--nodesize NODESIZE] [--keysize KEYSIZE] [--valsize VALSIZE] [--verify] [TOTALITEMS]\n");
}
int main (int argc, char *argv[]) {
......@@ -128,6 +131,11 @@ int main (int argc, char *argv[]) {
continue;
}
if (strcmp(arg, "--verify")==0) {
do_verify = 1;
continue;
}
usage();
return 1;
}
......
......@@ -351,13 +351,13 @@ int serialize_brt_header_to (int fd, struct brt_header *h) {
}
int deserialize_brtheader_from (int fd, diskoff off, struct brt_header **brth) {
printf("%s:%d calling MALLOC\n", __FILE__, __LINE__);
//printf("%s:%d calling MALLOC\n", __FILE__, __LINE__);
struct brt_header *MALLOC(h);
struct rbuf rc;
int size;
int sizeagain;
assert(off==0);
printf("%s:%d malloced %p\n", __FILE__, __LINE__, h);
//printf("%s:%d malloced %p\n", __FILE__, __LINE__, h);
{
uint32_t size_n;
ssize_t r = pread(fd, &size_n, sizeof(size_n), off);
......
......@@ -30,10 +30,10 @@ static void test0 (void) {
unlink(fname);
r = open_brt(fname, 0, 1, &t, 1024, ct, default_compare_fun);
assert(r==0);
printf("%s:%d test0\n", __FILE__, __LINE__);
printf("%s:%d n_items_malloced=%lld\n", __FILE__, __LINE__, n_items_malloced);
//printf("%s:%d test0\n", __FILE__, __LINE__);
//printf("%s:%d n_items_malloced=%lld\n", __FILE__, __LINE__, n_items_malloced);
r = close_brt(t); assert(r==0);
printf("%s:%d n_items_malloced=%lld\n", __FILE__, __LINE__, n_items_malloced); assert(r==0);
//printf("%s:%d n_items_malloced=%lld\n", __FILE__, __LINE__, n_items_malloced);
r = cachetable_close(&ct);
assert(r==0);
memory_check_all_free();
......@@ -767,47 +767,49 @@ static void test_wrongendian_compare (int wrong_p, unsigned int N) {
r = close_brt(brt);
}
if (1) {
r = open_brt(n, 0, 1, &brt, 1<<20, ct, wrong_p ? wrong_compare_fun : default_compare_fun); assert(r==0);
{
cachetable_verify(ct);
r = open_brt(n, 0, 1, &brt, 1<<20, ct, wrong_p ? wrong_compare_fun : default_compare_fun); assert(r==0);
cachetable_verify(ct);
for (i=0; i<N; i++) {
unsigned char a[4],b[4];
b[3] = a[0] = i&255;
b[2] = a[1] = (i>>8)&255;
b[1] = a[2] = (i>>16)&255;
b[0] = a[3] = (i>>24)&255;
fill_b(&kbt, a, sizeof(a));
fill_dbt(&vbt, b, sizeof(b));
if (0) printf("%s:%d insert: %02x%02x%02x%02x -> %02x%02x%02x%02x\n", __FILE__, __LINE__,
((unsigned char*)kbt.data)[0], ((unsigned char*)kbt.data)[1], ((unsigned char*)kbt.data)[2], ((unsigned char*)kbt.data)[3],
((unsigned char*)vbt.data)[0], ((unsigned char*)vbt.data)[1], ((unsigned char*)vbt.data)[2], ((unsigned char*)vbt.data)[3]);
r = brt_insert(brt, &kbt, &vbt, &nonce_db, null_txn);
assert(r==0);
}
r = brt_cursor(brt, &cursor); assert(r==0);
int prev=-1;
for (i=0; i<N; i++) {
int this;
init_dbt(&kbt); init_dbt(&vbt);
r = brt_cursor_get(cursor, &kbt, &vbt, DB_NEXT, null_db, null_txn);
assert(r==0);
assert(kbt.size==4 && vbt.size==4);
if (0) printf("%s:%d %02x%02x%02x%02x -> %02x%02x%02x%02x\n", __FILE__, __LINE__,
((unsigned char*)kbt.data)[0], ((unsigned char*)kbt.data)[1], ((unsigned char*)kbt.data)[2], ((unsigned char*)kbt.data)[3],
((unsigned char*)vbt.data)[0], ((unsigned char*)vbt.data)[1], ((unsigned char*)vbt.data)[2], ((unsigned char*)vbt.data)[3]);
this= ( (((unsigned char*)kbt.data)[3] << 24) +
(((unsigned char*)kbt.data)[2] << 16) +
(((unsigned char*)kbt.data)[1] << 8) +
(((unsigned char*)kbt.data)[0] << 0));
assert(prev<this);
prev=this;
assert(this==(int)i);
}
for (i=0; i<N; i++) {
unsigned char a[4],b[4];
b[3] = a[0] = i&255;
b[2] = a[1] = (i>>8)&255;
b[1] = a[2] = (i>>16)&255;
b[0] = a[3] = (i>>24)&255;
fill_b(&kbt, a, sizeof(a));
fill_dbt(&vbt, b, sizeof(b));
if (0) printf("%s:%d insert: %02x%02x%02x%02x -> %02x%02x%02x%02x\n", __FILE__, __LINE__,
((unsigned char*)kbt.data)[0], ((unsigned char*)kbt.data)[1], ((unsigned char*)kbt.data)[2], ((unsigned char*)kbt.data)[3],
((unsigned char*)vbt.data)[0], ((unsigned char*)vbt.data)[1], ((unsigned char*)vbt.data)[2], ((unsigned char*)vbt.data)[3]);
r = brt_insert(brt, &kbt, &vbt, &nonce_db, null_txn);
assert(r==0);
cachetable_verify(ct);
}
r = brt_cursor(brt, &cursor); assert(r==0);
int prev=-1;
for (i=0; i<N; i++) {
int this;
init_dbt(&kbt); init_dbt(&vbt);
r = brt_cursor_get(cursor, &kbt, &vbt, DB_NEXT, null_db, null_txn);
assert(r==0);
assert(kbt.size==4 && vbt.size==4);
if (0) printf("%s:%d %02x%02x%02x%02x -> %02x%02x%02x%02x\n", __FILE__, __LINE__,
((unsigned char*)kbt.data)[0], ((unsigned char*)kbt.data)[1], ((unsigned char*)kbt.data)[2], ((unsigned char*)kbt.data)[3],
((unsigned char*)vbt.data)[0], ((unsigned char*)vbt.data)[1], ((unsigned char*)vbt.data)[2], ((unsigned char*)vbt.data)[3]);
this= ( (((unsigned char*)kbt.data)[3] << 24) +
(((unsigned char*)kbt.data)[2] << 16) +
(((unsigned char*)kbt.data)[1] << 8) +
(((unsigned char*)kbt.data)[0] << 0));
assert(prev<this);
prev=this;
assert(this==(int)i);
cachetable_verify(ct);
}
r = close_brt(brt);
r = close_brt(brt);
}
r = cachetable_close(&ct); assert(r==0);
memory_check_all_free();
......
......@@ -32,6 +32,7 @@ struct ctpair {
cachetable_flush_func_t flush_callback;
cachetable_fetch_func_t fetch_callback;
void*extraargs;
int verify_flag; /* Used in verify_cachetable() */
};
struct cachetable {
......@@ -257,7 +258,6 @@ static PAIR remove_from_hash_chain (PAIR remove_me, PAIR list) {
}
static void flush_and_remove (CACHETABLE t, PAIR remove_me, int write_me) {
unsigned int h = hashit(t, remove_me->key);
lru_remove(t, remove_me);
//printf("flush_callback(%lld,%p)\n", remove_me->key, remove_me->value);
WHEN_TRACE_CT(printf("%s:%d CT flush_callback(%lld, %p, dirty=%d, 0)\n", __FILE__, __LINE__, remove_me->key, remove_me->value, remove_me->dirty && write_me));
......@@ -266,7 +266,10 @@ static void flush_and_remove (CACHETABLE t, PAIR remove_me, int write_me) {
remove_me->flush_callback(remove_me->cachefile, remove_me->key, remove_me->value, remove_me->size, remove_me->dirty && write_me, 0);
t->n_in_table--;
// Remove it from the hash chain.
t->table[h] = remove_from_hash_chain (remove_me, t->table[h]);
{
unsigned int h = hashit(t, remove_me->key);
t->table[h] = remove_from_hash_chain (remove_me, t->table[h]);
}
t->size_current -= remove_me->size;
toku_free(remove_me);
}
......@@ -282,11 +285,8 @@ static void flush_and_keep (PAIR flush_me) {
static int maybe_flush_some (CACHETABLE t, long size __attribute__((unused))) {
int r = 0;
again:
#if 0
if (t->n_in_table >= t->table_size) {
#else
// if (t->n_in_table >= t->table_size) {
if (size + t->size_current > t->size_limit) {
#endif
/* Try to remove one. */
PAIR remove_me;
for (remove_me = t->tail; remove_me; remove_me = remove_me->prev) {
......@@ -328,18 +328,18 @@ static int cachetable_insert_at(CACHEFILE cachefile, int h, CACHEKEY key, void *
ct->table[h] = p;
ct->n_in_table++;
ct->size_current += size;
if (ct->n_in_table > ct->table_size)
if (ct->n_in_table > ct->table_size) {
cachetable_rehash(ct, +1);
}
return 0;
}
int cachetable_put_size(CACHEFILE cachefile, CACHEKEY key, void*value, long size,
cachetable_flush_func_t flush_callback, cachetable_fetch_func_t fetch_callback, void *extraargs) {
int h = hashit(cachefile->cachetable, key);
WHEN_TRACE_CT(printf("%s:%d CT cachetable_put(%lld)=%p\n", __FILE__, __LINE__, key, value));
{
PAIR p;
for (p=cachefile->cachetable->table[h]; p; p=p->hash_chain) {
for (p=cachefile->cachetable->table[hashit(cachefile->cachetable, key)]; p; p=p->hash_chain) {
if (p->key==key && p->cachefile==cachefile) {
// Semantically, these two asserts are not strictly right. After all, when are two functions eq?
// In practice, the functions better be the same.
......@@ -351,15 +351,17 @@ int cachetable_put_size(CACHEFILE cachefile, CACHEKEY key, void*value, long size
}
if (maybe_flush_some(cachefile->cachetable, size))
return -2;
return cachetable_insert_at(cachefile, h, key, value, size, flush_callback, fetch_callback, extraargs, 1);
// flushing could change the result from hashit()
int r = cachetable_insert_at(cachefile, hashit(cachefile->cachetable, key), key, value, size, flush_callback, fetch_callback, extraargs, 1);
return r;
}
int cachetable_get_and_pin_size (CACHEFILE cachefile, CACHEKEY key, void**value, long *sizep,
cachetable_flush_func_t flush_callback, cachetable_fetch_func_t fetch_callback, void *extraargs) {
CACHETABLE t = cachefile->cachetable;
int h = hashit(t,key);
int tsize __attribute__((__unused__)) = t->table_size;
PAIR p;
for (p=t->table[h]; p; p=p->hash_chain) {
for (p=t->table[hashit(t,key)]; p; p=p->hash_chain) {
if (p->key==key && p->cachefile==cachefile) {
*value = p->value;
*sizep = p->size;
......@@ -370,6 +372,7 @@ int cachetable_get_and_pin_size (CACHEFILE cachefile, CACHEKEY key, void**value,
}
}
if (maybe_flush_some(t, 1)) return -2;
// Note. hashit(t,key) may have changed as a result of flushing.
{
void *toku_value;
long size = 1; // compat
......@@ -377,7 +380,7 @@ int cachetable_get_and_pin_size (CACHEFILE cachefile, CACHEKEY key, void**value,
WHEN_TRACE_CT(printf("%s:%d CT: fetch_callback(%lld...)\n", __FILE__, __LINE__, key));
if ((r=fetch_callback(cachefile, key, &toku_value, &size, extraargs)))
return r;
cachetable_insert_at(cachefile, h, key, toku_value, size, flush_callback, fetch_callback, extraargs, 0);
cachetable_insert_at(cachefile, hashit(t,key), key, toku_value, size, flush_callback, fetch_callback, extraargs, 0);
*value = toku_value;
if (sizep)
*sizep = size;
......@@ -389,9 +392,8 @@ int cachetable_get_and_pin_size (CACHEFILE cachefile, CACHEKEY key, void**value,
int cachetable_maybe_get_and_pin (CACHEFILE cachefile, CACHEKEY key, void**value) {
CACHETABLE t = cachefile->cachetable;
int h = hashit(t,key);
PAIR p;
for (p=t->table[h]; p; p=p->hash_chain) {
for (p=t->table[hashit(t,key)]; p; p=p->hash_chain) {
if (p->key==key && p->cachefile==cachefile) {
*value = p->value;
p->pinned++;
......@@ -406,11 +408,10 @@ int cachetable_maybe_get_and_pin (CACHEFILE cachefile, CACHEKEY key, void**value
int cachetable_unpin_size (CACHEFILE cachefile, CACHEKEY key, int dirty, long size) {
CACHETABLE t = cachefile->cachetable;
int h = hashit(t,key);
PAIR p;
WHEN_TRACE_CT(printf("%s:%d unpin(%lld)", __FILE__, __LINE__, key));
//printf("%s:%d is dirty now=%d\n", __FILE__, __LINE__, dirty);
for (p=t->table[h]; p; p=p->hash_chain) {
for (p=t->table[hashit(t,key)]; p; p=p->hash_chain) {
if (p->key==key && p->cachefile==cachefile) {
assert(p->pinned>0);
p->pinned--;
......@@ -437,6 +438,51 @@ int cachetable_flush (CACHETABLE t) {
return 0;
}
/* Run the consistency check on the whole cachetable that CF belongs to,
 * not only on the pairs owned by CF. */
void cachefile_verify (CACHEFILE cf) {
    CACHETABLE owner = cf->cachetable;
    cachetable_verify(owner);
}
/* Check that the hash table of T and the LRU list of T contain the same pairs.
 *
 * Works in three passes, using each pair's verify_flag as a mark:
 *   1. clear the mark on every pair reachable from a hash bucket;
 *   2. walk the LRU list, check that each pair can be found in the bucket
 *      hashit() selects for its key, and mark it;
 *   3. check that every pair reachable from a hash bucket got marked.
 *
 * Any inconsistency trips an assert().  Every LRU entry costs a hash-chain
 * walk, so this is meant for debugging and tests. */
void cachetable_verify (CACHETABLE t) {
    int bucket;
    PAIR entry;

    /* Pass 1: unmark everything that is hashed. */
    for (bucket = 0; bucket < t->table_size; bucket++) {
        for (entry = t->table[bucket]; entry; entry = entry->hash_chain) {
            entry->verify_flag = 0;
        }
    }

    /* Pass 2: every LRU entry must be hashed; mark each one as it is seen. */
    for (entry = t->head; entry; entry = entry->next) {
        PAIR candidate;

        /* A mark that is already set means this pair was visited earlier on
         * this walk, or was never reached (and cleared) by pass 1. */
        assert(entry->verify_flag==0);

        /* Search the chain of the bucket this key hashes to for this exact pair. */
        candidate = t->table[hashit(t, entry->key)];
        while (candidate && candidate != entry) {
            candidate = candidate->hash_chain;
        }
        if (!candidate) {
            fprintf(stderr, "Something in the LRU chain is not hashed\n");
            assert(0);
        }

        entry->verify_flag = 1;
    }

    /* Pass 3: anything hashed but still unmarked is missing from the LRU list. */
    for (bucket = 0; bucket < t->table_size; bucket++) {
        for (entry = t->table[bucket]; entry; entry = entry->hash_chain) {
            assert(entry->verify_flag);
        }
    }
}
static void assert_cachefile_is_flushed_and_removed (CACHEFILE cf) {
CACHETABLE t = cf->cachetable;
int i;
......@@ -500,16 +546,16 @@ int cachetable_close (CACHETABLE *tp) {
int cachetable_remove (CACHEFILE cachefile, CACHEKEY key, int write_me) {
/* Removing something already present is OK. */
CACHETABLE t = cachefile->cachetable;
int h = hashit(t,key);
PAIR p;
for (p=t->table[h]; p; p=p->hash_chain) {
for (p=t->table[hashit(t,key)]; p; p=p->hash_chain) {
if (p->key==key && p->cachefile==cachefile) {
flush_and_remove(t, p, write_me);
if (4 * t->n_in_table < t->table_size)
cachetable_rehash(t, -1);
return 0;
goto done;
}
}
done:
return 0;
}
......@@ -581,9 +627,8 @@ void cachetable_get_state(CACHETABLE ct, int *num_entries_ptr, int *hash_size_pt
int cachetable_get_key_state(CACHETABLE ct, CACHEKEY key, void **value_ptr,
int *dirty_ptr, long long *pin_ptr, long *size_ptr) {
int h = hashit(ct, key);
PAIR p;
for (p = ct->table[h]; p; p = p->hash_chain) {
for (p = ct->table[hashit(ct, key)]; p; p = p->hash_chain) {
if (p->key == key) {
if (value_ptr)
*value_ptr = p->value;
......
......@@ -85,4 +85,7 @@ static inline int cachetable_unpin(CACHEFILE cf, CACHEKEY key, int dirty) {
return cachetable_unpin_size(cf, key, dirty, 1);
}
void cachefile_verify (CACHEFILE cf); // Verify the whole cachetable that the CF is in. Slow.
void cachetable_verify (CACHETABLE t); // Slow...
#endif
......@@ -155,6 +155,7 @@ int tokulogger_log_phys_add_or_delete_in_leaf (DB *db, TOKUTXN txn, diskoff disk
}
int tokulogger_fsync (TOKULOGGER logger) {
//return 0;/// NO TXN
if (logger->n_in_buf>0) {
int r = write(logger->fd, logger->buf, logger->n_in_buf);
if (r==-1) return errno;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment