Commit 66cd2fdd authored by Bradley C. Kuszmaul

Merge in tokudb.1021 changes. Addresses #1021.

{{{
svn merge -r5107:5184 https://svn.tokutek.com/tokudb/tokudb.1021
}}}


git-svn-id: file:///svn/tokudb@5214 c7de825b-a66e-492c-adef-691d508d4ae1
parent 0817ad30
...@@ -31,7 +31,7 @@ VGRIND = valgrind --quiet --error-exitcode=1 --leak-check=yes ...@@ -31,7 +31,7 @@ VGRIND = valgrind --quiet --error-exitcode=1 --leak-check=yes
endif endif
CFLAGS = -Wall -W -Wcast-align -Wbad-function-cast -Wextra -Wmissing-noreturn -Wmissing-format-attribute $(OPTFLAGS) -g3 -ggdb3 $(GCOV_FLAGS) $(PROF_FLAGS) -Werror $(FPICFLAGS) -Wshadow -fvisibility=hidden CFLAGS = -Wall -W -Wcast-align -Wbad-function-cast -Wextra -Wmissing-noreturn -Wmissing-format-attribute $(OPTFLAGS) -g3 -ggdb3 $(GCOV_FLAGS) $(PROF_FLAGS) -Werror $(FPICFLAGS) -Wshadow -fvisibility=hidden
LDFLAGS = $(OPTFLAGS) -g $(GCOV_FLAGS) $(PROF_FLAGS) -lz LDFLAGS = $(OPTFLAGS) -g $(GCOV_FLAGS) $(PROF_FLAGS)
CPPFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_XOPEN_SOURCE=500 CPPFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_XOPEN_SOURCE=500
# Add -Wconversion # Add -Wconversion
...@@ -50,9 +50,7 @@ BINS= brtdump \ ...@@ -50,9 +50,7 @@ BINS= brtdump \
build default: bins libs tdb-recover tdb_logprint $(TEST_OFILES) build default: bins libs tdb-recover tdb_logprint $(TEST_OFILES)
cd tests;$(MAKE) build cd tests;$(MAKE) build
# Put crc first to make -combine work right
BRT_SOURCES = \ BRT_SOURCES = \
crc \
bread \ bread \
brt-serialize \ brt-serialize \
brt-verify \ brt-verify \
...@@ -67,6 +65,7 @@ BRT_SOURCES = \ ...@@ -67,6 +65,7 @@ BRT_SOURCES = \
memory \ memory \
memarena \ memarena \
mempool \ mempool \
murmur \
omt \ omt \
recover \ recover \
roll \ roll \
...@@ -113,7 +112,7 @@ check-fanout: ...@@ -113,7 +112,7 @@ check-fanout:
let BRT_FANOUT=BRT_FANOUT+1; \ let BRT_FANOUT=BRT_FANOUT+1; \
done done
BRT_INTERNAL_H_INCLUDES = brt-internal.h cachetable.h fifo.h omt.h brt.h brt-search.h brttypes.h ybt.h log.h ../include/db.h kv-pair.h memory.h crc.h mempool.h leafentry.h BRT_INTERNAL_H_INCLUDES = brt-internal.h cachetable.h fifo.h omt.h brt.h brt-search.h brttypes.h ybt.h log.h ../include/db.h kv-pair.h memory.h mempool.h leafentry.h
brt-test-helpers.o: $(BRT_INTERNAL_H_INCLUDES) toku_assert.h brt-test-helpers.o: $(BRT_INTERNAL_H_INCLUDES) toku_assert.h
logformat: logformat.c toku_assert.c logformat: logformat.c toku_assert.c
brt-serialize-test.o: $(BRT_INTERNAL_H_INCLUDES) brt-serialize-test.o: $(BRT_INTERNAL_H_INCLUDES)
......
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
#include "cachetable.h" #include "cachetable.h"
#include "fifo.h" #include "fifo.h"
#include "brt.h" #include "brt.h"
#include "crc.h"
#include "list.h" #include "list.h"
#include "mempool.h" #include "mempool.h"
#include "kv-pair.h" #include "kv-pair.h"
...@@ -195,8 +194,8 @@ static const BRTNODE null_brtnode=0; ...@@ -195,8 +194,8 @@ static const BRTNODE null_brtnode=0;
//extern u_int32_t toku_calccrc32_kvpair (const void *key, int keylen, const void *val, int vallen); //extern u_int32_t toku_calccrc32_kvpair (const void *key, int keylen, const void *val, int vallen);
//extern u_int32_t toku_calccrc32_kvpair_struct (const struct kv_pair *kvp); //extern u_int32_t toku_calccrc32_kvpair_struct (const struct kv_pair *kvp);
extern u_int32_t toku_calccrc32_cmd (u_int32_t type, TXNID xid, const void *key, u_int32_t keylen, const void *val, u_int32_t vallen); extern u_int32_t toku_calc_fingerprint_cmd (u_int32_t type, TXNID xid, const void *key, u_int32_t keylen, const void *val, u_int32_t vallen);
extern u_int32_t toku_calccrc32_cmdstruct (BRT_CMD cmd); extern u_int32_t toku_calc_fingerprint_cmdstruct (BRT_CMD cmd);
// How long is the pivot key? // How long is the pivot key?
unsigned int toku_brt_pivot_key_len (BRT, struct kv_pair *); // Given the tree unsigned int toku_brt_pivot_key_len (BRT, struct kv_pair *); // Given the tree
...@@ -249,7 +248,8 @@ int toku_verify_brtnode (BRT brt, DISKOFF off, bytevec lorange, ITEMLEN lolen, b ...@@ -249,7 +248,8 @@ int toku_verify_brtnode (BRT brt, DISKOFF off, bytevec lorange, ITEMLEN lolen, b
enum brt_layout_version_e { enum brt_layout_version_e {
BRT_LAYOUT_VERSION_5 = 5, BRT_LAYOUT_VERSION_5 = 5,
BRT_LAYOUT_VERSION_6 = 6, // Diff from 5 to 6: Add leafentry_estimate BRT_LAYOUT_VERSION_6 = 6, // Diff from 5 to 6: Add leafentry_estimate
BRT_LAYOUT_VERSION_7 = 7, // Diff from 6 to 7: Add exact-bit to leafentry_estimate #818, add magic to header #22, add per-subdatabase flags #333 BRT_LAYOUT_VERSION_7 = 7, // Diff from 6 to 7: Add exact-bit to leafentry_estimate #818, add magic to header #22, add per-subdatabase flags #333
BRT_LAYOUT_VERSION_8 = 8, // Diff from 7 to 8: Use murmur instead of crc32. We are going to make a simplification and stop supporting version 7 and before.
BRT_ANTEULTIMATE_VERSION, // the version after the most recent version BRT_ANTEULTIMATE_VERSION, // the version after the most recent version
BRT_LAYOUT_VERSION = BRT_ANTEULTIMATE_VERSION-1 // A hack so I don't have to change this line. BRT_LAYOUT_VERSION = BRT_ANTEULTIMATE_VERSION-1 // A hack so I don't have to change this line.
}; };
......
...@@ -112,7 +112,7 @@ void toku_serialize_brtnode_to (int fd, DISKOFF off, BRTNODE node) { ...@@ -112,7 +112,7 @@ void toku_serialize_brtnode_to (int fd, DISKOFF off, BRTNODE node) {
wbuf_literal_bytes(&w, "toku", 4); wbuf_literal_bytes(&w, "toku", 4);
if (node->height==0) wbuf_literal_bytes(&w, "leaf", 4); if (node->height==0) wbuf_literal_bytes(&w, "leaf", 4);
else wbuf_literal_bytes(&w, "node", 4); else wbuf_literal_bytes(&w, "node", 4);
wbuf_int(&w, BRT_LAYOUT_VERSION_7); wbuf_int(&w, BRT_LAYOUT_VERSION);
wbuf_ulonglong(&w, node->log_lsn.lsn); wbuf_ulonglong(&w, node->log_lsn.lsn);
//printf("%s:%d %lld.calculated_size=%d\n", __FILE__, __LINE__, off, calculated_size); //printf("%s:%d %lld.calculated_size=%d\n", __FILE__, __LINE__, off, calculated_size);
wbuf_uint(&w, calculated_size); wbuf_uint(&w, calculated_size);
...@@ -168,7 +168,7 @@ void toku_serialize_brtnode_to (int fd, DISKOFF off, BRTNODE node) { ...@@ -168,7 +168,7 @@ void toku_serialize_brtnode_to (int fd, DISKOFF off, BRTNODE node) {
wbuf_TXNID(&w, xid); wbuf_TXNID(&w, xid);
wbuf_bytes(&w, key, keylen); wbuf_bytes(&w, key, keylen);
wbuf_bytes(&w, data, datalen); wbuf_bytes(&w, data, datalen);
check_local_fingerprint+=node->rand4fingerprint*toku_calccrc32_cmd(type, xid, key, keylen, data, datalen); check_local_fingerprint+=node->rand4fingerprint*toku_calc_fingerprint_cmd(type, xid, key, keylen, data, datalen);
})); }));
} }
//printf("%s:%d check_local_fingerprint=%8x\n", __FILE__, __LINE__, check_local_fingerprint); //printf("%s:%d check_local_fingerprint=%8x\n", __FILE__, __LINE__, check_local_fingerprint);
...@@ -191,7 +191,10 @@ void toku_serialize_brtnode_to (int fd, DISKOFF off, BRTNODE node) { ...@@ -191,7 +191,10 @@ void toku_serialize_brtnode_to (int fd, DISKOFF off, BRTNODE node) {
wbuf_int(&w, crc32(toku_null_crc, w.buf, w.ndone)); wbuf_int(&w, crc32(toku_null_crc, w.buf, w.ndone));
#endif #endif
#ifdef CRC_INCR #ifdef CRC_INCR
wbuf_uint(&w, w.crc32); {
u_int32_t checksum = murmur_finish(&w.murmur);
wbuf_uint(&w, checksum);
}
#endif #endif
if (!node->ever_been_written) if (!node->ever_been_written)
...@@ -272,9 +275,8 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, u_int32_t fullhash, BRTN ...@@ -272,9 +275,8 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, u_int32_t fullhash, BRTN
result->layout_version = rbuf_int(&rc); result->layout_version = rbuf_int(&rc);
{ {
switch (result->layout_version) { switch (result->layout_version) {
case BRT_LAYOUT_VERSION_5: case BRT_LAYOUT_VERSION_8: goto ok_layout_version;
case BRT_LAYOUT_VERSION_6: // Don't support older versions.
case BRT_LAYOUT_VERSION_7: goto ok_layout_version;
} }
r=DB_BADFORMAT; r=DB_BADFORMAT;
goto died1; goto died1;
...@@ -309,11 +311,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, u_int32_t fullhash, BRTN ...@@ -309,11 +311,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, u_int32_t fullhash, BRTN
u_int32_t childfp = rbuf_int(&rc); u_int32_t childfp = rbuf_int(&rc);
BNC_SUBTREE_FINGERPRINT(result, i)= childfp; BNC_SUBTREE_FINGERPRINT(result, i)= childfp;
check_subtree_fingerprint += childfp; check_subtree_fingerprint += childfp;
if (result->layout_version>BRT_LAYOUT_VERSION_5) { BNC_SUBTREE_LEAFENTRY_ESTIMATE(result, i)=rbuf_ulonglong(&rc);
BNC_SUBTREE_LEAFENTRY_ESTIMATE(result, i)=rbuf_ulonglong(&rc);
} else {
BNC_SUBTREE_LEAFENTRY_ESTIMATE(result, i)=0;
}
} }
for (i=0; i<result->u.n.n_children-1; i++) { for (i=0; i<result->u.n.n_children-1; i++) {
if (result->flags & TOKU_DB_DUPSORT) { if (result->flags & TOKU_DB_DUPSORT) {
...@@ -362,7 +360,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, u_int32_t fullhash, BRTN ...@@ -362,7 +360,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, u_int32_t fullhash, BRTN
TXNID xid = rbuf_ulonglong(&rc); TXNID xid = rbuf_ulonglong(&rc);
rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */ rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */
rbuf_bytes(&rc, &val, &vallen); rbuf_bytes(&rc, &val, &vallen);
check_local_fingerprint += result->rand4fingerprint * toku_calccrc32_cmd(type, xid, key, keylen, val, vallen); check_local_fingerprint += result->rand4fingerprint * toku_calc_fingerprint_cmd(type, xid, key, keylen, val, vallen);
//printf("Found %s,%s\n", (char*)key, (char*)val); //printf("Found %s,%s\n", (char*)key, (char*)val);
{ {
r=toku_fifo_enq(BNC_BUFFER(result, cnum), key, keylen, val, vallen, type, xid); /* Copies the data into the hash table. */ r=toku_fifo_enq(BNC_BUFFER(result, cnum), key, keylen, val, vallen, type, xid); /* Copies the data into the hash table. */
...@@ -401,7 +399,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, u_int32_t fullhash, BRTN ...@@ -401,7 +399,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, u_int32_t fullhash, BRTN
assert(rc.ndone<=rc.size); assert(rc.ndone<=rc.size);
array[i]=(OMTVALUE)le; array[i]=(OMTVALUE)le;
actual_sum += toku_crc32(toku_null_crc, le, disksize); actual_sum += murmur_string(le, disksize);
} }
u_int32_t end_of_data = rc.ndone; u_int32_t end_of_data = rc.ndone;
result->u.l.n_bytes_in_buffer += end_of_data-start_of_data + n_in_buf*OMT_ITEM_OVERHEAD; result->u.l.n_bytes_in_buffer += end_of_data-start_of_data + n_in_buf*OMT_ITEM_OVERHEAD;
...@@ -432,10 +430,11 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, u_int32_t fullhash, BRTN ...@@ -432,10 +430,11 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, u_int32_t fullhash, BRTN
if (n_read_so_far+4!=rc.size) { if (n_read_so_far+4!=rc.size) {
r = DB_BADFORMAT; goto died_21; r = DB_BADFORMAT; goto died_21;
} }
uint32_t crc = toku_crc32(toku_null_crc, rc.buf, n_read_so_far); uint32_t crc = murmur_string(rc.buf, n_read_so_far);
uint32_t storedcrc = rbuf_int(&rc); uint32_t storedcrc = rbuf_int(&rc);
if (crc!=storedcrc) { if (crc!=storedcrc) {
printf("Bad CRC\n"); printf("Bad CRC\n");
printf("%s:%d crc=%08x stored=%08x\n", __FILE__, __LINE__, crc, storedcrc);
assert(0);//this is wrong!!! assert(0);//this is wrong!!!
r = DB_BADFORMAT; r = DB_BADFORMAT;
goto died_21; goto died_21;
...@@ -555,91 +554,6 @@ int toku_serialize_brt_header_to (int fd, struct brt_header *h) { ...@@ -555,91 +554,6 @@ int toku_serialize_brt_header_to (int fd, struct brt_header *h) {
return r; return r;
} }
// Deserialize a brt header from version 6 or earlier.
//
// On-disk layout, as consumed below: a 4-byte size (network byte order), one flags word that
// applies to every root, nodesize, freelist, unused_memory, n_named_roots, and then either
//   - n_named_roots >= 0: that many (root diskoff, NUL-terminated name) pairs, or
//   - n_named_roots <  0: a single unnamed root diskoff.
//
// Parameters:
//   fd, off   - file descriptor and offset at which the header starts.
//   brth      - on success receives the newly allocated header; untouched on failure.
//   fullhash  - stored verbatim in the header's fullhash field.
// Returns 0 on success.  On failure returns errno (header allocation failed), ENOMEM
// (another allocation failed) or EINVAL (short read or malformed header), and everything
// allocated here is released.
static int deserialize_brtheader_6_or_earlier (int fd, DISKOFF off, struct brt_header **brth, u_int32_t fullhash) {
    struct brt_header *MALLOC(h);
    if (h==0) return errno;
    // Everything is declared before the first goto so that no jump skips an initialization.
    int ret=-1;
    int size;
    int sizeagain;
    int i;
    int n_names_copied = 0;  // how many h->names[] slots own a toku_memdup'd string
    u_int32_t flags_for_all;
    struct rbuf rc;
    // Start every owned pointer at NULL so the single cleanup path can tell what was allocated.
    rc.buf = NULL;
    h->flags_array = NULL;
    h->roots = NULL;
    h->root_hashes = NULL;
    h->names = NULL;
    h->fullhash = fullhash;
    h->layout_version = BRT_LAYOUT_VERSION_6;
    {
        uint32_t size_n;
        ssize_t r = pread(fd, &size_n, sizeof(size_n), off);
        assert(r==sizeof(size_n)); // we already read it earlier.
        size = ntohl(size_n);
    }
    // Validate the size before using it as an allocation length.
    if (size<=0) {ret = EINVAL; goto died;}
    rc.buf = toku_malloc(size);
    if (rc.buf == NULL) {ret = ENOMEM; goto died;}
    rc.size=size;
    rc.ndone=0;
    {
        ssize_t r = pread(fd, rc.buf, size, off);
        if (r!=size) {ret = EINVAL; goto died;}
    }
    h->dirty=0;
    sizeagain = rbuf_int(&rc);
    if (sizeagain!=size) {ret = EINVAL; goto died;}
    flags_for_all = rbuf_int(&rc);
    h->nodesize = rbuf_int(&rc);
    h->freelist = rbuf_diskoff(&rc);
    h->unused_memory = rbuf_diskoff(&rc);
    h->n_named_roots = rbuf_int(&rc);
    if (h->n_named_roots>=0) {
        MALLOC_N(h->n_named_roots, h->flags_array);
        MALLOC_N(h->n_named_roots, h->roots);
        MALLOC_N(h->n_named_roots, h->root_hashes);
        MALLOC_N(h->n_named_roots, h->names);
        if (h->n_named_roots > 0
            && (h->flags_array==NULL || h->roots==NULL || h->root_hashes==NULL || h->names==NULL)) {
            ret = ENOMEM; goto died;
        }
        // Versions <= 6 stored one flags word for the whole file; replicate it per root.
        for (i=0; i<h->n_named_roots; i++) h->flags_array[i]=flags_for_all;
        for (i=0; i<h->n_named_roots; i++) {
            bytevec nameptr;
            unsigned int len;
            h->root_hashes[i].valid = FALSE;
            h->roots[i] = rbuf_diskoff(&rc);
            rbuf_bytes(&rc, &nameptr, &len);
            // The stored name must be exactly one NUL-terminated string of len bytes.
            // memchr is bounded by len, so a name with no terminator cannot make us read
            // past the end of the buffer the way strlen could.
            if (len==0) {ret = EINVAL; goto died;}
            {
                const char *last_byte = (const char*)nameptr + len - 1;
                if ((const char*)memchr(nameptr, 0, len) != last_byte) {ret = EINVAL; goto died;}
            }
            h->names[i] = toku_memdup(nameptr,len);
            if (h->names[i] == NULL) {ret = ENOMEM; goto died;}
            n_names_copied = i+1;
        }
    } else {
        MALLOC_N(1, h->flags_array);
        MALLOC_N(1, h->roots);
        MALLOC_N(1, h->root_hashes);
        if (h->flags_array==NULL || h->roots==NULL || h->root_hashes==NULL) {ret = ENOMEM; goto died;}
        h->flags_array[0]=flags_for_all;
        h->roots[0] = rbuf_diskoff(&rc);
        h->root_hashes[0].valid = FALSE;
        h->names = 0;
    }
    // The header must account for every byte that was read.
    if (rc.ndone!=rc.size) {ret = EINVAL; goto died;}
    toku_free(rc.buf);
    *brth = h;
    return 0;

 died:
    // Single cleanup path: free the copied names first, then the arrays, then the header.
    if (h->names) {
        for (i=0; i<n_names_copied; i++) toku_free(h->names[i]);
        toku_free(h->names);
    }
    if (h->root_hashes) toku_free(h->root_hashes);
    if (h->roots)       toku_free(h->roots);
    if (h->flags_array) toku_free(h->flags_array);
    if (rc.buf)         toku_free(rc.buf);
    toku_free(h);
    return ret;
}
int deserialize_brtheader_7_or_later(u_int32_t size, int fd, DISKOFF off, struct brt_header **brth, u_int32_t fullhash) { int deserialize_brtheader_7_or_later(u_int32_t size, int fd, DISKOFF off, struct brt_header **brth, u_int32_t fullhash) {
// We already know the first 8 bytes are "tokudata", and we read in the size. // We already know the first 8 bytes are "tokudata", and we read in the size.
struct brt_header *MALLOC(h); struct brt_header *MALLOC(h);
...@@ -660,7 +574,7 @@ int deserialize_brtheader_7_or_later(u_int32_t size, int fd, DISKOFF off, struct ...@@ -660,7 +574,7 @@ int deserialize_brtheader_7_or_later(u_int32_t size, int fd, DISKOFF off, struct
h->dirty=0; h->dirty=0;
h->layout_version = rbuf_int(&rc); h->layout_version = rbuf_int(&rc);
h->nodesize = rbuf_int(&rc); h->nodesize = rbuf_int(&rc);
assert(h->layout_version==BRT_LAYOUT_VERSION_7); assert(h->layout_version==BRT_LAYOUT_VERSION_8);
h->freelist = rbuf_diskoff(&rc); h->freelist = rbuf_diskoff(&rc);
h->unused_memory = rbuf_diskoff(&rc); h->unused_memory = rbuf_diskoff(&rc);
h->n_named_roots = rbuf_int(&rc); h->n_named_roots = rbuf_int(&rc);
...@@ -708,12 +622,9 @@ int toku_deserialize_brtheader_from (int fd, DISKOFF off, u_int32_t fullhash, st ...@@ -708,12 +622,9 @@ int toku_deserialize_brtheader_from (int fd, DISKOFF off, u_int32_t fullhash, st
if (r==0) return -1; if (r==0) return -1;
if (r<0) return errno; if (r<0) return errno;
if (r!=12) return EINVAL; if (r!=12) return EINVAL;
if (memcmp(magic,"tokudata",8)==0) { assert(memcmp(magic,"tokudata",8)==0);
// It's version 7 or later // It's version 7 or later, and the magic looks OK
return deserialize_brtheader_7_or_later(ntohl(*(int*)(&magic[8])), fd, off, brth, fullhash); return deserialize_brtheader_7_or_later(ntohl(*(int*)(&magic[8])), fd, off, brth, fullhash);
} else {
return deserialize_brtheader_6_or_earlier(fd, off, brth, fullhash);
}
} }
unsigned int toku_brt_pivot_key_len (BRT brt, struct kv_pair *pk) { unsigned int toku_brt_pivot_key_len (BRT brt, struct kv_pair *pk) {
......
...@@ -137,7 +137,7 @@ int toku_testsetup_insert_to_nonleaf (BRT brt, DISKOFF diskoff, enum brt_cmd_typ ...@@ -137,7 +137,7 @@ int toku_testsetup_insert_to_nonleaf (BRT brt, DISKOFF diskoff, enum brt_cmd_typ
r = toku_fifo_enq(BNC_BUFFER(node, childnum), key, keylen, val, vallen, cmdtype, (TXNID)0); r = toku_fifo_enq(BNC_BUFFER(node, childnum), key, keylen, val, vallen, cmdtype, (TXNID)0);
assert(r==0); assert(r==0);
u_int32_t fdelta = node->rand4fingerprint * toku_calccrc32_cmd(cmdtype, (TXNID)0, key, keylen, val, vallen); u_int32_t fdelta = node->rand4fingerprint * toku_calc_fingerprint_cmd(cmdtype, (TXNID)0, key, keylen, val, vallen);
node->local_fingerprint += fdelta; node->local_fingerprint += fdelta;
*subtree_fingerprint += fdelta; *subtree_fingerprint += fdelta;
int sizediff = keylen + vallen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD; int sizediff = keylen + vallen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
......
...@@ -26,7 +26,7 @@ static void verify_local_fingerprint (BRTNODE node) { ...@@ -26,7 +26,7 @@ static void verify_local_fingerprint (BRTNODE node) {
for (i=0; i<node->u.n.n_children; i++) for (i=0; i<node->u.n.n_children; i++)
FIFO_ITERATE(BNC_BUFFER(node,i), key, keylen, data, datalen, type, xid, FIFO_ITERATE(BNC_BUFFER(node,i), key, keylen, data, datalen, type, xid,
({ ({
fp += node->rand4fingerprint * toku_calccrc32_cmd(type, xid, key, keylen, data, datalen); fp += node->rand4fingerprint * toku_calc_fingerprint_cmd(type, xid, key, keylen, data, datalen);
})); }));
assert(fp==node->local_fingerprint); assert(fp==node->local_fingerprint);
} else { } else {
......
...@@ -391,7 +391,7 @@ static int insert_to_buffer_in_nonleaf (BRTNODE node, int childnum, DBT *k, DBT ...@@ -391,7 +391,7 @@ static int insert_to_buffer_in_nonleaf (BRTNODE node, int childnum, DBT *k, DBT
int r = toku_fifo_enq(BNC_BUFFER(node,childnum), k->data, k->size, v->data, v->size, type, xid); int r = toku_fifo_enq(BNC_BUFFER(node,childnum), k->data, k->size, v->data, v->size, type, xid);
if (r!=0) return r; if (r!=0) return r;
// printf("%s:%d fingerprint %08x -> ", __FILE__, __LINE__, node->local_fingerprint); // printf("%s:%d fingerprint %08x -> ", __FILE__, __LINE__, node->local_fingerprint);
node->local_fingerprint += node->rand4fingerprint*toku_calccrc32_cmd(type, xid, k->data, k->size, v->data, v->size); node->local_fingerprint += node->rand4fingerprint*toku_calc_fingerprint_cmd(type, xid, k->data, k->size, v->data, v->size);
// printf(" %08x\n", node->local_fingerprint); // printf(" %08x\n", node->local_fingerprint);
BNC_NBYTESINBUF(node,childnum) += n_bytes_added; BNC_NBYTESINBUF(node,childnum) += n_bytes_added;
node->u.n.n_bytes_in_buffers += n_bytes_added; node->u.n.n_bytes_in_buffers += n_bytes_added;
...@@ -566,7 +566,7 @@ static int log_and_save_brtenq(TOKULOGGER logger, BRT t, BRTNODE node, int child ...@@ -566,7 +566,7 @@ static int log_and_save_brtenq(TOKULOGGER logger, BRT t, BRTNODE node, int child
BYTESTRING keybs = {.len=keylen, .data=(char*)key}; BYTESTRING keybs = {.len=keylen, .data=(char*)key};
BYTESTRING databs = {.len=datalen, .data=(char*)data}; BYTESTRING databs = {.len=datalen, .data=(char*)data};
u_int32_t old_fingerprint = *fingerprint; u_int32_t old_fingerprint = *fingerprint;
u_int32_t fdiff=node->rand4fingerprint*toku_calccrc32_cmd(type, xid, key, keylen, data, datalen); u_int32_t fdiff=node->rand4fingerprint*toku_calc_fingerprint_cmd(type, xid, key, keylen, data, datalen);
u_int32_t new_fingerprint = old_fingerprint + fdiff; u_int32_t new_fingerprint = old_fingerprint + fdiff;
//printf("%s:%d node=%lld fingerprint old=%08x new=%08x diff=%08x xid=%lld\n", __FILE__, __LINE__, (long long)node->thisnodename, old_fingerprint, new_fingerprint, fdiff, (long long)xid); //printf("%s:%d node=%lld fingerprint old=%08x new=%08x diff=%08x xid=%lld\n", __FILE__, __LINE__, (long long)node->thisnodename, old_fingerprint, new_fingerprint, fdiff, (long long)xid);
*fingerprint = new_fingerprint; *fingerprint = new_fingerprint;
...@@ -631,7 +631,7 @@ static int brt_nonleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *node ...@@ -631,7 +631,7 @@ static int brt_nonleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *node
if (fr!=0) break; if (fr!=0) break;
int n_bytes_moved = keylen+datalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD; int n_bytes_moved = keylen+datalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
u_int32_t old_from_fingerprint = node->local_fingerprint; u_int32_t old_from_fingerprint = node->local_fingerprint;
u_int32_t delta = toku_calccrc32_cmd(type, xid, key, keylen, data, datalen); u_int32_t delta = toku_calc_fingerprint_cmd(type, xid, key, keylen, data, datalen);
u_int32_t new_from_fingerprint = old_from_fingerprint - node->rand4fingerprint*delta; u_int32_t new_from_fingerprint = old_from_fingerprint - node->rand4fingerprint*delta;
if (r!=0) return r; if (r!=0) return r;
if (t->txn_that_created != xid) { if (t->txn_that_created != xid) {
...@@ -816,7 +816,7 @@ static int push_a_brt_cmd_down (BRT t, BRTNODE node, BRTNODE child, int childnum ...@@ -816,7 +816,7 @@ static int push_a_brt_cmd_down (BRT t, BRTNODE node, BRTNODE child, int childnum
DBT *v = cmd->u.id.val; DBT *v = cmd->u.id.val;
//if (debug) printf("%s:%d %*sinserted down child_did_split=%d\n", __FILE__, __LINE__, debug, "", child_did_split); //if (debug) printf("%s:%d %*sinserted down child_did_split=%d\n", __FILE__, __LINE__, debug, "", child_did_split);
u_int32_t old_fingerprint = node->local_fingerprint; u_int32_t old_fingerprint = node->local_fingerprint;
u_int32_t new_fingerprint = old_fingerprint - node->rand4fingerprint*toku_calccrc32_cmdstruct(cmd); u_int32_t new_fingerprint = old_fingerprint - node->rand4fingerprint*toku_calc_fingerprint_cmdstruct(cmd);
node->local_fingerprint = new_fingerprint; node->local_fingerprint = new_fingerprint;
if (t->txn_that_created != cmd->xid) { if (t->txn_that_created != cmd->xid) {
int r = toku_log_brtdeq(logger, &node->log_lsn, 0, toku_cachefile_filenum(t->cf), node->thisnodename, childnum); int r = toku_log_brtdeq(logger, &node->log_lsn, 0, toku_cachefile_filenum(t->cf), node->thisnodename, childnum);
...@@ -910,7 +910,7 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum, ...@@ -910,7 +910,7 @@ static int handle_split_of_child (BRT t, BRTNODE node, int childnum,
FIFO_ITERATE(old_h, skey, skeylen, sval, svallen, type, xid, FIFO_ITERATE(old_h, skey, skeylen, sval, svallen, type, xid,
({ ({
u_int32_t old_fingerprint = node->local_fingerprint; u_int32_t old_fingerprint = node->local_fingerprint;
u_int32_t new_fingerprint = old_fingerprint - node->rand4fingerprint*toku_calccrc32_cmd(type, xid, skey, skeylen, sval, svallen); u_int32_t new_fingerprint = old_fingerprint - node->rand4fingerprint*toku_calc_fingerprint_cmd(type, xid, skey, skeylen, sval, svallen);
if (t->txn_that_created != xid) { if (t->txn_that_created != xid) {
r = toku_log_brtdeq(logger, &node->log_lsn, 0, toku_cachefile_filenum(t->cf), node->thisnodename, childnum); r = toku_log_brtdeq(logger, &node->log_lsn, 0, toku_cachefile_filenum(t->cf), node->thisnodename, childnum);
assert(r==0); assert(r==0);
...@@ -1950,7 +1950,7 @@ static void verify_local_fingerprint_nonleaf (BRTNODE node) { ...@@ -1950,7 +1950,7 @@ static void verify_local_fingerprint_nonleaf (BRTNODE node) {
for (i=0; i<node->u.n.n_children; i++) for (i=0; i<node->u.n.n_children; i++)
FIFO_ITERATE(BNC_BUFFER(node,i), key, keylen, data, datalen, type, xid, FIFO_ITERATE(BNC_BUFFER(node,i), key, keylen, data, datalen, type, xid,
({ ({
fp += node->rand4fingerprint * toku_calccrc32_cmd(type, xid, key, keylen, data, datalen); fp += node->rand4fingerprint * toku_calc_fingerprint_cmd(type, xid, key, keylen, data, datalen);
})); }));
assert(fp==node->local_fingerprint); assert(fp==node->local_fingerprint);
} }
......
#include <sys/types.h>
#include <zlib.h>
// hack: include crc.h below so we can deprecate the call to crc32
// Extend the running checksum oldcrc32 over the len bytes starting at data.
// An empty range (len==0) leaves the checksum untouched: oldcrc32 is returned as-is
// and zlib is not called.  Otherwise the work is delegated to zlib's crc32().
inline u_int32_t toku_crc32 (u_int32_t oldcrc32, const void *data, u_int32_t len) {
    if (len!=0) {
        unsigned long running = (unsigned long)oldcrc32;
        return crc32(running, data, (uInt)len);
    }
    return oldcrc32;
}
// Hack
#include "crc.h"
#ifndef TOKU_CRC_H
#define TOKU_CRC_H
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved."
#include <sys/types.h>
#include <zlib.h>
// Declarations for the project's wrapper around zlib's crc32().

// zlib crc32 has a bug: If len==0 then it should return oldcrc32, but crc32 returns 0.
// toku_crc32 continues the running checksum oldcrc32 over len bytes at data, and hands
// oldcrc32 back unchanged when len==0.
inline u_int32_t toku_crc32 (u_int32_t oldcrc32, const void *data, u_int32_t len);
// Starting value to pass as oldcrc32 when beginning a new checksum.
static const u_int32_t toku_null_crc = 0;
// Don't use crc32, use toku_crc32 to avoid that bug.
// Redeclaring zlib's crc32 with the deprecated attribute makes the compiler flag any direct
// call that appears after this header has been included.
ZEXTERN uLong ZEXPORT crc32 OF((uLong crc, const Bytef *buf, uInt len)) __attribute__((deprecated));
#endif
...@@ -4,17 +4,17 @@ ...@@ -4,17 +4,17 @@
#include <arpa/inet.h> #include <arpa/inet.h>
#include "brt-internal.h" #include "brt-internal.h"
#include "toku_assert.h" #include "toku_assert.h"
#include "murmur.h"
// Calculate the fingerprint for a kvpair // Calculate the fingerprint for a kvpair
static inline u_int32_t toku_calc_more_crc32_kvpair (u_int32_t crc, const void *key, int keylen, const void *val, int vallen) { static void toku_calc_more_murmur_kvpair (struct murmur *mm, const void *key, int keylen, const void *val, int vallen) {
int i; int i;
i = htonl(keylen); i = htonl(keylen);
crc = toku_crc32(crc, (void*)&i, 4); murmur_add(mm, (void*)&i, 4);
crc = toku_crc32(crc, key, keylen); murmur_add(mm, key, keylen);
i = htonl(vallen); i = htonl(vallen);
crc = toku_crc32(crc, (void*)&i, 4); murmur_add(mm, (void*)&i, 4);
crc = toku_crc32(crc, val, vallen); murmur_add(mm, val, vallen);
return crc;
} }
#if 0 #if 0
...@@ -28,18 +28,20 @@ u_int32_t toku_calccrc32_kvpair_struct (const struct kv_pair *kvp) { ...@@ -28,18 +28,20 @@ u_int32_t toku_calccrc32_kvpair_struct (const struct kv_pair *kvp) {
} }
#endif #endif
u_int32_t toku_calccrc32_cmd (u_int32_t type, TXNID xid, const void *key, u_int32_t keylen, const void *val, u_int32_t vallen) { u_int32_t toku_calc_fingerprint_cmd (u_int32_t type, TXNID xid, const void *key, u_int32_t keylen, const void *val, u_int32_t vallen) {
unsigned char type_c = type; unsigned char type_c = type;
unsigned int a = htonl(xid>>32); unsigned int a = htonl(xid>>32);
unsigned int b = htonl(xid&0xffffffff); unsigned int b = htonl(xid&0xffffffff);
return toku_calc_more_crc32_kvpair(toku_crc32(toku_crc32(toku_crc32(toku_null_crc, struct murmur mm;
&type_c, 1), murmur_init(&mm);
&a, 4), murmur_add(&mm, &type_c, 1);
&b, 4), murmur_add(&mm, &a, 4);
key, keylen, val, vallen); murmur_add(&mm, &b, 4);
toku_calc_more_murmur_kvpair(&mm, key, keylen, val, vallen);
return murmur_finish(&mm);
} }
u_int32_t toku_calccrc32_cmdstruct (BRT_CMD cmd) { u_int32_t toku_calc_fingerprint_cmdstruct (BRT_CMD cmd) {
switch (cmd->type) { switch (cmd->type) {
case BRT_INSERT: case BRT_INSERT:
case BRT_DELETE_ANY: case BRT_DELETE_ANY:
...@@ -48,7 +50,7 @@ u_int32_t toku_calccrc32_cmdstruct (BRT_CMD cmd) { ...@@ -48,7 +50,7 @@ u_int32_t toku_calccrc32_cmdstruct (BRT_CMD cmd) {
case BRT_COMMIT_BOTH: case BRT_COMMIT_BOTH:
case BRT_ABORT_ANY: case BRT_ABORT_ANY:
case BRT_ABORT_BOTH: case BRT_ABORT_BOTH:
return toku_calccrc32_cmd (cmd->type, cmd->xid, cmd->u.id.key->data, cmd->u.id.key->size, cmd->u.id.val->data, cmd->u.id.val->size); return toku_calc_fingerprint_cmd (cmd->type, cmd->xid, cmd->u.id.key->data, cmd->u.id.key->size, cmd->u.id.val->data, cmd->u.id.val->size);
case BRT_NONE: case BRT_NONE:
return 0; return 0;
} }
......
#ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved." #ident "Copyright (c) 2007, 2008 Tokutek Inc. All rights reserved."
#include "brttypes.h" #include "brttypes.h"
#include "crc.h"
#include "leafentry.h" #include "leafentry.h"
#include "memory.h" #include "memory.h"
#include "toku_assert.h" #include "toku_assert.h"
...@@ -14,7 +13,7 @@ ...@@ -14,7 +13,7 @@
#include <string.h> #include <string.h>
u_int32_t toku_le_crc(LEAFENTRY v) { u_int32_t toku_le_crc(LEAFENTRY v) {
return toku_crc32(toku_null_crc, v, leafentry_memsize(v)); return murmur_string(v, leafentry_memsize(v));
} }
int le_committed (u_int32_t klen, void* kval, u_int32_t dlen, void* dval, u_int32_t *resultsize, u_int32_t *disksize, LEAFENTRY *result) { int le_committed (u_int32_t klen, void* kval, u_int32_t dlen, void* dval, u_int32_t *resultsize, u_int32_t *disksize, LEAFENTRY *result) {
...@@ -148,27 +147,27 @@ u_int32_t toku_logsizeof_LEAFENTRY (LEAFENTRY le) { ...@@ -148,27 +147,27 @@ u_int32_t toku_logsizeof_LEAFENTRY (LEAFENTRY le) {
return leafentry_disksize(le); return leafentry_disksize(le);
} }
int toku_fread_LEAFENTRY(FILE *f, LEAFENTRY *le, u_int32_t *crc, u_int32_t *len) { int toku_fread_LEAFENTRY(FILE *f, LEAFENTRY *le, struct murmur *murmur, u_int32_t *len) {
assert(0); assert(0);
u_int8_t state; u_int8_t state;
int r = toku_fread_u_int8_t (f, &state, crc, len); if (r!=0) return r; int r = toku_fread_u_int8_t (f, &state, murmur, len); if (r!=0) return r;
TXNID xid; TXNID xid;
BYTESTRING a,b,c; BYTESTRING a,b,c;
u_int32_t memsize, disksize; u_int32_t memsize, disksize;
switch ((enum le_state)state) { switch ((enum le_state)state) {
case LE_COMMITTED: case LE_COMMITTED:
r = toku_fread_BYTESTRING(f, &a, crc, len); if (r!=0) return r; r = toku_fread_BYTESTRING(f, &a, murmur, len); if (r!=0) return r;
r = toku_fread_BYTESTRING(f, &b, crc, len); if (r!=0) return r; r = toku_fread_BYTESTRING(f, &b, murmur, len); if (r!=0) return r;
r = le_committed(a.len, a.data, b.len, b.data, r = le_committed(a.len, a.data, b.len, b.data,
&memsize, &disksize, le); &memsize, &disksize, le);
toku_free_BYTESTRING(a); toku_free_BYTESTRING(a);
toku_free_BYTESTRING(b); toku_free_BYTESTRING(b);
return r; return r;
case LE_BOTH: case LE_BOTH:
r = toku_fread_TXNID(f, &xid, crc, len); if (r!=0) return r; r = toku_fread_TXNID(f, &xid, murmur, len); if (r!=0) return r;
r = toku_fread_BYTESTRING(f, &a, crc, len); if (r!=0) return r; r = toku_fread_BYTESTRING(f, &a, murmur, len); if (r!=0) return r;
r = toku_fread_BYTESTRING(f, &b, crc, len); if (r!=0) return r; r = toku_fread_BYTESTRING(f, &b, murmur, len); if (r!=0) return r;
r = toku_fread_BYTESTRING(f, &c, crc, len); if (r!=0) return r; r = toku_fread_BYTESTRING(f, &c, murmur, len); if (r!=0) return r;
r = le_both(xid, a.len, a.data, b.len, b.data, c.len, c.data, r = le_both(xid, a.len, a.data, b.len, b.data, c.len, c.data,
&memsize, &disksize, le); &memsize, &disksize, le);
toku_free_BYTESTRING(a); toku_free_BYTESTRING(a);
...@@ -176,18 +175,18 @@ int toku_fread_LEAFENTRY(FILE *f, LEAFENTRY *le, u_int32_t *crc, u_int32_t *len) ...@@ -176,18 +175,18 @@ int toku_fread_LEAFENTRY(FILE *f, LEAFENTRY *le, u_int32_t *crc, u_int32_t *len)
toku_free_BYTESTRING(c); toku_free_BYTESTRING(c);
return r; return r;
case LE_PROVDEL: case LE_PROVDEL:
r = toku_fread_TXNID(f, &xid, crc, len); if (r!=0) return r; r = toku_fread_TXNID(f, &xid, murmur, len); if (r!=0) return r;
r = toku_fread_BYTESTRING(f, &a, crc, len); if (r!=0) return r; r = toku_fread_BYTESTRING(f, &a, murmur, len); if (r!=0) return r;
r = toku_fread_BYTESTRING(f, &b, crc, len); if (r!=0) return r; r = toku_fread_BYTESTRING(f, &b, murmur, len); if (r!=0) return r;
r = le_provdel(xid, a.len, a.data, b.len, b.data, r = le_provdel(xid, a.len, a.data, b.len, b.data,
&memsize, &disksize, le); &memsize, &disksize, le);
toku_free_BYTESTRING(a); toku_free_BYTESTRING(a);
toku_free_BYTESTRING(b); toku_free_BYTESTRING(b);
return r; return r;
case LE_PROVPAIR: case LE_PROVPAIR:
r = toku_fread_TXNID(f, &xid, crc, len); if (r!=0) return r; r = toku_fread_TXNID(f, &xid, murmur, len); if (r!=0) return r;
r = toku_fread_BYTESTRING(f, &a, crc, len); if (r!=0) return r; r = toku_fread_BYTESTRING(f, &a, murmur, len); if (r!=0) return r;
r = toku_fread_BYTESTRING(f, &b, crc, len); if (r!=0) return r; r = toku_fread_BYTESTRING(f, &b, murmur, len); if (r!=0) return r;
r = le_provpair(xid, a.len, a.data, b.len, b.data, r = le_provpair(xid, a.len, a.data, b.len, b.data,
&memsize, &disksize, le); &memsize, &disksize, le);
toku_free_BYTESTRING(a); toku_free_BYTESTRING(a);
...@@ -244,9 +243,9 @@ int print_leafentry (FILE *outf, LEAFENTRY v) { ...@@ -244,9 +243,9 @@ int print_leafentry (FILE *outf, LEAFENTRY v) {
LESWITCHCALL(v, print, outf); LESWITCHCALL(v, print, outf);
} }
int toku_logprint_LEAFENTRY (FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *format __attribute__((__unused__))) { int toku_logprint_LEAFENTRY (FILE *outf, FILE *inf, const char *fieldname, struct murmur *murmur, u_int32_t *len, const char *format __attribute__((__unused__))) {
LEAFENTRY v; LEAFENTRY v;
int r = toku_fread_LEAFENTRY(inf, &v, crc, len); int r = toku_fread_LEAFENTRY(inf, &v, murmur, len);
if (r!=0) return r; if (r!=0) return r;
fprintf(outf, " %s=", fieldname); fprintf(outf, " %s=", fieldname);
print_leafentry(outf, v); print_leafentry(outf, v);
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include "brttypes.h" #include "brttypes.h"
#include "rbuf.h" #include "rbuf.h"
#include "murmur.h"
#include <arpa/inet.h> #include <arpa/inet.h>
u_int32_t toku_le_crc(LEAFENTRY v); u_int32_t toku_le_crc(LEAFENTRY v);
...@@ -116,8 +117,8 @@ u_int32_t leafentry_disksize (LEAFENTRY le); // this is the same as logsizeof_LE ...@@ -116,8 +117,8 @@ u_int32_t leafentry_disksize (LEAFENTRY le); // this is the same as logsizeof_LE
u_int32_t toku_logsizeof_LEAFENTRY(LEAFENTRY le); u_int32_t toku_logsizeof_LEAFENTRY(LEAFENTRY le);
void wbuf_LEAFENTRY(struct wbuf *w, LEAFENTRY le); void wbuf_LEAFENTRY(struct wbuf *w, LEAFENTRY le);
void rbuf_LEAFENTRY(struct rbuf *r, u_int32_t *resultsize, u_int32_t *disksize, LEAFENTRY *le); void rbuf_LEAFENTRY(struct rbuf *r, u_int32_t *resultsize, u_int32_t *disksize, LEAFENTRY *le);
int toku_fread_LEAFENTRY(FILE *f, LEAFENTRY *le, u_int32_t *crc, u_int32_t *len); // read a leafentry from a log int toku_fread_LEAFENTRY(FILE *f, LEAFENTRY *le, struct murmur *, u_int32_t *len); // read a leafentry from a log
int toku_logprint_LEAFENTRY(FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *format); // read a leafentry from a log and then print it in human-readable form. int toku_logprint_LEAFENTRY(FILE *outf, FILE *inf, const char *fieldname, struct murmur *, u_int32_t *len, const char *format); // read a leafentry from a log and then print it in human-readable form.
void toku_free_LEAFENTRY(LEAFENTRY le); void toku_free_LEAFENTRY(LEAFENTRY le);
int print_leafentry (FILE *outf, LEAFENTRY v); // Print a leafentry out in human-readable form. int print_leafentry (FILE *outf, LEAFENTRY v); // Print a leafentry out in human-readable form.
......
This diff is collapsed.
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "brttypes.h" #include "brttypes.h"
#include "memory.h" #include "memory.h"
#include "bread.h" #include "bread.h"
#include "murmur.h"
struct logbytes; struct logbytes;
struct logbytes { struct logbytes {
...@@ -49,28 +50,28 @@ int toku_logger_log_newbrtnode (TOKUTXN txn, FILENUM filenum, DISKOFF offset, u_ ...@@ -49,28 +50,28 @@ int toku_logger_log_newbrtnode (TOKUTXN txn, FILENUM filenum, DISKOFF offset, u_
int toku_logger_fsync (TOKULOGGER logger); int toku_logger_fsync (TOKULOGGER logger);
int toku_fread_u_int8_t (FILE *f, u_int8_t *v, u_int32_t *crc, u_int32_t *len); int toku_fread_u_int8_t (FILE *f, u_int8_t *v, struct murmur *, u_int32_t *len);
int toku_fread_u_int32_t_nocrclen (FILE *f, u_int32_t *v); int toku_fread_u_int32_t_nocrclen (FILE *f, u_int32_t *v);
int toku_fread_u_int32_t (FILE *f, u_int32_t *v, u_int32_t *crc, u_int32_t *len); int toku_fread_u_int32_t (FILE *f, u_int32_t *v, struct murmur *, u_int32_t *len);
int toku_fread_LSN (FILE *f, LSN *lsn, u_int32_t *crc, u_int32_t *len); int toku_fread_LSN (FILE *f, LSN *lsn, struct murmur *, u_int32_t *len);
int toku_fread_FILENUM (FILE *f, FILENUM *filenum, u_int32_t *crc, u_int32_t *len); int toku_fread_FILENUM (FILE *f, FILENUM *filenum, struct murmur *, u_int32_t *len);
int toku_fread_DISKOFF (FILE *f, DISKOFF *diskoff, u_int32_t *crc, u_int32_t *len); int toku_fread_DISKOFF (FILE *f, DISKOFF *diskoff, struct murmur *, u_int32_t *len);
int toku_fread_TXNID (FILE *f, TXNID *txnid, u_int32_t *crc, u_int32_t *len); int toku_fread_TXNID (FILE *f, TXNID *txnid, struct murmur *, u_int32_t *len);
// fills in the bs with malloced data. // fills in the bs with malloced data.
int toku_fread_BYTESTRING (FILE *f, BYTESTRING *bs, u_int32_t *crc, u_int32_t *len); int toku_fread_BYTESTRING (FILE *f, BYTESTRING *bs, struct murmur *, u_int32_t *len);
int toku_fread_LOGGEDBRTHEADER(FILE *f, LOGGEDBRTHEADER *v, u_int32_t *crc, u_int32_t *len); int toku_fread_LOGGEDBRTHEADER(FILE *f, LOGGEDBRTHEADER *v, struct murmur *, u_int32_t *len);
int toku_fread_INTPAIRARRAY (FILE *f, INTPAIRARRAY *v, u_int32_t *crc, u_int32_t *len); int toku_fread_INTPAIRARRAY (FILE *f, INTPAIRARRAY *v, struct murmur *, u_int32_t *len);
int toku_logprint_LSN (FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *); int toku_logprint_LSN (FILE *outf, FILE *inf, const char *fieldname, struct murmur *, u_int32_t *len, const char *);
int toku_logprint_TXNID (FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *); int toku_logprint_TXNID (FILE *outf, FILE *inf, const char *fieldname, struct murmur *, u_int32_t *len, const char *);
int toku_logprint_BYTESTRING (FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *); int toku_logprint_BYTESTRING (FILE *outf, FILE *inf, const char *fieldname, struct murmur *, u_int32_t *len, const char *);
int toku_logprint_FILENUM (FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *); int toku_logprint_FILENUM (FILE *outf, FILE *inf, const char *fieldname, struct murmur *, u_int32_t *len, const char *);
int toku_logprint_DISKOFF (FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *); int toku_logprint_DISKOFF (FILE *outf, FILE *inf, const char *fieldname, struct murmur *, u_int32_t *len, const char *);
int toku_logprint_u_int8_t (FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *); int toku_logprint_u_int8_t (FILE *outf, FILE *inf, const char *fieldname, struct murmur *, u_int32_t *len, const char *);
int toku_logprint_u_int32_t (FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *); int toku_logprint_u_int32_t (FILE *outf, FILE *inf, const char *fieldname, struct murmur *, u_int32_t *len, const char *);
int toku_logprint_LOGGEDBRTHEADER (FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *); int toku_logprint_LOGGEDBRTHEADER (FILE *outf, FILE *inf, const char *fieldname, struct murmur *, u_int32_t *len, const char *);
int toku_logprint_INTPAIRARRAY (FILE *outf, FILE *inf, const char *fieldname, u_int32_t *crc, u_int32_t *len, const char *); int toku_logprint_INTPAIRARRAY (FILE *outf, FILE *inf, const char *fieldname, struct murmur *, u_int32_t *len, const char *);
// Useful thing for printing a bytestring. // Useful thing for printing a bytestring.
void toku_print_BYTESTRING (FILE *outf, u_int32_t len, char *data); void toku_print_BYTESTRING (FILE *outf, u_int32_t len, char *data);
......
...@@ -375,17 +375,17 @@ void generate_log_writer (void) { ...@@ -375,17 +375,17 @@ void generate_log_writer (void) {
void generate_log_reader (void) { void generate_log_reader (void) {
DO_LOGTYPES(lt, ({ DO_LOGTYPES(lt, ({
fprintf(cf, "static int toku_log_fread_%s (FILE *infile, struct logtype_%s *data, u_int32_t crc)", lt->name, lt->name); fprintf(cf, "static int toku_log_fread_%s (FILE *infile, struct logtype_%s *data, struct murmur *murmur)", lt->name, lt->name);
fprintf(cf, " {\n"); fprintf(cf, " {\n");
fprintf(cf, " int r=0;\n"); fprintf(cf, " int r=0;\n");
fprintf(cf, " u_int32_t actual_len=5; // 1 for the command, 4 for the first len.\n"); fprintf(cf, " u_int32_t actual_len=5; // 1 for the command, 4 for the first len.\n");
fprintf(cf, " r=toku_fread_%-16s(infile, &data->%-16s, &crc, &actual_len); if (r!=0) return r;\n", "LSN", "lsn"); fprintf(cf, " r=toku_fread_%-16s(infile, &data->%-16s, murmur, &actual_len); if (r!=0) return r;\n", "LSN", "lsn");
DO_FIELDS(ft, lt, DO_FIELDS(ft, lt,
fprintf(cf, " r=toku_fread_%-16s(infile, &data->%-16s, &crc, &actual_len); if (r!=0) return r;\n", ft->type, ft->name)); fprintf(cf, " r=toku_fread_%-16s(infile, &data->%-16s, murmur, &actual_len); if (r!=0) return r;\n", ft->type, ft->name));
fprintf(cf, " u_int32_t crc_in_file, len_in_file;\n"); fprintf(cf, " u_int32_t murmur_in_file, len_in_file;\n");
fprintf(cf, " r=toku_fread_u_int32_t_nocrclen(infile, &crc_in_file); actual_len+=4; if (r!=0) return r;\n"); fprintf(cf, " r=toku_fread_u_int32_t_nocrclen(infile, &murmur_in_file); actual_len+=4; if (r!=0) return r;\n");
fprintf(cf, " r=toku_fread_u_int32_t_nocrclen(infile, &len_in_file); actual_len+=4; if (r!=0) return r;\n"); fprintf(cf, " r=toku_fread_u_int32_t_nocrclen(infile, &len_in_file); actual_len+=4; if (r!=0) return r;\n");
fprintf(cf, " if (crc_in_file!=crc || len_in_file!=actual_len) return DB_BADFORMAT;\n"); fprintf(cf, " if (murmur_in_file!=murmur_finish(murmur) || len_in_file!=actual_len) return DB_BADFORMAT;\n");
fprintf(cf, " return 0;\n"); fprintf(cf, " return 0;\n");
fprintf(cf, "}\n\n"); fprintf(cf, "}\n\n");
})); }));
...@@ -393,17 +393,19 @@ void generate_log_reader (void) { ...@@ -393,17 +393,19 @@ void generate_log_reader (void) {
fprintf(hf, ";\n"); fprintf(hf, ";\n");
fprintf(cf, " {\n"); fprintf(cf, " {\n");
fprintf(cf, " u_int32_t len1; int r;\n"); fprintf(cf, " u_int32_t len1; int r;\n");
fprintf(cf, " u_int32_t crc=0,ignorelen=0;\n"); fprintf(cf, " u_int32_t ignorelen=0;\n");
fprintf(cf, " r = toku_fread_u_int32_t(infile, &len1,&crc,&ignorelen); if (r!=0) return r;\n"); fprintf(cf, " struct murmur mm;\n");
fprintf(cf, " murmur_init(&mm);\n");
fprintf(cf, " r = toku_fread_u_int32_t(infile, &len1,&mm,&ignorelen); if (r!=0) return r;\n");
fprintf(cf, " int cmd=fgetc(infile);\n"); fprintf(cf, " int cmd=fgetc(infile);\n");
fprintf(cf, " if (cmd==EOF) return EOF;\n"); fprintf(cf, " if (cmd==EOF) return EOF;\n");
fprintf(cf, " char cmdchar = cmd;\n"); fprintf(cf, " char cmdchar = cmd;\n");
fprintf(cf, " crc = toku_crc32(crc, &cmdchar, 1);\n"); fprintf(cf, " murmur_add(&mm, &cmdchar, 1);\n");
fprintf(cf, " le->cmd=cmd;\n"); fprintf(cf, " le->cmd=cmd;\n");
fprintf(cf, " switch ((enum lt_cmd)cmd) {\n"); fprintf(cf, " switch ((enum lt_cmd)cmd) {\n");
DO_LOGTYPES(lt, ({ DO_LOGTYPES(lt, ({
fprintf(cf, " case LT_%s:\n", lt->name); fprintf(cf, " case LT_%s:\n", lt->name);
fprintf(cf, " return toku_log_fread_%s (infile, &le->u.%s, crc);\n", lt->name, lt->name); fprintf(cf, " return toku_log_fread_%s (infile, &le->u.%s, &mm);\n", lt->name, lt->name);
})); }));
fprintf(cf, " };\n"); fprintf(cf, " };\n");
fprintf(cf, " return DB_BADFORMAT;\n"); // Should read past the record using the len field. fprintf(cf, " return DB_BADFORMAT;\n"); // Should read past the record using the len field.
...@@ -417,14 +419,16 @@ void generate_logprint (void) { ...@@ -417,14 +419,16 @@ void generate_logprint (void) {
fprintf(cf, " {\n"); fprintf(cf, " {\n");
fprintf(cf, " int cmd, r;\n"); fprintf(cf, " int cmd, r;\n");
fprintf(cf, " u_int32_t len1, crc_in_file;\n"); fprintf(cf, " u_int32_t len1, crc_in_file;\n");
fprintf(cf, " u_int32_t crc = 0, ignorelen=0;\n"); fprintf(cf, " u_int32_t ignorelen=0;\n");
fprintf(cf, " r=toku_fread_u_int32_t(f, &len1, &crc, &ignorelen);\n"); fprintf(cf, " struct murmur mm;\n");
fprintf(cf, " murmur_init(&mm);\n");
fprintf(cf, " r=toku_fread_u_int32_t(f, &len1, &mm, &ignorelen);\n");
fprintf(cf, " if (r==EOF) return EOF;\n"); fprintf(cf, " if (r==EOF) return EOF;\n");
fprintf(cf, " cmd=fgetc(f);\n"); fprintf(cf, " cmd=fgetc(f);\n");
fprintf(cf, " if (cmd==EOF) return DB_BADFORMAT;\n"); fprintf(cf, " if (cmd==EOF) return DB_BADFORMAT;\n");
fprintf(cf, " u_int32_t len_in_file, len=1+4; // cmd + len1\n"); fprintf(cf, " u_int32_t len_in_file, len=1+4; // cmd + len1\n");
fprintf(cf, " char charcmd = cmd;\n"); fprintf(cf, " char charcmd = cmd;\n");
fprintf(cf, " crc = toku_crc32(crc, &charcmd, 1);\n"); fprintf(cf, " murmur_add(&mm, &charcmd, 1);\n");
fprintf(cf, " switch ((enum lt_cmd)cmd) {\n"); fprintf(cf, " switch ((enum lt_cmd)cmd) {\n");
DO_LOGTYPES(lt, ({ if (strlen(lt->name)>maxnamelen) maxnamelen=strlen(lt->name); })); DO_LOGTYPES(lt, ({ if (strlen(lt->name)>maxnamelen) maxnamelen=strlen(lt->name); }));
DO_LOGTYPES(lt, ({ DO_LOGTYPES(lt, ({
...@@ -434,20 +438,23 @@ void generate_logprint (void) { ...@@ -434,20 +438,23 @@ void generate_logprint (void) {
fprintf(cf, " fprintf(outf, \"%%-%ds \", \"%s\");\n", maxnamelen, lt->name); fprintf(cf, " fprintf(outf, \"%%-%ds \", \"%s\");\n", maxnamelen, lt->name);
if (isprint(cmd)) fprintf(cf," fprintf(outf, \" '%c':\");\n", cmd); if (isprint(cmd)) fprintf(cf," fprintf(outf, \" '%c':\");\n", cmd);
else fprintf(cf," fprintf(outf, \"0%03o:\");\n", cmd); else fprintf(cf," fprintf(outf, \"0%03o:\");\n", cmd);
fprintf(cf, " r = toku_logprint_%-16s(outf, f, \"lsn\", &crc, &len, 0); if (r!=0) return r;\n", "LSN"); fprintf(cf, " r = toku_logprint_%-16s(outf, f, \"lsn\", &mm, &len, 0); if (r!=0) return r;\n", "LSN");
DO_FIELDS(ft, lt, ({ DO_FIELDS(ft, lt, ({
fprintf(cf, " r = toku_logprint_%-16s(outf, f, \"%s\", &crc, &len,", ft->type, ft->name); fprintf(cf, " r = toku_logprint_%-16s(outf, f, \"%s\", &mm, &len,", ft->type, ft->name);
if (ft->format) fprintf(cf, "\"%s\"", ft->format); if (ft->format) fprintf(cf, "\"%s\"", ft->format);
else fprintf(cf, "0"); else fprintf(cf, "0");
fprintf(cf, "); if (r!=0) return r;\n"); fprintf(cf, "); if (r!=0) return r;\n");
})); }));
fprintf(cf, " r = toku_fread_u_int32_t_nocrclen (f, &crc_in_file); len+=4; if (r!=0) return r;\n"); fprintf(cf, " {\n");
fprintf(cf, " fprintf(outf, \" crc=%%08x\", crc_in_file);\n"); fprintf(cf, " u_int32_t actual_murmur = murmur_finish(&mm);\n");
fprintf(cf, " if (crc_in_file!=crc) fprintf(outf, \" actual_crc=%%08x\", crc);\n"); fprintf(cf, " r = toku_fread_u_int32_t_nocrclen (f, &crc_in_file); len+=4; if (r!=0) return r;\n");
fprintf(cf, " r = toku_fread_u_int32_t_nocrclen (f, &len_in_file); len+=4; if (r!=0) return r;\n"); fprintf(cf, " fprintf(outf, \" crc=%%08x\", crc_in_file);\n");
fprintf(cf, " fprintf(outf, \" len=%%d\", len_in_file);\n"); fprintf(cf, " if (crc_in_file!=actual_murmur) fprintf(outf, \" actual_fingerprint=%%08x\", actual_murmur);\n");
fprintf(cf, " if (len_in_file!=len) fprintf(outf, \" actual_len=%%d\", len);\n"); fprintf(cf, " r = toku_fread_u_int32_t_nocrclen (f, &len_in_file); len+=4; if (r!=0) return r;\n");
fprintf(cf, " if (len_in_file!=len || crc_in_file!=crc) return DB_BADFORMAT;\n"); fprintf(cf, " fprintf(outf, \" len=%%d\", len_in_file);\n");
fprintf(cf, " if (len_in_file!=len) fprintf(outf, \" actual_len=%%d\", len);\n");
fprintf(cf, " if (len_in_file!=len || crc_in_file!=actual_murmur) return DB_BADFORMAT;\n");
fprintf(cf, " };\n");
fprintf(cf, " fprintf(outf, \"\\n\");\n"); fprintf(cf, " fprintf(outf, \"\\n\");\n");
fprintf(cf, " return 0;;\n\n"); fprintf(cf, " return 0;;\n\n");
})); }));
......
#include <sys/types.h>
#include <assert.h>
#include <stdio.h>
#include <arpa/inet.h>
#include "murmur.h"
// Mixing constants shared by the one-shot reference and the incremental implementation below.
// 'm' and 'r' are mixing constants generated offline.
// They're not really 'magic', they just happen to work well.
static const u_int32_t m = 0x5bd1e995;
static const int r = 24;
// Initial value of the hash state.
static const u_int32_t seed = 0x3dd3b51a;

// One-shot reference computation of the fingerprint that murmur_init/murmur_add/murmur_finish
// in this file compute incrementally.  It exists so that the self-test helpers can compare the
// two.  To agree with the incremental code it must
//   - assemble every 4-byte word most-significant byte first (murmur_add uses ntohl),
//   - assemble the 1..3 trailing bytes most-significant byte first as well, and
//   - skip the final avalanche (murmur_finish deliberately omits it).
// The bytes are read one at a time, so key may have any alignment.
//   key  the bytes to fingerprint
//   len  number of bytes at key (values <= 0 yield the seed)
// Returns the 32-bit fingerprint.
static u_int32_t MurmurHash2 ( const void * key, int len)
{
    u_int32_t h = seed;
    const unsigned char * data = (const unsigned char *)key;

    // Mix 4 bytes at a time into the hash.
    while(len >= 4)
    {
	u_int32_t k = ((u_int32_t)data[0] << 24)
	    | ((u_int32_t)data[1] << 16)
	    | ((u_int32_t)data[2] << 8)
	    |  (u_int32_t)data[3];
	k *= m;
	k ^= k >> r;
	k *= m;
	h *= m;
	h ^= k;
	data += 4;
	len -= 4;
    }

    // Handle the last few bytes of the input array.
    if (len > 0) {
	u_int32_t k = 0;
	int i;
	for (i=0; i<len; i++) k = (k << 8) | data[i];
	h ^= k;
	h *= m;
    }
    return h;
}
// Put *mm into the state that corresponds to "no bytes added yet":
// the hash starts at the seed and no partial word is pending.
void murmur_init (struct murmur *mm) {
    mm->h            = seed;
    mm->k            = 0;
    mm->n_bytes_in_k = 0;
}
// Feed len more bytes, starting at key, into the incremental fingerprint state *mm.
// The result after any sequence of calls depends only on the concatenated bytes, not on how
// they were split across calls:
//   1) bytes left over from earlier calls (mm->k) are topped up to a full 4-byte word first,
//   2) then whole words are mixed straight from the input,
//   3) finally up to 3 trailing bytes are parked in mm->k for the next call or murmur_finish.
// Words are always assembled most-significant byte first.  The input is read one byte at a
// time, so key may have any alignment.
void murmur_add (struct murmur *mm, const void * key, unsigned int len) {
    assert(mm->n_bytes_in_k<4);
    const unsigned char *data = key;
    u_int32_t h = mm->h;

    if (mm->n_bytes_in_k>0) {
	// Step 1: complete the pending partial word.
	int n_bytes_in_k = mm->n_bytes_in_k;
	u_int32_t k = mm->k;
	while (n_bytes_in_k<4 && len>0) {
	    k = (k << 8) | *data;
	    n_bytes_in_k++;
	    data++;
	    len--;
	}
	if (n_bytes_in_k<4) {
	    // The input ran out before the word filled up; h is unchanged, just remember the bytes.
	    assert(len==0);
	    mm->n_bytes_in_k = n_bytes_in_k;
	    mm->k = k;
	    return;
	}
	k *= m;
	k ^= k >> r;
	k *= m;
	h *= m;
	h ^= k;
    }

    // Step 2: whole words.  Equivalent to ntohl() of a 4-byte load, but without the
    // unaligned access.
    while (len >= 4) {
	u_int32_t k = ((u_int32_t)data[0] << 24)
	    | ((u_int32_t)data[1] << 16)
	    | ((u_int32_t)data[2] << 8)
	    |  (u_int32_t)data[3];
	k *= m;
	k ^= k >> r;
	k *= m;
	h *= m;
	h ^= k;
	data += 4;
	len -= 4;
    }
    mm->h = h;

    // Step 3: park the 0..3 remaining bytes, most-significant first.
    {
	u_int32_t k = 0;
	unsigned int i;
	for (i=0; i<len; i++) k = (k << 8) | data[i];
	mm->k = k;
	mm->n_bytes_in_k = len;
    }
}
u_int32_t murmur_finish (struct murmur *mm) {
u_int32_t h = mm->h;
if (mm->n_bytes_in_k>0) {
h ^= mm->k;
h *= m;
}
if (0) {
// The real murmur function does this extra mixing at the end. We don't need that for fingerprint.
h ^= h >> 29;
h *= m;
h ^= h >> 31;
}
return h;
}
void murmur_test_string (void *data, int len) {
u_int32_t v0 = MurmurHash2(data, len);
struct murmur mm;
murmur_init(&mm);
murmur_add(&mm, data, len);
u_int32_t v1 = murmur_finish(&mm);
assert(v0==v1);
}
// Self-test helper: like murmur_test_string(), but the incremental side is fed one byte per
// murmur_add() call, which exercises the partial-word bookkeeping on every call.
// Asserts that the one-shot and incremental fingerprints are equal.
void murmur_test_string_1_byte_at_a_time (void *data, int len) {
    // Arithmetic on void* is a GNU extension; index through a byte pointer instead.
    const unsigned char *bytes = data;
    u_int32_t v0 = MurmurHash2(data, len);
    struct murmur mm;
    murmur_init(&mm);
    int i;
    for (i=0; i<len; i++) {
	murmur_add(&mm, bytes+i, 1);
    }
    u_int32_t v1 = murmur_finish(&mm);
    assert(v0==v1);
}
u_int32_t murmur_string (void *data, int len) {
struct murmur mm;
murmur_init(&mm);
murmur_add(&mm, data, len);
return murmur_finish(&mm);
}
#if 0
// Disabled stand-alone self-test.  Checks the empty string, then every substring
// str[i..j) of the sample text (terminating NUL included), both in one murmur_add() call
// and one byte at a time.
int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) {
    murmur_test_string("", 0);
    char str[] = "abcdefghijklmnopqrstuvwyz";
    u_int32_t i,j;
    murmur_string(str, sizeof(str));
    for (i=0; i<sizeof(str); i++) {
	// j runs up to and including sizeof(str) so that every length from 0 to the rest of
	// the buffer is covered.  (The loop used to stop at j==i, which only ever tested
	// zero-length strings.)
	for (j=i; j<=sizeof(str); j++) {
	    murmur_test_string(str+i, j-i);
	    murmur_test_string_1_byte_at_a_time(str+i, j-i);
	}
    }
    return 0;
}
#endif
#ifndef MURMUR_H
#define MURMUR_H
#include <sys/types.h>
// Fingerprint the len bytes at data in a single call (init, one add, finish).
u_int32_t murmur_string (void *data, int len);
// This structure is designed to allow us to incrementally compute the murmur function.
// The murmur function operates on 4-byte values, and then if there are a few bytes left at the end it handles them specially.
// Thus, to perform the computation incrementally, we may end up with a few extra bytes.  We must hang on to those extra bytes
// until we either get 4 bytes (in which case murmur can run a little further) or until we get to the end.
struct murmur {
int n_bytes_in_k; // How many bytes are in k (murmur_add asserts that this is less than 4)
u_int32_t k; // These are the extra bytes. Bytes are shifted into the low-order bits.
u_int32_t h; // The hash so far (up to the most recent 4-byte boundary)
};
// Reset *mm to the "nothing added yet" state.  Call before the first murmur_add.
void murmur_init (struct murmur *mm);
// Add the len bytes at key to the running fingerprint held in *mm.
void murmur_add (struct murmur *mm, const void * key, unsigned int len);
// Return the fingerprint of all bytes added so far, folding in any pending extra bytes.
u_int32_t murmur_finish (struct murmur *mm);
#endif
...@@ -288,7 +288,7 @@ void toku_recover_brtdeq (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t c ...@@ -288,7 +288,7 @@ void toku_recover_brtdeq (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t c
r = toku_fifo_peek(BNC_BUFFER(node, childnum), &actual_key, &actual_keylen, &actual_data, &actual_datalen, &actual_type, &actual_xid); r = toku_fifo_peek(BNC_BUFFER(node, childnum), &actual_key, &actual_keylen, &actual_data, &actual_datalen, &actual_type, &actual_xid);
assert(r==0); assert(r==0);
u_int32_t sizediff = actual_keylen + actual_datalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD; u_int32_t sizediff = actual_keylen + actual_datalen + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
node->local_fingerprint -= node->rand4fingerprint * toku_calccrc32_cmd(actual_type, actual_xid, actual_key, actual_keylen, actual_data, actual_datalen); node->local_fingerprint -= node->rand4fingerprint * toku_calc_fingerprint_cmd(actual_type, actual_xid, actual_key, actual_keylen, actual_data, actual_datalen);
node->log_lsn = lsn; node->log_lsn = lsn;
node->u.n.n_bytes_in_buffers -= sizediff; node->u.n.n_bytes_in_buffers -= sizediff;
BNC_NBYTESINBUF(node, childnum) -= sizediff; BNC_NBYTESINBUF(node, childnum) -= sizediff;
...@@ -306,7 +306,7 @@ void toku_recover_brtenq (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t c ...@@ -306,7 +306,7 @@ void toku_recover_brtenq (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t c
//printf("enq: %lld expected_old_fingerprint=%08x actual=%08x new=%08x\n", diskoff, oldfingerprint, node->local_fingerprint, newfingerprint); //printf("enq: %lld expected_old_fingerprint=%08x actual=%08x new=%08x\n", diskoff, oldfingerprint, node->local_fingerprint, newfingerprint);
r = toku_fifo_enq(BNC_BUFFER(node, childnum), key.data, key.len, data.data, data.len, typ, xid); r = toku_fifo_enq(BNC_BUFFER(node, childnum), key.data, key.len, data.data, data.len, typ, xid);
assert(r==0); assert(r==0);
node->local_fingerprint += node->rand4fingerprint * toku_calccrc32_cmd(typ, xid, key.data, key.len, data.data, data.len); node->local_fingerprint += node->rand4fingerprint * toku_calc_fingerprint_cmd(typ, xid, key.data, key.len, data.data, data.len);
node->log_lsn = lsn; node->log_lsn = lsn;
u_int32_t sizediff = key.len + data.len + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD; u_int32_t sizediff = key.len + data.len + KEY_VALUE_OVERHEAD + BRT_CMD_OVERHEAD;
r = toku_cachetable_unpin(cf, diskoff, node->fullhash, 1, toku_serialize_brtnode_size(node)); r = toku_cachetable_unpin(cf, diskoff, node->fullhash, 1, toku_serialize_brtnode_size(node));
......
...@@ -36,7 +36,7 @@ SUMMARIZE_CMD = ...@@ -36,7 +36,7 @@ SUMMARIZE_CMD =
endif endif
CFLAGS = -Wall -W -Wcast-align -Wbad-function-cast -Wextra -Wmissing-noreturn -Wmissing-format-attribute $(OPTFLAGS) -g3 -ggdb3 $(GCOV_FLAGS) $(PROF_FLAGS) -Werror $(FPICFLAGS) -Wshadow -fvisibility=hidden CFLAGS = -Wall -W -Wcast-align -Wbad-function-cast -Wextra -Wmissing-noreturn -Wmissing-format-attribute $(OPTFLAGS) -g3 -ggdb3 $(GCOV_FLAGS) $(PROF_FLAGS) -Werror $(FPICFLAGS) -Wshadow -fvisibility=hidden
LDFLAGS = $(OPTFLAGS) -g $(GCOV_FLAGS) $(PROF_FLAGS) -lz LDFLAGS = $(OPTFLAGS) -g $(GCOV_FLAGS) $(PROF_FLAGS)
CPPFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_XOPEN_SOURCE=500 -I.. CPPFLAGS += -D_FILE_OFFSET_BITS=64 -D_LARGEFILE64_SOURCE -D_XOPEN_SOURCE=500 -I..
# Put these one-per-line so that if we insert a new one the svn diff can understand it better. # Put these one-per-line so that if we insert a new one the svn diff can understand it better.
...@@ -89,7 +89,6 @@ BINS = $(REGRESSION_TESTS) \ ...@@ -89,7 +89,6 @@ BINS = $(REGRESSION_TESTS) \
# This line intentially kept commented so I can have a \ on the end of the previous line # This line intentially kept commented so I can have a \ on the end of the previous line
CHECKS = \ CHECKS = \
load_version_6 \
benchmarktest_256 \ benchmarktest_256 \
test-assertA test-assertB \ test-assertA test-assertB \
$(REGRESSION_TESTS) \ $(REGRESSION_TESTS) \
...@@ -105,10 +104,6 @@ check_fail: ...@@ -105,10 +104,6 @@ check_fail:
check_ok: check_ok:
test 0 = 0 $(SUMMARIZE_CMD) test 0 = 0 $(SUMMARIZE_CMD)
check_load_version_6:
(cp bench.db.ver6 bench.db.ver6.tmp && \
$(VGRIND) ../brtdump bench.db.ver6.tmp > /dev/null ) $(SUMMARIZE_CMD)
check_benchmarktest_256: benchmark-test check_benchmarktest_256: benchmark-test
$(VGRIND) ./benchmark-test $(VERBVERBOSE) --valsize 256 --verify 1 $(SUMMARIZE_CMD) $(VGRIND) ./benchmark-test $(VERBVERBOSE) --valsize 256 --verify 1 $(SUMMARIZE_CMD)
......
...@@ -28,7 +28,7 @@ static void test_serialize(void) { ...@@ -28,7 +28,7 @@ static void test_serialize(void) {
sn.thisnodename = sn.nodesize*20; sn.thisnodename = sn.nodesize*20;
sn.disk_lsn.lsn = 789; sn.disk_lsn.lsn = 789;
sn.log_lsn.lsn = 123456; sn.log_lsn.lsn = 123456;
sn.layout_version = BRT_LAYOUT_VERSION_7; sn.layout_version = BRT_LAYOUT_VERSION;
sn.height = 1; sn.height = 1;
sn.rand4fingerprint = randval; sn.rand4fingerprint = randval;
sn.local_fingerprint = 0; sn.local_fingerprint = 0;
...@@ -46,9 +46,9 @@ static void test_serialize(void) { ...@@ -46,9 +46,9 @@ static void test_serialize(void) {
BNC_SUBTREE_LEAFENTRY_ESTIMATE(&sn, 1) = random() + (((long long)random())<<32); BNC_SUBTREE_LEAFENTRY_ESTIMATE(&sn, 1) = random() + (((long long)random())<<32);
r = toku_fifo_create(&BNC_BUFFER(&sn,0)); assert(r==0); r = toku_fifo_create(&BNC_BUFFER(&sn,0)); assert(r==0);
r = toku_fifo_create(&BNC_BUFFER(&sn,1)); assert(r==0); r = toku_fifo_create(&BNC_BUFFER(&sn,1)); assert(r==0);
r = toku_fifo_enq(BNC_BUFFER(&sn,0), "a", 2, "aval", 5, BRT_NONE, (TXNID)0); assert(r==0); sn.local_fingerprint += randval*toku_calccrc32_cmd(BRT_NONE, (TXNID)0, "a", 2, "aval", 5); r = toku_fifo_enq(BNC_BUFFER(&sn,0), "a", 2, "aval", 5, BRT_NONE, (TXNID)0); assert(r==0); sn.local_fingerprint += randval*toku_calc_fingerprint_cmd(BRT_NONE, (TXNID)0, "a", 2, "aval", 5);
r = toku_fifo_enq(BNC_BUFFER(&sn,0), "b", 2, "bval", 5, BRT_NONE, (TXNID)123); assert(r==0); sn.local_fingerprint += randval*toku_calccrc32_cmd(BRT_NONE, (TXNID)123, "b", 2, "bval", 5); r = toku_fifo_enq(BNC_BUFFER(&sn,0), "b", 2, "bval", 5, BRT_NONE, (TXNID)123); assert(r==0); sn.local_fingerprint += randval*toku_calc_fingerprint_cmd(BRT_NONE, (TXNID)123, "b", 2, "bval", 5);
r = toku_fifo_enq(BNC_BUFFER(&sn,1), "x", 2, "xval", 5, BRT_NONE, (TXNID)234); assert(r==0); sn.local_fingerprint += randval*toku_calccrc32_cmd(BRT_NONE, (TXNID)234, "x", 2, "xval", 5); r = toku_fifo_enq(BNC_BUFFER(&sn,1), "x", 2, "xval", 5, BRT_NONE, (TXNID)234); assert(r==0); sn.local_fingerprint += randval*toku_calc_fingerprint_cmd(BRT_NONE, (TXNID)234, "x", 2, "xval", 5);
BNC_NBYTESINBUF(&sn, 0) = 2*(BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+2+5); BNC_NBYTESINBUF(&sn, 0) = 2*(BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+2+5);
BNC_NBYTESINBUF(&sn, 1) = 1*(BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+2+5); BNC_NBYTESINBUF(&sn, 1) = 1*(BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+2+5);
sn.u.n.n_bytes_in_buffers = 3*(BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+2+5); sn.u.n.n_bytes_in_buffers = 3*(BRT_CMD_OVERHEAD+KEY_VALUE_OVERHEAD+2+5);
...@@ -60,7 +60,7 @@ static void test_serialize(void) { ...@@ -60,7 +60,7 @@ static void test_serialize(void) {
assert(dn->thisnodename==nodesize*20); assert(dn->thisnodename==nodesize*20);
assert(dn->disk_lsn.lsn==123456); assert(dn->disk_lsn.lsn==123456);
assert(dn->layout_version ==BRT_LAYOUT_VERSION_7); assert(dn->layout_version ==BRT_LAYOUT_VERSION);
assert(dn->height == 1); assert(dn->height == 1);
assert(dn->rand4fingerprint==randval); assert(dn->rand4fingerprint==randval);
assert(dn->u.n.n_children==2); assert(dn->u.n.n_children==2);
......
/* Bradley's variation of murmur.
* 1) It operates on 64-bit values at a time.
* 2) It can be computed incrementally.
*/
#include <sys/types.h>
#include <sys/time.h>
#include <stdlib.h>
#include <stdio.h>
#include <arpa/inet.h>
#include <string.h>
#include <assert.h>
#include "zlib.h"
#include "murmur.h"
// Convert a 64-bit value from network (big-endian) byte order to host byte order.
// The bytes of v are read in memory order and assembled most-significant first, so the result
// is correct on hosts of either endianness and needs no architecture-specific instruction.
// (The previous x86-64 inline-asm bswapq always swapped, which is only right on little-endian
// hosts.)
static inline u_int64_t ntoh64 (u_int64_t v) {
    const unsigned char *bytes = (const unsigned char *)&v;
    u_int64_t result = 0;
    int i;
    for (i=0; i<8; i++) {
	result = (result << 8) | bytes[i];
    }
    return result;
}
#if 0
// Disabled alternative to the ntoh64 above, built from two 32-bit ntohl() conversions:
// the converted high half becomes the low half of the result and vice versa.
// NOTE(review): on a host where ntohl() is the identity this would still exchange the two
// halves -- confirm before re-enabling.
u_int64_t ntoh64 (u_int64_t v) {
return ntohl(v>>32) | (((u_int64_t)ntohl(v&0xffffffff))<<32);
}
#endif
// Host-byte-order 64-bit murmur variant: mixes 8 input bytes per step.
//   key  the bytes to hash (any alignment)
//   len  number of bytes at key
// Returns the 64-bit hash.
// Each word is loaded with memcpy so that (a) all 8 bytes take part in the hash -- the old code
// passed the 64-bit load through ntohl(), which keeps only 32 bits -- and (b) unaligned inputs
// such as buf+1 are read without undefined behaviour.
u_int64_t bmurmur (const void *key, int len) {
    const u_int64_t m = 0xd8e9509a5bd1e995ULL;
    const int r = 32;
    const u_int64_t seed = 0x6511611f3dd3b51aULL;
    u_int64_t h = seed^len;
    const unsigned char *data = key;

    while (len>=8) {
	u_int64_t k;
	memcpy(&k, data, sizeof(k));   // host-order load; see bmurmurN for the network-order variant
	k *= m;
	k ^= k>>r;
	k *= m;
	h *= m;
	h ^= k;
	data += 8;
	len -= 8;
    }

    // Fold in the 0..7 trailing bytes; every case deliberately falls through to the next.
    switch(len) {
    case 7: h ^= ((u_int64_t)data[6]) << (6*8); /* fall through */
    case 6: h ^= ((u_int64_t)data[5]) << (5*8); /* fall through */
    case 5: h ^= ((u_int64_t)data[4]) << (4*8); /* fall through */
    case 4: h ^= ((u_int64_t)data[3]) << (3*8); /* fall through */
    case 3: h ^= ((u_int64_t)data[2]) << (2*8); /* fall through */
    case 2: h ^= ((u_int64_t)data[1]) << (1*8); /* fall through */
    case 1: h ^= ((u_int64_t)data[0]) << (0*8);
    }
    h *= m;

    // Do a few final mixes of the hash to ensure the last few
    // bytes are well-incorporated.
    h ^= h >> 29;
    h *= m;
    h ^= h >> 31;
    return h;
}
// network-order bmurmur: same mixing as bmurmur, but every 8-byte word is converted with
// ntoh64() before it is mixed.
//   key  the bytes to hash (any alignment)
//   len  number of bytes at key
// Returns the 64-bit hash.
// The word is fetched with memcpy rather than through a cast pointer, so unaligned input is
// read without undefined behaviour.
u_int64_t bmurmurN (const void *key, int len) {
    const u_int64_t m = 0xd8e9509a5bd1e995ULL;
    const int r = 32;
    const u_int64_t seed = 0x6511611f3dd3b51aULL;
    u_int64_t h = seed^len;
    const unsigned char *data = key;

    while (len>=8) {
	u_int64_t raw;
	memcpy(&raw, data, sizeof(raw));
	u_int64_t k = ntoh64(raw);
	k *= m;
	k ^= k>>r;
	k *= m;
	h *= m;
	h ^= k;
	data += 8;
	len -= 8;
    }

    // Fold in the 0..7 trailing bytes; every case deliberately falls through to the next.
    switch(len) {
    case 7: h ^= ((u_int64_t)data[6]) << (6*8); /* fall through */
    case 6: h ^= ((u_int64_t)data[5]) << (5*8); /* fall through */
    case 5: h ^= ((u_int64_t)data[4]) << (4*8); /* fall through */
    case 4: h ^= ((u_int64_t)data[3]) << (3*8); /* fall through */
    case 3: h ^= ((u_int64_t)data[2]) << (2*8); /* fall through */
    case 2: h ^= ((u_int64_t)data[1]) << (1*8); /* fall through */
    case 1: h ^= ((u_int64_t)data[0]) << (0*8);
    }
    h *= m;

    // Do a few final mixes of the hash to ensure the last few
    // bytes are well-incorporated.
    h ^= h >> 29;
    h *= m;
    h ^= h >> 31;
    return h;
}
// 32-bit MurmurHash2-style hash used as a baseline in this benchmark.
//   key   the bytes to hash (any alignment)
//   len   number of bytes at key
//   seed  initial hash value (the length is not folded into it here)
// Returns the 32-bit hash.  Words are taken in host byte order.
// Each word is fetched with memcpy instead of through a cast pointer, so calls such as
// MurmurHash2(buf+1, ...) do not perform a misaligned load.
unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
{
    // 'm' and 'r' are mixing constants generated offline.
    // They're not really 'magic', they just happen to work well.
    const unsigned int m = 0x5bd1e995;
    const int r = 24;

    // Initialize the hash to a 'random' value
    unsigned int h = seed;

    // Mix 4 bytes at a time into the hash
    const unsigned char * data = (const unsigned char *)key;
    while(len >= 4)
    {
	unsigned int k;
	memcpy(&k, data, sizeof(k));
	k *= m;
	k ^= k >> r;
	k *= m;
	h *= m;
	h ^= k;
	data += 4;
	len -= 4;
    }

    // Handle the last few bytes of the input array; the cases deliberately fall through.
    switch(len)
    {
    case 3: h ^= data[2] << 16; /* fall through */
    case 2: h ^= data[1] << 8;  /* fall through */
    case 1: h ^= data[0];
	h *= m;
    };

    // Do a few final mixes of the hash to ensure the last few
    // bytes are well-incorporated.
    h ^= h >> 29;
    h *= m;
    h ^= h >> 31;
    return h;
}
// Size in bytes of the benchmark input, and the input buffer itself.
int n;
char *buf;

// (Re)fill the benchmark buffer with n = 1<<20 pseudo-random bytes.
// The buffer is allocated on the first call only and reused afterwards: main() calls setup()
// once per round, and allocating afresh every time leaked the previous 1 MiB buffer.
// Exits with a diagnostic if the allocation fails.
void setup(void) {
    int i;
    n=1<<20;
    if (buf==0) {
	buf=malloc(n);
	if (buf==0) {
	    perror("malloc");
	    exit(1);
	}
    }
    for (i=0; i<n; i++) {
	buf[i]=random();
    }
}
// Elapsed time from *start to *end, in seconds (the microsecond parts are scaled by 1e-6).
double tdiff (struct timeval *end, struct timeval *start) {
    const double whole_seconds = end->tv_sec - start->tv_sec;
    const double fractional    = 1e-6*(end->tv_usec - start->tv_usec);
    return whole_seconds + fractional;
}
// TIMEIT(str, x): evaluate the expression x ten times, measure the wall-clock time t of the
// whole loop with gettimeofday(), and print str, t and the rate n*1e-5/t (no trailing newline).
// The loop counter j is not declared here; the caller must have an integer j in scope.
// n is whatever n is visible at the point of use.
// NOTE(review): the rate is labelled MB/s, which presumes each evaluation of x processes about
// n bytes (10 passes * n bytes / 1e6) -- confirm for new call sites.
// Uses a GNU statement expression, so this requires a GCC-compatible compiler.
#define TIMEIT(str,x) ({ struct timeval start,end; \
gettimeofday(&start, 0); \
for (j=0; j<10; j++) x; \
gettimeofday(&end, 0); \
double t = tdiff(&end, &start); \
printf("%s t=%9.6f r=%7.1fMB/s", str, t, n*1e-5/t); \
})
// Benchmark driver.
//  1) Sanity checks: the incremental murmur fed one byte at a time must equal murmur_string(),
//     and ntoh64() must place byte i of a buffer at bit position 8*(7-i).
//  2) Ten rounds of timings over a fresh random buffer for each hash (aligned and buf+1).
//  3) Prints a few fixed hashes so that the variants can be compared by eye.
// The running sum h only exists to give every timed expression an observable result.
int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) {
    int i;
    {
	struct murmur mm;
	u_int32_t ah = murmur_string("abcdefgh", 8);
	murmur_init(&mm);
	for (i=0; i<8; i++) {
	    char a='a'+i;
	    murmur_add(&mm, &a, 1);
	}
	u_int32_t ih = murmur_finish(&mm);
	assert(ih==ah);
    }
    for (i=0; i<8; i++) {
	char v[8];
	memset(v, 0, 8);
	v[i]=1;
	// Copy rather than cast: a char array carries no 8-byte alignment guarantee.
	u_int64_t nv;
	memcpy(&nv, v, sizeof(nv));
	u_int64_t hv = ntoh64(nv);
	u_int64_t expect = (1ULL << (8*(7-i)));
	//printf("nv=%016llx\nhv=%016llx\nE =%016llx\n", (unsigned long long)nv, (unsigned long long) hv, (unsigned long long)expect);
	assert(hv==expect);
    }
    u_int64_t h=0,h2;
    u_int32_t h3;
    for (i=0; i<10; i++) {
	int j;
	setup();
	h=0;
	TIMEIT("bm ", h+=(h2=bmurmur(buf, n))); printf(" t=%016llx\n", (unsigned long long)h2);
	TIMEIT("bm+1 ", h+=(h2=bmurmur(buf+1, n-1))); printf(" t=%016llx\n", (unsigned long long)h2);
	TIMEIT("m2 ", h+=(h3=MurmurHash2(buf, n, 0x3dd3b51a))); printf(" t=%08x\n", h3);
	h=0; TIMEIT("m2+1 ", h+=(h3=MurmurHash2(buf+1, n-1, 0x3dd3b51a))); printf(" t=%08x\n", h3);
	h=0; TIMEIT("mm ", h+=(h3=murmur_string(buf, n))); printf(" t=%08x\n", h3);
	h=0; TIMEIT("mm+1 ", h+=(h3=murmur_string(buf+1, n-1))); printf(" t=%08x\n", h3);
	{
	    struct murmur mm;
	    // Two-piece incremental run over the whole buffer: first half, then second half.
	    // (The second call used to start at buf again, hashing the first half twice, so the
	    // printed value could not be compared with the "mm" line above.)
	    h=0; TIMEIT("mm(2)", ({murmur_init(&mm); murmur_add(&mm, buf, n/2); murmur_add(&mm, buf+n/2, n-n/2); h+=(h2=murmur_finish(&mm));})); printf(" t=%08llx\n", (unsigned long long)h2);
	}
	h=0; TIMEIT("crc ", h+=(h3=crc32(0L, (Bytef*)buf, n))); printf(" t=%08x\n", h3);
	h=0; TIMEIT("crc+1", h+=(h3=crc32(0L, (Bytef*)buf+1, n-1))); printf(" t=%08x\n", h3);
	printf("\n");
    }
    printf("h=%llu\n", (unsigned long long)h);
    printf("M2(0)=%08x\n", MurmurHash2("", 0, 0x3dd3b51a));
    printf("m2(0)=%08x\n", murmur_string("", 0));
    printf("M2(1)=%08x\n", MurmurHash2("a", 1, 0x3dd3b51a));
    printf("m2(1)=%08x\n", murmur_string("a", 1));
    printf("M2(4)=%08x\n", MurmurHash2("abcd", 4, 0x3dd3b51a));
    printf("m2(4)=%08x\n", murmur_string("abcd", 4));
    return 0;
}
...@@ -3,18 +3,13 @@ ...@@ -3,18 +3,13 @@
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved." #ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#include "murmur.h"
#include "memory.h" #include "memory.h"
#include "toku_assert.h" #include "toku_assert.h"
#include <errno.h> #include <errno.h>
#include <string.h> #include <string.h>
//#define CRC_NO
#define CRC_INCR #define CRC_INCR
//#define CRC_ATEND
#ifndef CRC_NO
#include "crc.h"
#endif
/* When serializing a value, write it into a buffer. */ /* When serializing a value, write it into a buffer. */
/* This code requires that the buffer be big enough to hold whatever you put into it. */ /* This code requires that the buffer be big enough to hold whatever you put into it. */
...@@ -24,27 +19,21 @@ struct wbuf { ...@@ -24,27 +19,21 @@ struct wbuf {
unsigned char *buf; unsigned char *buf;
unsigned int size; unsigned int size;
unsigned int ndone; unsigned int ndone;
#ifdef CRC_INCR struct murmur murmur; // The murmur state
u_int32_t crc32; // A 32-bit CRC of everything written so far.
#endif
}; };
static inline void wbuf_init (struct wbuf *w, void *buf, DISKOFF size) { static inline void wbuf_init (struct wbuf *w, void *buf, DISKOFF size) {
w->buf=buf; w->buf=buf;
w->size=size; w->size=size;
w->ndone=0; w->ndone=0;
#ifdef CRC_INCR murmur_init(&w->murmur);
w->crc32 = toku_crc32(toku_null_crc, Z_NULL, 0);
#endif
} }
/* Write a character. */ /* Write a character. */
static inline void wbuf_char (struct wbuf *w, unsigned int ch) { static inline void wbuf_char (struct wbuf *w, unsigned int ch) {
assert(w->ndone<w->size); assert(w->ndone<w->size);
w->buf[w->ndone++]=ch; w->buf[w->ndone++]=ch;
#ifdef CRC_INCR murmur_add(&w->murmur, &w->buf[w->ndone-1], 1);
w->crc32 = toku_crc32(w->crc32, &w->buf[w->ndone-1], 1);
#endif
} }
static void wbuf_int (struct wbuf *w, int32_t i) { static void wbuf_int (struct wbuf *w, int32_t i) {
...@@ -63,9 +52,7 @@ static void wbuf_int (struct wbuf *w, int32_t i) { ...@@ -63,9 +52,7 @@ static void wbuf_int (struct wbuf *w, int32_t i) {
#else #else
*(u_int32_t*)(&w->buf[w->ndone]) = htonl(i); *(u_int32_t*)(&w->buf[w->ndone]) = htonl(i);
#endif #endif
#ifdef CRC_INCR murmur_add(&w->murmur, &w->buf[w->ndone], 4);
w->crc32 = toku_crc32(w->crc32, &w->buf[w->ndone], 4);
#endif
w->ndone += 4; w->ndone += 4;
#endif #endif
} }
...@@ -80,9 +67,7 @@ static inline void wbuf_literal_bytes(struct wbuf *w, bytevec bytes_bv, u_int32_ ...@@ -80,9 +67,7 @@ static inline void wbuf_literal_bytes(struct wbuf *w, bytevec bytes_bv, u_int32_
#else #else
assert(w->ndone + nbytes <= w->size); assert(w->ndone + nbytes <= w->size);
memcpy(w->buf + w->ndone, bytes, (size_t)nbytes); memcpy(w->buf + w->ndone, bytes, (size_t)nbytes);
#ifdef CRC_INCR murmur_add(&w->murmur, &w->buf[w->ndone], nbytes);
w->crc32 = toku_crc32(w->crc32, &w->buf[w->ndone], nbytes);
#endif
w->ndone += nbytes; w->ndone += nbytes;
#endif #endif
......
...@@ -82,7 +82,7 @@ RANGETREE_BINS = range_tree/rangetree.o ...@@ -82,7 +82,7 @@ RANGETREE_BINS = range_tree/rangetree.o
LOCKTREE_BINS = lock_tree/locktree.o lock_tree/rth.o lock_tree/lth.o lock_tree/idlth.o lock_tree/db_id.o $(RANGETREE_BINS) LOCKTREE_BINS = lock_tree/locktree.o lock_tree/rth.o lock_tree/lth.o lock_tree/idlth.o lock_tree/db_id.o $(RANGETREE_BINS)
$(LIBRARY): $(DBBINS) | buildlocktrees $(LIBRARY): $(DBBINS) | buildlocktrees
$(CC) $(CPPFLAGS) $^ $(LOCKTREE_BINS) $(SHARED) -o $@ $(CFLAGS) -lz $(RPATHNAME) $(CC) $(CPPFLAGS) $^ $(LOCKTREE_BINS) $(SHARED) -o $@ $(CFLAGS) $(RPATHNAME)
$(LIBNAME).a: $(DBBINS) | buildlocktrees $(LIBNAME).a: $(DBBINS) | buildlocktrees
$(AR) cr $@ $^ $(LOCKTREE_BINS) $(AR) cr $@ $^ $(LOCKTREE_BINS)
...@@ -90,7 +90,7 @@ $(LIBNAME).a: $(DBBINS) | buildlocktrees ...@@ -90,7 +90,7 @@ $(LIBNAME).a: $(DBBINS) | buildlocktrees
$(LIBNAME).a(ydb.o): ydb.o $(LIBNAME).a(ydb.o): ydb.o
$(TLIBRARY): $(TDBBINS) | buildlocktrees $(TLIBRARY): $(TDBBINS) | buildlocktrees
$(CC) $(CPPFLAGS) $^ $(LOCKTREE_BINS) $(SHARED) -o $@ $(CFLAGS) -lz $(RPATHNAME) $(CC) $(CPPFLAGS) $^ $(LOCKTREE_BINS) $(SHARED) -o $@ $(CFLAGS) $(RPATHNAME)
$(TLIBNAME).a: $(TDBBINS) | buildlocktrees $(TLIBNAME).a: $(TDBBINS) | buildlocktrees
$(AR) cr $@ $^ $(LOCKTREE_BINS) $(AR) cr $@ $^ $(LOCKTREE_BINS)
......
...@@ -21,4 +21,4 @@ $(ROOT)lib/libtokudb.a: $(SRC_OFILES) ...@@ -21,4 +21,4 @@ $(ROOT)lib/libtokudb.a: $(SRC_OFILES)
$(AR) rv $@ $^ $(AR) rv $@ $^
$(ROOT)lib/libtokudb.$(LIBEXT): $(SRC_OFILES) $(ROOT)lib/libtokudb.$(LIBEXT): $(SRC_OFILES)
cc $^ $(SHARED) -o $@ $(CFLAGS) -lz $(RPATHNAME) cc $^ $(SHARED) -o $@ $(CFLAGS) $(RPATHNAME)
...@@ -118,13 +118,13 @@ foo: ...@@ -118,13 +118,13 @@ foo:
echo ../locktree.h test.h $(LT_LINEAR) echo ../locktree.h test.h $(LT_LINEAR)
%.lin: %.c ../locktree.h test.h $(LT_LINEAR) %.lin: %.c ../locktree.h test.h $(LT_LINEAR)
$(CC) -DDIR=\"dir.$<.lin\" $(CPPFLAGS) $(CFLAGS) $< -o $@ $(LT_LINEAR) $(NEWBRT_BINS) -lz $(CC) -DDIR=\"dir.$<.lin\" $(CPPFLAGS) $(CFLAGS) $< -o $@ $(LT_LINEAR) $(NEWBRT_BINS)
%.tlin: %.c ../locktree.h test.h $(LT_TLINEAR) %.tlin: %.c ../locktree.h test.h $(LT_TLINEAR)
$(CC) -DDIR=\"dir.$<.tlin\" $(CPPFLAGS) $(CFLAGS) $< -o $@ $(LT_TLINEAR) $(NEWBRT_BINS) -DTOKU_RT_NOOVERLAPS -lz $(CC) -DDIR=\"dir.$<.tlin\" $(CPPFLAGS) $(CFLAGS) $< -o $@ $(LT_TLINEAR) $(NEWBRT_BINS) -DTOKU_RT_NOOVERLAPS
%.tlog: %.c ../locktree.h test.h $(LT_TLOG) %.tlog: %.c ../locktree.h test.h $(LT_TLOG)
$(CC) -DDIR=\"dir.$<.tlog\" $(CPPFLAGS) $(CFLAGS) $< -o $@ $(LT_TLOG) $(NEWBRT_BINS) -DTOKU_RT_NOOVERLAPS -lz $(CC) -DDIR=\"dir.$<.tlog\" $(CPPFLAGS) $(CFLAGS) $< -o $@ $(LT_TLOG) $(NEWBRT_BINS) -DTOKU_RT_NOOVERLAPS
%.log: %.c ../locktree.h test.h $(LT_LOG) %.log: %.c ../locktree.h test.h $(LT_LOG)
$(CC) -DDIR=\"dir.$<.log\" $(CPPFLAGS) $(CFLAGS) $< -o $@ $(LT_LOG) $(NEWBRT_BINS) -lz $(CC) -DDIR=\"dir.$<.log\" $(CPPFLAGS) $(CFLAGS) $< -o $@ $(LT_LOG) $(NEWBRT_BINS)
clean: clean:
rm -f $(ALL_TESTS) *.o *.gcno *.gcda *.gcov rm -f $(ALL_TESTS) *.o *.gcno *.gcda *.gcov
......
...@@ -15,7 +15,7 @@ OPTFLAGS=-O0 ...@@ -15,7 +15,7 @@ OPTFLAGS=-O0
# GCOV_FLAGS = -fprofile-arcs -ftest-coverage # GCOV_FLAGS = -fprofile-arcs -ftest-coverage
CFLAGS = -W -Wall -Wextra -Werror $(OPTFLAGS) -g3 -ggdb3 $(GCOV_FLAGS) CFLAGS = -W -Wall -Wextra -Werror $(OPTFLAGS) -g3 -ggdb3 $(GCOV_FLAGS)
CFLAGS += -Wbad-function-cast -Wcast-align -Wconversion -Waggregate-return CFLAGS += -Wbad-function-cast -Wcast-align -Wconversion -Waggregate-return
CFLAGS += -Wmissing-noreturn -Wmissing-format-attribute -lz CFLAGS += -Wmissing-noreturn -Wmissing-format-attribute
CPPFLAGS += -I../ -I../../../newbrt -I../../../include CPPFLAGS += -I../ -I../../../newbrt -I../../../include
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment