Commit fc631f31 authored by Bradley C. Kuszmaul

Speed things up by shrinking the hash table properly

git-svn-id: file:///svn/tokudb@42 c7de825b-a66e-492c-adef-691d508d4ae1
parent 166aea54
...@@ -13,11 +13,13 @@ long long parsell (char *s) { ...@@ -13,11 +13,13 @@ long long parsell (char *s) {
/*
 * Emit `count` lines of pseudo-random data on stdout, one
 * "<key>\t<value>" pair per line.
 *
 * Arguments (all parsed with parsell):
 *   argv[1]  count - number of lines to print
 *   argv[2]  range - keys are drawn modulo 100*range
 *   argv[3]  seed  - passed to srandom() so a run can be repeated
 *
 * Returns 0 on success; aborts via assert on a wrong argument count or a
 * non-positive range.
 */
int main (int argc, char *argv[]) {
    long long i;
    assert(argc==4);
    long long count=parsell(argv[1]);
    long long range=100*parsell(argv[2]);
    long long seed =parsell(argv[3]);
    /* A zero range would make the modulo below undefined. */
    assert(range>0);
    srandom((unsigned int)seed);
    for (i=0; i<count; i++) {
	/* Draw the two numbers in separate statements.  The order in which
	 * function arguments are evaluated is unspecified, so calling
	 * random() twice inside the printf argument list would let the
	 * compiler decide which draw becomes the key, defeating the seed. */
	long long key   = random()%range;
	long      value = random();
	printf("%lld\t%ld\n", key, value);
    }
    return 0;
}
......
# GCOV_FLAGS = -fprofile-arcs -ftest-coverage # GCOV_FLAGS = -fprofile-arcs -ftest-coverage
#PROF_FLAGS = -pg # PROF_FLAGS = -pg
#OPTFLAGS = -O2 #OPTFLAGS = -O2
CFLAGS = -Wall -W $(OPTFLAGS) -g $(GCOV_FLAGS) $(PROF_FLAGS) -Werror -fPIC CFLAGS = -Wall -W $(OPTFLAGS) -g $(GCOV_FLAGS) $(PROF_FLAGS) -Werror -fPIC
LDFLAGS = $(OPTFLAGS) -g $(GCOV_FLAGS) $(PROF_FLAGS) LDFLAGS = $(OPTFLAGS) -g $(GCOV_FLAGS) $(PROF_FLAGS)
...@@ -35,6 +35,7 @@ hashtest: hashtable.o memory.o ...@@ -35,6 +35,7 @@ hashtest: hashtable.o memory.o
brt-serialize.o: brt.h cachetable.h memory.h mdict.h pma.h brttypes.h brt-internal.h hashtable.h brt-serialize.o: brt.h cachetable.h memory.h mdict.h pma.h brttypes.h brt-internal.h hashtable.h
header-io.o: brttypes.h brt-internal.h memory.h header-io.o: brttypes.h brt-internal.h memory.h
mdict-test: hashtable.o pma.o memory.o mdict-test: hashtable.o pma.o memory.o
brt-bigtest: memory.o ybt.o brt.o pma.o cachetable.o key.o hashtable.o brt-serialize.o
brt-serialize-test: brt-serialize-test.o brt-serialize.o memory.o hashtable.o pma.o key.o ybt.o brt-serialize-test: brt-serialize-test.o brt-serialize.o memory.o hashtable.o pma.o key.o ybt.o
......
...@@ -173,6 +173,12 @@ static unsigned int hash_key (const char *key, int keylen) { ...@@ -173,6 +173,12 @@ static unsigned int hash_key (const char *key, int keylen) {
return hash; return hash;
} }
/*
 * Hash an unsigned long long by passing its raw bytes to hash_key().
 * As a side effect, prints a trace line to stdout showing the input, the
 * hash, and the hash reduced modulo 64.
 *
 * Returns the full (unreduced) hash value.
 */
unsigned int ct_hash_longlong (unsigned long long l) {
    /* Use sizeof rather than a literal 8 so the length always matches the
     * object whose bytes are being hashed. */
    unsigned int r = hash_key((char*)&l, (int)sizeof(l));
    /* Both l and r are unsigned; the signed conversions would print large
     * values as negative numbers. */
    printf("%llu --> %u --> %u\n", l, r, r%64);
    return r;
}
/*
 * Compute the bucket index for `key` in cachetable `t`: hash the raw bytes
 * of the key with hash_key() and reduce the result modulo t->table_size.
 */
static unsigned int hashit (CACHETABLE t, CACHEKEY key) {
    const char *keybytes = (char*)&key;
    unsigned int full_hash = hash_key(keybytes, sizeof(key));
    return full_hash%t->table_size;
}
......
...@@ -63,6 +63,27 @@ int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, I ...@@ -63,6 +63,27 @@ int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, I
} }
} }
/*
 * Rebuild the bucket array of `tab` with `newarraysize` buckets.
 *
 * Every element is unlinked from its old chain, rehashed with hash_key()
 * modulo the new size, and pushed onto the front of its new chain.  The old
 * bucket array is then freed and replaced.  Elements themselves are not
 * copied or freed.  Used both for growing and for shrinking the table.
 *
 * tab          - table to rehash; tab->array and tab->arraysize are updated.
 * newarraysize - number of buckets in the new array; must be positive.
 *
 * Returns 0.  Aborts via assert if newarraysize is not positive or the new
 * array cannot be allocated.
 */
int toku_hash_rehash_everything (HASHTABLE tab, int newarraysize) {
    HASHELT *newarray;
    int i;
    /* A bucket count of zero would make the modulo below undefined, and a
     * negative count is not a meaningful array size. */
    assert(newarraysize>0);
    newarray = toku_calloc(newarraysize, sizeof(*tab->array));
    assert(newarray!=0);
    /* Start with every new bucket empty. */
    for (i=0; i<newarraysize; i++) newarray[i]=0;
    for (i=0; i<tab->arraysize; i++) {
	HASHELT he;
	/* Drain the old chain one element at a time. */
	while ((he=tab->array[i])!=0) {
	    unsigned int h = hash_key(he->key, he->keylen)%newarraysize;
	    tab->array[i] = he->next;   /* unlink from the old chain */
	    he->next = newarray[h];     /* push onto the new chain */
	    newarray[h] = he;
	}
    }
    toku_free(tab->array);
    tab->array=newarray;
    tab->arraysize=newarraysize;
    return 0;
}
int toku_hash_insert (HASHTABLE tab, const char *key, ITEMLEN keylen, const char *val, ITEMLEN vallen) int toku_hash_insert (HASHTABLE tab, const char *key, ITEMLEN keylen, const char *val, ITEMLEN vallen)
{ {
...@@ -85,24 +106,7 @@ int toku_hash_insert (HASHTABLE tab, const char *key, ITEMLEN keylen, const char ...@@ -85,24 +106,7 @@ int toku_hash_insert (HASHTABLE tab, const char *key, ITEMLEN keylen, const char
tab->array[h]=he; tab->array[h]=he;
tab->n_keys++; tab->n_keys++;
if (tab->n_keys > tab->arraysize) { if (tab->n_keys > tab->arraysize) {
int newarraysize = tab->arraysize*2; return toku_hash_rehash_everything(tab, tab->arraysize*2);
HASHELT *newarray = toku_calloc(newarraysize, sizeof(*tab->array));
int i;
assert(newarray!=0);
for (i=0; i<newarraysize; i++) newarray[i]=0;
for (i=0; i<tab->arraysize; i++) {
while ((he=tab->array[i])!=0) {
h = hash_key(he->key, he->keylen)%newarraysize;
tab->array[i] = he->next;
he->next = newarray[h];
newarray[h] = he;
}
}
toku_free(tab->array);
// printf("Freed\n");
tab->array=newarray;
tab->arraysize=newarraysize;
//printf("Done growing\n");
} }
return BRT_OK; return BRT_OK;
} }
...@@ -122,6 +126,10 @@ int toku_hash_delete (HASHTABLE tab, const char *key, ITEMLEN keylen) { ...@@ -122,6 +126,10 @@ int toku_hash_delete (HASHTABLE tab, const char *key, ITEMLEN keylen) {
toku_free(he->val); toku_free(he->val);
toku_free(he); toku_free(he);
tab->n_keys--; tab->n_keys--;
if ((tab->n_keys * 4 < tab->arraysize) && tab->arraysize>4) {
return toku_hash_rehash_everything(tab, tab->arraysize/2);
}
return BRT_OK; return BRT_OK;
} }
} }
...@@ -129,8 +137,10 @@ int toku_hash_delete (HASHTABLE tab, const char *key, ITEMLEN keylen) { ...@@ -129,8 +137,10 @@ int toku_hash_delete (HASHTABLE tab, const char *key, ITEMLEN keylen) {
int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen) { int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen) {
int i; int i;
for (i=0; i<h->arraysize; i++) { int usei = random()%h->arraysize;
HASHELT he=h->array[i]; for (i=0; i<h->arraysize; i++, usei++) {
if (usei>h->arraysize) usei=0;
HASHELT he=h->array[usei];
if (he) { if (he) {
*key = he->key; *key = he->key;
*keylen = he->keylen; *keylen = he->keylen;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment