Commit 5079c146 authored by Yoni Fogel's avatar Yoni Fogel

Addresses #329

Most of the implementation of the range tree hash table

git-svn-id: file:///svn/tokudb@2041 c7de825b-a66e-492c-adef-691d508d4ae1
parent 2ac3d9fd
...@@ -9,42 +9,137 @@ ...@@ -9,42 +9,137 @@
*/ */
//Defines BOOL data type. #include "hash_table.h"
#include <brttypes.h>
static uint32 __toku_rth_hash(toku_rt_hashtable* table, DB_TXN* key) {
typedef u_int32_t uint32; assert(table);
size_t tmp = (size_t)key;
/* TODO: reallocate the hash table if it grows too big. Perhaps, use toku_get_prime in newbrt/primes.c */ return tmp % table->array_size;
const uint32 __toku_rth_init_size = 521; }
typedef struct __toku_rt_forest toku_rt_forest; int toku_rth_create(toku_rt_hashtable** ptable,
struct __toku_rt_forest { void* (*user_malloc) (size_t),
toku_range_tree* selfread; void (*user_free) (void*),
toku_range_tree* selfwrite; void* (*user_realloc)(void*, size_t)) {
}; int r;
toku_rt_hashtable* tmp = (toku_rt_hashtable*)user_malloc(sizeof(*tmp));
typedef struct __toku_rth_elt toku_rth_elt; if (0) { died1: user_free(tmp); return r; }
struct __toku_rth_elt { if (!tmp) return errno;
DB_TXN* key;
toku_range_forest value; memset(tmp, 0, sizeof(*tmp));
toku_rth_elt* next; tmp->malloc = user_malloc;
}; tmp->free = user_free;
tmp->realloc = user_realloc;
typedef struct { tmp->array_size = __toku_rth_init_size;
uint32 index; tmp->table = (toku_rth_elt**)
toku_rth_elt* next; tmp->malloc(tmp->array_size * sizeof(*tmp->table));
} toku_rth_finger; if (!tmp->table) { r = errno; goto died1; }
*ptable = tmp;
typedef struct __toku_rt_hash_elt toku_rt_hash_elt; return 0;
struct toku_rt_hashtable { }
toku_rth_elt** table;
uint32 num_keys; toku_rt_forest* toku_rth_find(toku_rt_hashtable* table, DB_TXN* key) {
uint32 array_size; assert(table && key);
};
uint32 index = __toku_rth_hash(table, key);
int toku_rth_create(toku_rt_hashtable** ptable); toku_rt_hash_elt* element = table->table[index];
while (element && element->key != key) element = element->next;
int toku_rth_find(toku_rt_hashtable* table, DB_TXN* key, toku_rt_forest* value, BOOL* found); return element ? &element->value : NULL;
int toku_rth_scan(toku_rt_hashtable* table, toku_rt_forest* value, toku_rth_finger* finger); }
int toku_rth_delete(toku_rt_hashtable* table, DB_TXN* key);
int toku_rth_close(toku_rt_hashtable* table); void toku_rth_start_scan(toku_rt_hashtable* table) {
assert(table);
table->finger_index = 0;
table->finger_ptr = NULL;
}
/*
 * Advance the scan cursor kept inside the table and return the forest stored
 * in the next element, or NULL once every bucket has been visited.
 *
 * toku_rth_start_scan() must be called first; it resets finger_index to 0 and
 * finger_ptr to NULL.  finger_index is the index of the next bucket that has
 * not been inspected yet, finger_ptr is the element returned most recently.
 */
toku_rt_forest* toku_rth_next(toku_rt_hashtable* table) {
    assert(table);
    /* First try to continue along the chain we are already in. */
    if (table->finger_ptr) table->finger_ptr = table->finger_ptr->next;
    /* Chain exhausted (or scan just started): look for the next non-empty
       bucket.  Post-increment so that bucket 0 is visited and
       table->table[array_size] is never read. */
    while (!table->finger_ptr && table->finger_index < table->array_size) {
        table->finger_ptr = table->table[table->finger_index++];
    }
    /* Same convention as toku_rth_find: hand out the value, not the node. */
    return table->finger_ptr ? &table->finger_ptr->value : NULL;
}

/*
 * Remove the element stored under `key`.
 *
 * The node is not released; it is pushed on table->free_list so that a later
 * toku_rth_insert can reuse it.
 *
 * Returns 0 on success and EDOM when the key is not present.
 */
int toku_rth_delete(toku_rt_hashtable* table, DB_TXN* key) {
    assert(table && key);
    /* No elements at all. */
    if (!table->num_keys) return EDOM;

    uint32 index = __toku_rth_hash(table, key);
    /* `link` points at the pointer that refers to the current node, so the
       head of the bucket and the middle of the chain are handled alike. */
    toku_rth_elt** link = &table->table[index];
    while (*link && (*link)->key != key) link = &(*link)->next;

    toku_rth_elt* element = *link;
    /* Not found. */
    if (!element) return EDOM;

    /* Unlink from the bucket and recycle onto the free list. */
    *link              = element->next;
    element->next      = table->free_list;
    table->free_list   = element;
    table->num_keys--;
    return 0;
}

/*
 * Destroy the table: release every element still stored in a bucket, every
 * recycled element on the free list, the bucket array and the table itself,
 * all through the user supplied free function.
 *
 * The buckets are walked directly instead of through toku_rth_next, because
 * toku_rth_next hands out pointers to the stored values, which must not be
 * passed to free.
 */
void toku_rth_close(toku_rt_hashtable* table) {
    assert(table);
    toku_rth_elt* element;
    toku_rth_elt* next;
    uint32 index;

    for (index = 0; index < table->array_size; index++) {
        for (element = table->table[index]; element; element = next) {
            /* Read the link before the node is released. */
            next = element->next;
            table->free(element);
        }
    }
    for (element = table->free_list; element; element = next) {
        next = element->next;
        table->free(element);
    }
    table->free(table->table);
    table->free(table);
}
/* Will allow you to insert it over and over. You need to keep track. */
int toku_rth_insert(toku_rt_hashtable* table, DB_TXN* key,
toku_rt_forsest* value) {
assert(table && key && value);
uint32 index = __toku_rth_hash(table, key);
toku_rt_hash_elt* next = table->table[index];
/* Recycle */
toku_rt_hash_elt* element;
if (table->free_list) {
element = table->free_list;
table->free_list = table->free_list->next;
}
else {
/* Allocate a new one. */
element = (toku_rt_hash_elt*)table->malloc(sizeof(*element));
if (!element) return errno;
}
element->next = table->table[index];
table->table[index]->next = element;
table->num_keys++;
return 0;
}
\ No newline at end of file
...@@ -27,7 +27,7 @@ typedef struct __toku_rth_elt toku_rth_elt; ...@@ -27,7 +27,7 @@ typedef struct __toku_rth_elt toku_rth_elt;
struct __toku_rth_elt { struct __toku_rth_elt {
DB_TXN* key; DB_TXN* key;
toku_range_forest value; toku_range_forest value;
toku_rth_elt* next; toku_rth_elt* next;
}; };
typedef struct { typedef struct {
...@@ -37,14 +37,34 @@ typedef struct { ...@@ -37,14 +37,34 @@ typedef struct {
typedef struct __toku_rt_hash_elt toku_rt_hash_elt; typedef struct __toku_rt_hash_elt toku_rt_hash_elt;
struct toku_rt_hashtable { struct toku_rt_hashtable {
toku_rth_elt** table; toku_rth_elt** table;
uint32 num_keys; uint32 num_keys;
uint32 array_size; uint32 array_size;
uint32 finger_index;
toku_rth_elt* finger_ptr;
toku_rth_elt* free_list;
/** The user malloc function */
void* (*malloc) (size_t);
/** The user free function */
void (*free) (void*);
/** The user realloc function */
void* (*realloc)(void*, size_t);
}; };
int toku_rth_create(toku_rt_hashtable** ptable); int toku_rth_create(toku_rt_hashtable** ptable,
void* (*user_malloc) (size_t),
void (*user_free) (void*),
void* (*user_realloc)(void*, size_t));
void toku_rth_find(toku_rt_hashtable* table, DB_TXN* key, toku_rt_forest* value,
BOOL* found);
void toku_rth_start_scan(toku_rt_hashtable* table);
toku_rt_forest* toku_rth_next(toku_rt_hashtable* table);
int toku_rth_find(toku_rt_hashtable* table, DB_TXN* key, toku_rt_forest* value, BOOL* found);
int toku_rth_scan(toku_rt_hashtable* table, toku_rt_forest* value, toku_rth_finger* finger);
int toku_rth_delete(toku_rt_hashtable* table, DB_TXN* key); int toku_rth_delete(toku_rt_hashtable* table, DB_TXN* key);
int toku_rth_close(toku_rt_hashtable* table);
void toku_rth_close(toku_rt_hashtable* table);
int toku_rth_insert(toku_rt_hashtable* table, DB_TXN* key,
toku_rt_forsest* value);
This diff is collapsed.
#ifndef HASHTABLE_H
#define HASHTABLE_H
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#include "brttypes.h"
/* Hash table with chaining. */
/* The keys and values are byte sequences. */
/* The keys and values are malloc'd by the hashtable. */
/* Duplicate keys are allowed by default and are stored in a FIFO list */
typedef struct hashtable *HASHTABLE;
int toku_hashtable_create (HASHTABLE*);
/* Return 0 if the key is found in the hashtable, -1 otherwise. */
/* Warning: The data returned points to the internals of the hashtable. It is set to "const" to try to prevent you from messing it up. */
int toku_hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, ITEMLEN *datalen, int *type);
/* match on key, index on duplicates */
int toku_hash_find_idx (HASHTABLE tab, bytevec key, ITEMLEN keylen, int idx, bytevec *data, ITEMLEN *datalen, int *type);
/* Insert the key/data pair into the hash table.
If the key is not in the hash table then insert it.
If the key already exists and duplicates are allowed then append it to the list of duplicates.
If the key already exists and duplicates are not allowed then return an error */
int toku_hash_insert (HASHTABLE tab, const void *key, ITEMLEN keylen, const void *data, ITEMLEN datalen, int type);
/* Delete the first entry with the given key
It is OK to delete something that isn't there. */
int toku_hash_delete (HASHTABLE tab, const void *key, ITEMLEN keylen);
/* Delete all entries with the given key */
int toku_hash_delete_all (HASHTABLE tab, const void *key, ITEMLEN keylen);
void toku_hashtable_free(HASHTABLE *tab);
int toku_hashtable_n_entries(HASHTABLE);
void toku_hashtable_clear(HASHTABLE);
int toku_hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen, int *type, long int *randomnumber);
//int hashtable_find_last(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen);
/* One key/value pair. The key and the value bytes are stored inline, back to
   back, in the trailing keyval[] array. */
typedef struct hashelt *HASHELT;
struct hashelt {
/* Next pair in the same list; a null pointer ends the list (this is the link
   HASHTABLE_ITERATE follows). */
HASHELT next;
/* presumably the cached hash of the key -- TODO confirm against hashtable.c */
unsigned int hash;
/* Caller supplied tag; it is passed to toku_hash_insert and reported back by
   the find/iterate interfaces. */
int type;
/* Number of key bytes at the start of keyval[]. */
ITEMLEN keylen;
/* Number of value bytes that follow the key in keyval[]. */
ITEMLEN vallen;
char keyval[]; /* the first KEYLEN bytes are the key. The next bytes are the value. */
};
/* A singly linked list of hashelt nodes, chained through hashelt.next. */
struct hashelt_list {
/* First node; HASHTABLE_ITERATE starts walking here. */
HASHELT head;
/* presumably the last node, kept so new pairs can be appended -- TODO confirm */
HASHELT tail;
};
/* One entry of a bucket chain. Each entry owns a list of key/value pairs.
   NOTE(review): presumably all pairs in kdlist share the same key (the
   "list of duplicates" mentioned for toku_hash_insert) -- confirm. */
typedef struct hashdup *HASHDUP;
struct hashdup {
/* Next entry in the same bucket; a null pointer ends the chain. */
HASHDUP next;
/* The key/value pairs held by this entry. */
struct hashelt_list kdlist;
};
/* The hash table itself: an array of bucket chains. */
struct hashtable {
/* Bucket heads; HASHTABLE_ITERATE indexes it from 0 to arraysize-1. */
HASHDUP *array;
/* presumably the number of keys currently stored -- TODO confirm */
unsigned int n_keys;
/* Number of buckets in array. */
unsigned int arraysize;
/* presumably an index into a table of primes used to size the array -- TODO confirm */
unsigned int primeidx;
};
/* You cannot add or delete elements from the hashtable while iterating. */
void toku_hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen,int type, void*), void*);
// Run `body` once for every key/value pair in `table`.
//
// Buckets are visited from index 0 to arraysize-1; within a bucket every
// hashdup entry is visited in chain order, and within an entry every pair of
// its kdlist from head onwards.  For each pair the macro declares, in the
// scope of `body`:
//     const char *keyvar      first byte of the key
//     ITEMLEN     keylenvar   key length
//     const char *datavar     first byte of the value (stored right after the key)
//     ITEMLEN     datalenvar  value length
//     int         typevar     the pair's type tag
//
// `table` is evaluated more than once, so pass an expression without side
// effects.  It is parenthesized in the expansion, so expressions such as
// `*ptr_to_table` work.
// If you don't want to use something, do something like use "key __attribute__((__unused__))" for keyvar.
#define HASHTABLE_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,typevar,body) ({ \
    unsigned int hi_counter; \
    for (hi_counter=0; hi_counter<(table)->arraysize; hi_counter++) { \
        HASHDUP hi_dup; \
        for (hi_dup=(table)->array[hi_counter]; hi_dup; hi_dup=hi_dup->next) { \
            HASHELT hi_he; \
            for (hi_he=hi_dup->kdlist.head; hi_he; hi_he=hi_he->next) { \
                const char *keyvar = &hi_he->keyval[0]; \
                ITEMLEN keylenvar = hi_he->keylen; \
                const char *datavar = &hi_he->keyval[hi_he->keylen]; \
                ITEMLEN datalenvar = hi_he->vallen; \
                int typevar = hi_he->type; \
                body; \
            }}}})
#endif
/* -*- mode: C; c-basic-offset: 4 -*- */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#include "key.h"
#include "hashtable.h"
#include "memory.h"
#include "primes.h"
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <arpa/inet.h>
/*
 * Check one key/value pair coming out of the table.
 *
 * The key must be the string "k<num>" and the value "d<num>" with the same
 * digits, and both lengths must include the terminating NUL.  <num> is then
 * looked up in data[0..N-1]; the matching slot of saw[] must still be clear
 * and is marked.  If <num> is not in data[], a message is printed and the
 * process aborts.
 */
void verify_hash_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl,
                           int N, int *data, char *saw) {
    const char *key_str = (const char*)kv_v;
    const char *val_str = (const char*)dv_v;

    /* Shape of the pair. */
    assert(key_str[0]=='k');
    assert(val_str[0]=='d');
    assert(strcmp(key_str+1, val_str+1)==0);
    assert(strlen(key_str)+1==kl);
    assert(strlen(val_str)+1==dl);

    /* Find the number among the values that were inserted. */
    int wanted = atoi(key_str+1);
    int slot = 0;
    while (slot < N && data[slot] != wanted) {
        slot++;
    }
    if (slot >= N) {
        fprintf(stderr, "%s isn't there\n", key_str); abort();
    }

    /* Every inserted value may be reported only once. */
    assert(!saw[slot]);
    saw[slot]=1;
}
/*
 * Check one key/value/type triple coming out of the table.
 *
 * Same checks as for a plain pair: the key is "k<num>", the value "d<num>"
 * with the same digits, and both lengths include the terminating NUL.  In
 * addition the type tag must be 0.  <num> is then looked up in data[0..N-1];
 * the matching slot of saw[] must still be clear and is marked.  If <num> is
 * not in data[], a message is printed and the process aborts.
 */
void verify_htable_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl, int type,
                             int N, int *data, char *saw) {
    const char *key_str = (const char*)kv_v;
    const char *val_str = (const char*)dv_v;

    /* Shape of the triple. */
    assert(key_str[0]=='k');
    assert(val_str[0]=='d');
    assert(strcmp(key_str+1, val_str+1)==0);
    assert(strlen(key_str)+1==kl);
    assert(strlen(val_str)+1==dl);
    assert(type == 0);

    /* Find the number among the values that were inserted. */
    int wanted = atoi(key_str+1);
    int slot = 0;
    while (slot < N && data[slot] != wanted) {
        slot++;
    }
    if (slot >= N) {
        fprintf(stderr, "%s isn't there\n", key_str); abort();
    }

    /* Every inserted value may be reported only once. */
    assert(!saw[slot]);
    saw[slot]=1;
}
/*
 * Check that the table holds exactly the N values recorded in data[].
 *
 * saw[] is cleared, every pair in the table is checked and ticked off by
 * verify_htable_instance, and finally every slot of saw[] must be set.
 */
void verify_htable (HASHTABLE htable, int N, int *data, char *saw) {
    int slot;

    /* Start with nothing seen. */
    slot = 0;
    while (slot < N) {
        saw[slot++]=0;
    }

    /* Tick off every pair the table reports. */
    HASHTABLE_ITERATE(htable, kv, kl, dv, dl, type,
                      verify_htable_instance (kv, kl, dv, dl, type,
                                              N, data, saw));

    /* Nothing that was inserted may be missing. */
    slot = 0;
    while (slot < N) {
        assert(saw[slot]);
        slot++;
    }
}
/*
 * Randomized insert test.
 *
 * Runs n_ops steps.  Each step draws a random operation; only "insert" is
 * implemented, the delete and lookup branches are placeholders.  An insert
 * picks a random number that has not been used yet, stores "k<num>" ->
 * "d<num>" and records the number in data[].  After every step the whole
 * table is compared against data[] with verify_htable.
 */
void test0 (void) {
    int r, i, j;
    HASHTABLE htable;
    int n_ops=1000;
    int *data=malloc(sizeof(*data)*n_ops);
    char*saw =malloc(sizeof(*saw)*n_ops);
    int data_n = 0;
    assert(data!=0);
    assert(saw!=0);   /* saw[] is written by verify_htable on every step */
    r = toku_hashtable_create(&htable); assert(r==0);
    assert(toku_hashtable_n_entries(htable)==0);
#if 0
    {
	bytevec kv=(void*)0xdeadbeef;
	bytevec dv=(void*)0xbeefdead;
	ITEMLEN kl=42, dl=43;
	r = mdict_find_last(htable,&kv,&kl,&dv,&dl);
	assert(r!=0);
	assert((unsigned long)kv==0xdeadbeef);
	assert((unsigned long)dv==0xbeefdead);
	assert(kl==42);
	assert(dl==43);
    }
#endif
    for (i=0; i<n_ops; i++) {
	if (random()%4==1) {
	    // Delete something random
	} else if (random()%2 == 0) {
	    // Insert something
	    int ra;
	    int already_used;
	    char kv[100], dv[100];
	    /* Keep drawing until we get a number that is not in data[] yet. */
	    do {
		ra = random()%(1<<30);
		already_used = 0;
		for (j=0; j<data_n; j++) {
		    if (ra==data[j]) { already_used = 1; break; }
		}
	    } while (already_used);
	    snprintf(kv, sizeof kv, "k%d", ra);
	    snprintf(dv, sizeof dv, "d%d", ra);
	    r = toku_hash_insert(htable, kv, strlen(kv)+1, dv, strlen(dv)+1, 0);
	    assert(r==0);
	    data[data_n++]=ra;
	} else {
	    // Look up something
	}
	verify_htable(htable, data_n, data, saw);
    }
    toku_hashtable_free(&htable);
    free(data);
    free(saw);
}
/*
 * Insert / random-pick / delete round trips.
 *
 * 100 rounds; each round inserts four pairs with random keys and then four
 * times picks a random pair from the table and deletes it by key.  Every
 * call is expected to succeed.
 */
void test1(void) {
    HASHTABLE table;
    int i, r;
    r = toku_hashtable_create(&table); assert(r==0);
    for (i=0; i<100; i++) {
	char keys[4][100], vals[4][100];
	int j;
	for (j=0; j<4; j++) {
	    snprintf(keys[j], sizeof keys[j], "k%ld", (long)(random()));
	    snprintf(vals[j], sizeof vals[j], "v%d", j);
	    r = toku_hash_insert(table, keys[j], strlen(keys[j])+1, vals[j], strlen(vals[j])+1, 0);
	    /* The picks below rely on the pairs really being in the table. */
	    assert(r==0);
	}
	for (j=0; j<4; j++) {
	    bytevec key, val;
	    ITEMLEN keylen, vallen;
	    int type;
	    long int randnum=random();
	    r = toku_hashtable_random_pick(table, &key, &keylen, &val, &vallen, &type, &randnum);
	    assert(r==0);
	    r = toku_hash_delete(table, key, keylen);
	    assert(r==0);
	}
    }
    toku_hashtable_free(&table);
}
/*
 * With duplicates switched off: insert n distinct keys, read each one back
 * (length, type tag and value must match), then check that inserting any of
 * the keys a second time is rejected.  Finally free the table and check the
 * handle was cleared.
 */
void test_insert_nodup(int n) {
    HASHTABLE tab;
    int rc;
    int idx;

    rc = toku_hashtable_create(&tab);
    assert(rc == 0);
    toku_hashtable_set_dups(tab, 0);

    int keys[n], vals[n];

    /* Fresh keys must go in. */
    idx = 0;
    while (idx < n) {
        keys[idx] = htonl(idx);
        vals[idx] = idx;
        rc = toku_hash_insert(tab, &keys[idx], sizeof keys[idx], &vals[idx], sizeof vals[idx], idx);
        assert(rc == 0);
        idx++;
    }

    /* Every key must come back with the value and type it was stored with. */
    idx = 0;
    while (idx < n) {
        bytevec found;
        ITEMLEN found_len;
        int found_type;
        int stored;
        rc = toku_hash_find(tab, &keys[idx], sizeof keys[idx], &found, &found_len, &found_type);
        assert(rc == 0);
        assert(found_len == sizeof vals[idx]);
        assert(found_type == idx);
        memcpy(&stored, found, found_len);
        assert(stored == vals[idx]);
        idx++;
    }

    /* try to insert duplicates should fail */
    idx = 0;
    while (idx < n) {
        keys[idx] = htonl(idx);
        vals[idx] = idx;
        rc = toku_hash_insert(tab, &keys[idx], sizeof keys[idx], &vals[idx], sizeof vals[idx], idx);
        assert(rc != 0);
        idx++;
    }

    toku_hashtable_free(&tab);
    assert(tab == 0);
}
/*
 * With duplicates switched on: insert n distinct keys, then insert n pairs
 * that all share one extra key (dupkey, which is outside 0..n-1), with values
 * and type tags 0..n-1.
 *
 * The distinct keys must all read back unchanged.  The duplicate key is then
 * drained: each find must return the oldest remaining pair (value and type
 * equal to the loop counter), after which either that single pair
 * (do_delete_all == 0) or all pairs with the key (do_delete_all != 0) are
 * deleted.  The loop must therefore run n times, or once, respectively.
 */
void test_insert_dup(int n, int do_delete_all) {
    HASHTABLE t;
    int r;
    r = toku_hashtable_create(&t);
    assert(r == 0);
    toku_hashtable_set_dups(t, 1);
    int keys[n], vals[n];
    int dupkey = n + n/2;
    int i;
    for (i=0; i<n; i++) {
        keys[i] = htonl(i);
        vals[i] = i;
        r = toku_hash_insert(t, &keys[i], sizeof keys[i], &vals[i], sizeof vals[i], i);
        assert(r == 0);
    }
    for (i=0; i<n; i++) {
        int key = htonl(dupkey);
        int val = i;
        r = toku_hash_insert(t, &key, sizeof key, &val, sizeof val, i);
        assert(r == 0);
    }
    for (i=0; i<n; i++) {
        bytevec data; ITEMLEN datalen; int type;
        r = toku_hash_find(t, &keys[i], sizeof keys[i], &data, &datalen, &type);
        assert(r == 0);
        assert(datalen == sizeof vals[i]);
        assert(type == i);
        int vv;
        memcpy(&vv, data, datalen);
        assert(vv == vals[i]);
    }
    for (i=0; ; i++) {
        int key = htonl(dupkey);
        bytevec data; ITEMLEN datalen; int type;
        r = toku_hash_find(t, &key, sizeof key, &data, &datalen, &type);
        if (r != 0) break;
        /* Only n duplicates were inserted; finding more would also index
           past the end of vals[]. */
        assert(i < n);
        assert(datalen == sizeof vals[i]);
        assert(type == i);
        int vv;
        memcpy(&vv, data, datalen);
        assert(vv == vals[i]);
        if (do_delete_all)
            r = toku_hash_delete_all(t, &key, sizeof key);
        else
            r = toku_hash_delete(t, &key, sizeof key);
        assert(r == 0);
    }
    if (do_delete_all)
        assert(i == 1);
    else
        assert(i == n);
    toku_hashtable_free(&t);
    assert(t == 0);
}
/*
 * Test driver: runs the prime table self test, the two randomized tests, the
 * no-duplicates test and the duplicates test (first deleting one pair at a
 * time, then deleting all pairs at once), then releases the allocator state.
 */
int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) {
    enum { N_KEYS = 1000 };
    int delete_all;

    toku_test_primes();
    test0();
    test1();
    test_insert_nodup(N_KEYS);
    for (delete_all = 0; delete_all <= 1; delete_all++) {
        test_insert_dup(N_KEYS, delete_all);
    }
    toku_malloc_cleanup();
    return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment