Commit e804fe3d authored by Rich Prohaska's avatar Rich Prohaska

implement the rebalance and shrink after pma deletion

git-svn-id: file:///svn/tokudb@220 c7de825b-a66e-492c-adef-691d508d4ae1
parent 5fea2602
#include "pma.h" #include "pma.h"
struct pma_cursor { struct pma_cursor {
PMA pma; PMA pma;
...@@ -13,12 +13,14 @@ struct pma { ...@@ -13,12 +13,14 @@ struct pma {
int n_pairs_present; /* How many array elements are non-null. */ int n_pairs_present; /* How many array elements are non-null. */
struct kv_pair **pairs; struct kv_pair **pairs;
int uplgN; /* The smallest power of two >= lg(N) */ int uplgN; /* The smallest power of two >= lg(N) */
double densitystep; /* Each doubling decreases the density by densitystep. double udt_step; /* upper density threshold step */
/* Each doubling decreases the density by density step.
* For example if array_len=256 and uplgN=8 then there are 5 doublings. * For example if array_len=256 and uplgN=8 then there are 5 doublings.
* Regions of size 8 are full. Regions of size 16 are 90% full. * Regions of size 8 are full. Regions of size 16 are 90% full.
* Regions of size 32 are 80% full. Regions of size 64 are 70% full. * Regions of size 32 are 80% full. Regions of size 64 are 70% full.
* Regions of size 128 are 60% full. Regions of size 256 are 50% full. * Regions of size 128 are 60% full. Regions of size 256 are 50% full.
* The densitystep is 0.10. */ * The density step is 0.10. */
double ldt_step; /* lower density threshold step */
struct list cursors; struct list cursors;
int (*compare_fun)(DB*,const DBT*,const DBT*); int (*compare_fun)(DB*,const DBT*,const DBT*);
void *skey, *sval; /* used in dbts */ void *skey, *sval; /* used in dbts */
...@@ -31,8 +33,38 @@ int pmainternal_printpairs (struct kv_pair *pairs[], int N); ...@@ -31,8 +33,38 @@ int pmainternal_printpairs (struct kv_pair *pairs[], int N);
int pmainternal_make_space_at (PMA pma, int idx); int pmainternal_make_space_at (PMA pma, int idx);
int pmainternal_find (PMA pma, DBT *, DB*); // The DB is so the comparison fuction can be called. int pmainternal_find (PMA pma, DBT *, DB*); // The DB is so the comparison fuction can be called.
void print_pma (PMA pma); /* useful for debugging, so keep the name short. I.e., not pmainternal_print_pma() */ void print_pma (PMA pma); /* useful for debugging, so keep the name short. I.e., not pmainternal_print_pma() */
int pma_resize_array(PMA pma, int asksize);
struct kv_pair_tag *pmainternal_extract_pairs(PMA pma, int lo, int hi);
/*
* resize the pma array to asksize. zero all array entries starting from startx.
*/
int __pma_resize_array(PMA pma, int asksize, int startx);
void pma_update_region(PMA pma, struct list *cursorset, struct kv_pair_tag *, int n); /*
* extract pairs from the pma in the window delimited by lo and hi.
*/
struct kv_pair_tag *__pma_extract_pairs(PMA pma, int count, int lo, int hi);
/*
 * NOTE(review): the three identifiers declared below start with a double
 * underscore, which the C standard reserves for the implementation. consider
 * moving them to the pma_ / pmainternal_ prefix used by the other
 * declarations in this header.
 */
/*
 * update the cursors in a cursor set given a set of tagged pairs.
 * n is presumably the number of entries in tpairs -- confirm against pma.c.
 */
void __pma_update_cursors(PMA pma, struct list *cursorset, struct kv_pair_tag *tpairs, int n);
/*
 * update this pma's cursors given a set of tagged pairs.
 * n is presumably the number of entries in tpairs -- confirm against pma.c.
 */
void __pma_update_my_cursors(PMA pma, struct kv_pair_tag *tpairs, int n);
/*
 * a deletion occurred at index "here" in the pma. rebalance the windows around "here". if
 * necessary, shrink the pma.
 */
void __pma_delete_at(PMA pma, int here);
/*
 * density thresholds
 *
 * LDT = lower density threshold, UDT = upper density threshold (the same
 * abbreviations as the ldt_step / udt_step fields of struct pma).
 *
 * NOTE(review): PMA_LDT_HIGH (0.25) is numerically smaller than PMA_LDT_LOW
 * (0.40), so HIGH/LOW presumably name the two ends of the window hierarchy
 * rather than the magnitude of the value -- confirm against pma.c.
 */
#define PMA_LDT_HIGH 0.25
#define PMA_LDT_LOW 0.40
#define PMA_UDT_HIGH 1.00
#define PMA_UDT_LOW 0.50
/*
 * minimum array size. the delete tests assert that pma->N is equal to this
 * value once every key has been removed again.
 */
#define PMA_MIN_ARRAY_SIZE 4
...@@ -214,8 +214,8 @@ static void test_smooth_region (void) { ...@@ -214,8 +214,8 @@ static void test_smooth_region (void) {
static void test_calculate_parameters (void) { static void test_calculate_parameters (void) {
struct pma pma; struct pma pma;
pma.N=4; pmainternal_calculate_parameters(&pma); assert(pma.uplgN==2); assert(pma.densitystep==0.5); pma.N=4; pmainternal_calculate_parameters(&pma); assert(pma.uplgN==2); assert(pma.udt_step==0.5);
pma.N=8; pmainternal_calculate_parameters(&pma); assert(pma.uplgN==4); assert(pma.densitystep==0.5); pma.N=8; pmainternal_calculate_parameters(&pma); assert(pma.uplgN==4); assert(pma.udt_step==0.5);
} }
...@@ -855,9 +855,9 @@ void test_pma_split_cursor(void) { ...@@ -855,9 +855,9 @@ void test_pma_split_cursor(void) {
/* insert some kv pairs */ /* insert some kv pairs */
for (i=1; i<=16; i += 1) { for (i=1; i<=16; i += 1) {
DBT dbtk, dbtv; DBT dbtk, dbtv;
char k[5]; int v; char k[11]; int v;
sprintf(k, "%4.4d", i); snprintf(k, sizeof k, "%.10d", i);
fill_dbt(&dbtk, &k, strlen(k)+1); fill_dbt(&dbtk, &k, strlen(k)+1);
v = i; v = i;
fill_dbt(&dbtv, &v, sizeof v); fill_dbt(&dbtv, &v, sizeof v);
...@@ -946,6 +946,11 @@ void test_pma_split(void) { ...@@ -946,6 +946,11 @@ void test_pma_split(void) {
test_pma_split_cursor(); memory_check_all_free(); test_pma_split_cursor(); memory_check_all_free();
} }
/*
* test the pma_bulk_insert function by creating n kv pairs and bulk
* inserting them into an empty pma. verify that the pma contains all
* of the kv pairs.
*/
void test_pma_bulk_insert_n(int n) { void test_pma_bulk_insert_n(int n) {
PMA pma; PMA pma;
int error; int error;
...@@ -965,11 +970,11 @@ void test_pma_bulk_insert_n(int n) { ...@@ -965,11 +970,11 @@ void test_pma_bulk_insert_n(int n) {
/* init n kv pairs */ /* init n kv pairs */
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
char kstring[5]; char kstring[11];
char *k; int klen; char *k; int klen;
int *v; int vlen; int *v; int vlen;
sprintf(kstring, "%4.4d", i); snprintf(kstring, sizeof kstring, "%.10d", i);
klen = strlen(kstring) + 1; klen = strlen(kstring) + 1;
k = toku_malloc(klen); k = toku_malloc(klen);
assert(k); assert(k);
...@@ -988,8 +993,17 @@ void test_pma_bulk_insert_n(int n) { ...@@ -988,8 +993,17 @@ void test_pma_bulk_insert_n(int n) {
assert(error == 0); assert(error == 0);
/* verify */ /* verify */
print_pma(pma); if (0) print_pma(pma);
assert(n == pma_n_entries(pma)); assert(n == pma_n_entries(pma));
for (i=0; i<n; i++) {
DBT val;
init_dbt(&val); val.flags = DB_DBT_MALLOC;
error = pma_lookup(pma, &keys[i], &val, 0);
assert(error == 0);
assert(vals[i].size == val.size);
assert(memcmp(vals[i].data, val.data, val.size) == 0);
toku_free(val.data);
}
/* cleanup */ /* cleanup */
for (i=0; i<n; i++) { for (i=0; i<n; i++) {
...@@ -1050,6 +1064,191 @@ void test_pma_insert_or_replace(void) { ...@@ -1050,6 +1064,191 @@ void test_pma_insert_or_replace(void) {
assert(r==0); assert(r==0);
} }
/*
 * insert n sequential keys into a fresh pma, delete every one of them
 * again in the same order, and check that the array length (pma->N) is
 * back at PMA_MIN_ARRAY_SIZE.
 *
 * each key is the loop index printed as a zero padded 10 digit decimal
 * string (the terminating NUL is part of the key); each value is the
 * index itself stored as an int.
 */
void test_pma_delete_shrink(int n) {
    PMA pma;
    int rc;
    int idx;
    char keybuf[11]; /* 10 digits plus the terminating NUL */

    printf("test_pma_delete_shrink:%d\n", n);

    rc = pma_create(&pma, default_compare_fun);
    assert(rc == 0);

    /* fill the pma with keys 0 .. n-1 */
    idx = 0;
    while (idx < n) {
        DBT key, val;
        int payload = idx;

        snprintf(keybuf, sizeof keybuf, "%.10d", idx);
        fill_dbt(&key, keybuf, strlen(keybuf)+1);
        fill_dbt(&val, &payload, sizeof payload);
        rc = pma_insert(pma, &key, &val, 0);
        assert(rc == 0);
        idx += 1;
    }

    /* remove the same keys, lowest first */
    idx = 0;
    while (idx < n) {
        DBT key;

        snprintf(keybuf, sizeof keybuf, "%.10d", idx);
        fill_dbt(&key, keybuf, strlen(keybuf)+1);
        rc = pma_delete(pma, &key, 0);
        assert(rc == 0);
        idx += 1;
    }

    /* an emptied pma must have shrunk all the way down */
    assert(pma->N == PMA_MIN_ARRAY_SIZE);

    rc = pma_free(&pma);
    assert(rc == 0);
}
/*
 * same check as the sequential shrink test, but with keys drawn from
 * random(): insert n random keys, delete them in insertion order, and
 * verify that the array length (pma->N) has returned to
 * PMA_MIN_ARRAY_SIZE.
 *
 * all keys are generated up front so that the delete pass can replay
 * exactly the keys that were inserted.
 *
 * NOTE(review): nothing here guards against random() producing the same
 * value twice; whether pma_insert accepts a duplicate key is not visible
 * from this test -- confirm if the test ever fails intermittently.
 */
void test_pma_delete_random(int n) {
    PMA pma;
    int rc;
    int idx;
    int keys[n];
    char keybuf[11]; /* 10 digits plus the terminating NUL */

    printf("test_pma_delete_random:%d\n", n);

    rc = pma_create(&pma, default_compare_fun);
    assert(rc == 0);

    /* choose the keys */
    for (idx = 0; idx < n; idx += 1)
        keys[idx] = random();

    /* insert: the value stored under each key is the key's own number */
    for (idx = 0; idx < n; idx += 1) {
        DBT key, val;
        int payload = keys[idx];

        snprintf(keybuf, sizeof keybuf, "%.10d", keys[idx]);
        fill_dbt(&key, keybuf, strlen(keybuf)+1);
        fill_dbt(&val, &payload, sizeof payload);
        rc = pma_insert(pma, &key, &val, 0);
        assert(rc == 0);
    }

    /* delete in the order the keys were inserted */
    for (idx = 0; idx < n; idx += 1) {
        DBT key;

        snprintf(keybuf, sizeof keybuf, "%.10d", keys[idx]);
        fill_dbt(&key, keybuf, strlen(keybuf)+1);
        rc = pma_delete(pma, &key, 0);
        assert(rc == 0);
    }

    assert(pma->N == PMA_MIN_ARRAY_SIZE);

    rc = pma_free(&pma);
    assert(rc == 0);
}
/*
 * assert that the pair under the cursor holds the int value v.
 *
 * the current key and value are fetched with DB_DBT_MALLOC set on both
 * dbts, so both returned buffers are released with toku_free before
 * returning.
 */
void assert_cursor_equal(PMA_CURSOR pmacursor, int v) {
    DBT key, val;
    int rc;
    int stored;

    init_dbt(&key);
    key.flags = DB_DBT_MALLOC;
    init_dbt(&val);
    val.flags = DB_DBT_MALLOC;

    rc = pma_cget_current(pmacursor, &key, &val);
    assert(rc == 0);

    /* debugging aid, disabled */
    if (0) printf("key %s\n", (char*) key.data);

    /* the value must be exactly one int; copy it out before comparing */
    assert(val.size == sizeof stored);
    memcpy(&stored, val.data, val.size);
    assert(stored == v);

    toku_free(key.data);
    toku_free(val.data);
}
/*
 * assert that fetching the pair under the cursor fails, i.e. that
 * pma_cget_current returns a nonzero result.
 */
void assert_cursor_nokey(PMA_CURSOR pmacursor) {
    DBT key, val;
    int rc;

    init_dbt(&key);
    key.flags = DB_DBT_MALLOC;
    init_dbt(&val);
    val.flags = DB_DBT_MALLOC;

    rc = pma_cget_current(pmacursor, &key, &val);
    assert(rc != 0);
}
/*
 * test that pma delete ops keep pma cursors up to date
 * - insert n sequential keys (key i stores the int value i)
 * - position a cursor on the last key and check it reads n-1
 * - delete the keys in ascending order; after each delete the cursor
 *   must still read n-1, until the last key itself is deleted, after
 *   which the cursor must report no current pair
 * - finally the array length (pma->N) must be PMA_MIN_ARRAY_SIZE
 *
 * NOTE(review): the banner printed below says "test_delete_cursor",
 * without the "pma_" that the function name and the sibling tests use.
 */
void test_pma_delete_cursor(int n) {
    PMA pma;
    PMA_CURSOR pmacursor;
    int rc;
    int idx;
    char keybuf[11]; /* 10 digits plus the terminating NUL */

    printf("test_delete_cursor:%d\n", n);

    rc = pma_create(&pma, default_compare_fun);
    assert(rc == 0);

    /* populate with keys 0 .. n-1 */
    for (idx = 0; idx < n; idx += 1) {
        DBT key, val;
        int payload = idx;

        snprintf(keybuf, sizeof keybuf, "%.10d", idx);
        fill_dbt(&key, keybuf, strlen(keybuf)+1);
        fill_dbt(&val, &payload, sizeof payload);
        rc = pma_insert(pma, &key, &val, 0);
        assert(rc == 0);
    }

    /* park a cursor on the largest key */
    rc = pma_cursor(pma, &pmacursor);
    assert(rc == 0);
    rc = pma_cursor_set_position_last(pmacursor);
    assert(rc == 0);
    assert_cursor_equal(pmacursor, n-1);

    /* delete from the front; the cursor must not move off the last key */
    for (idx = 0; idx < n; idx += 1) {
        DBT key;

        snprintf(keybuf, sizeof keybuf, "%.10d", idx);
        fill_dbt(&key, keybuf, strlen(keybuf)+1);
        rc = pma_delete(pma, &key, 0);
        assert(rc == 0);

        if (idx != n-1) {
            assert_cursor_equal(pmacursor, n-1);
        } else {
            /* the key under the cursor was just removed */
            assert_cursor_nokey(pmacursor);
        }
    }

    assert(pma->N == PMA_MIN_ARRAY_SIZE);

    rc = pma_cursor_free(&pmacursor);
    assert(rc == 0);
    rc = pma_free(&pma);
    assert(rc == 0);
}
/*
 * run the pma deletion tests: sequential shrink, random shrink, and
 * cursor tracking across deletes. memory_check_all_free is called after
 * each one.
 *
 * declared with (void) rather than an empty parameter list so that this
 * is a real prototype (an empty list leaves the parameters unspecified
 * before C23) and matches the style of pma_tests (void).
 */
void test_pma_delete(void) {
    test_pma_delete_shrink(256); memory_check_all_free();
    test_pma_delete_random(256); memory_check_all_free();
    test_pma_delete_cursor(32); memory_check_all_free();
}
void pma_tests (void) { void pma_tests (void) {
memory_check=1; memory_check=1;
test_keycompare(); memory_check_all_free(); test_keycompare(); memory_check_all_free();
...@@ -1068,6 +1267,7 @@ void pma_tests (void) { ...@@ -1068,6 +1267,7 @@ void pma_tests (void) {
test_pma_split(); memory_check_all_free(); test_pma_split(); memory_check_all_free();
test_pma_bulk_insert(); memory_check_all_free(); test_pma_bulk_insert(); memory_check_all_free();
test_pma_insert_or_replace(); memory_check_all_free(); test_pma_insert_or_replace(); memory_check_all_free();
test_pma_delete();
} }
int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) { int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) {
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment