Commit 33b25277 authored by Bradley C. Kuszmaul

Merge in the OMT and its integration into the main line.

{{{
$ cd tokudb
$ svn merge -r3533:3571 https://svn.tokutek.com/tokudb/tokudb.724
}}}
 
There remain a few problems with recovery, but the main line has problems with recovery, so let's bite the bullet and move forward.

Fixes #729, #724.


git-svn-id: file:///svn/tokudb@3572 c7de825b-a66e-492c-adef-691d508d4ae1
parent 0a37b673
...@@ -72,22 +72,18 @@ REGRESSION_TESTS = \ ...@@ -72,22 +72,18 @@ REGRESSION_TESTS = \
test-brt-delete-both \ test-brt-delete-both \
test-brt-overflow \ test-brt-overflow \
test-del-inorder \ test-del-inorder \
test-gpma-blackbox \
test-gpma-glassbox \
test-gpma-glassbox \
test-gpma-leftmost-dup \
test-inc-split \ test-inc-split \
test-primes \ test-primes \
test_oexcl \ test_oexcl \
test_toku_malloc_plain_free \ test_toku_malloc_plain_free \
ybt-test \ ybt-test \
log-test \ log-test \
omt-test \
# This line intentionally kept commented so I can have a \ on the end of the previous line # This line intentionally kept commented so I can have a \ on the end of the previous line
# Add in the binaries that must be run in various ways. # Add in the binaries that must be run in various ways.
BINS = $(REGRESSION_TESTS) \ BINS = $(REGRESSION_TESTS) \
benchmark-test \ benchmark-test \
test-gpma-worstinsert \
brtdump \ brtdump \
randbrt \ randbrt \
randdb4 \ randdb4 \
...@@ -99,7 +95,7 @@ OFILES = \ ...@@ -99,7 +95,7 @@ OFILES = \
brt-verify.o \ brt-verify.o \
brt.o \ brt.o \
cachetable.o \ cachetable.o \
fifo.o gpma.o \ fifo.o \
fingerprint.o \ fingerprint.o \
key.o \ key.o \
leafentry.o \ leafentry.o \
...@@ -107,6 +103,7 @@ OFILES = \ ...@@ -107,6 +103,7 @@ OFILES = \
log_code.o \ log_code.o \
memory.o \ memory.o \
mempool.o \ mempool.o \
omt.o \
primes.o \ primes.o \
recover.o \ recover.o \
roll.o \ roll.o \
...@@ -128,7 +125,7 @@ tdb-recover: LDFLAGS+=-lz ...@@ -128,7 +125,7 @@ tdb-recover: LDFLAGS+=-lz
recover.o: log_header.h log-internal.h log.h yerror.h brttypes.h kv-pair.h memory.h key.h cachetable.h recover.o: log_header.h log-internal.h log.h yerror.h brttypes.h kv-pair.h memory.h key.h cachetable.h
tdb-recover: $(OFILES) tdb-recover: $(OFILES)
roll.o: log_header.h log-internal.h log.h yerror.h brttypes.h kv-pair.h memory.h key.h cachetable.h gpma.h roll.o: log_header.h log-internal.h log.h yerror.h brttypes.h kv-pair.h memory.h key.h cachetable.h omt.h
log_code.o: log_header.h wbuf.h log-internal.h rbuf.h log_code.o: log_header.h wbuf.h log-internal.h rbuf.h
log_header.h: log_code.c log_header.h: log_code.c
...@@ -141,9 +138,6 @@ bins: $(BINS) ...@@ -141,9 +138,6 @@ bins: $(BINS)
# Put the benchmarktest_256 first since it takes the longest (and we want to use parallelism in the make) # Put the benchmarktest_256 first since it takes the longest (and we want to use parallelism in the make)
CHECKS = \ CHECKS = \
benchmarktest_256 \ benchmarktest_256 \
test-gpma-worstinsert-a \
test-gpma-worstinsert-b \
test-gpma-worstinsert-c \
$(REGRESSION_TESTS) \ $(REGRESSION_TESTS) \
# This line intentionally kept commented so I can have a \ on the previous line # This line intentionally kept commented so I can have a \ on the previous line
...@@ -151,12 +145,6 @@ CHECKS = \ ...@@ -151,12 +145,6 @@ CHECKS = \
check: bins $(patsubst %,check_%,$(CHECKS)) check: bins $(patsubst %,check_%,$(CHECKS))
check_benchmarktest_256: benchmark-test check_benchmarktest_256: benchmark-test
$(VGRIND) ./benchmark-test $(VERBVERBOSE) --valsize 256 --verify 1 $(VGRIND) ./benchmark-test $(VERBVERBOSE) --valsize 256 --verify 1
check_test-gpma-worstinsert-a: test-gpma-worstinsert
$(VGRIND) ./test-gpma-worstinsert $(VERBVERBOSE) -a
check_test-gpma-worstinsert-b: test-gpma-worstinsert
$(VGRIND) ./test-gpma-worstinsert $(VERBVERBOSE) -b
check_test-gpma-worstinsert-c: test-gpma-worstinsert
$(VGRIND) ./test-gpma-worstinsert $(VERBVERBOSE) -c
check_test-assert: test-assert check_test-assert: test-assert
@# no arguments, should err @# no arguments, should err
...@@ -179,34 +167,24 @@ check-fanout: ...@@ -179,34 +167,24 @@ check-fanout:
done done
log-test log-test2 log-test3 log-test4 log-test5 log-test6 benchmark-test brt-test brt-test0 brt-test1 brt-test2 brt-test3 brt-test4 brt-test5 test-brt-overflow brt-test-named-db brt-test-cursor brt-test-cursor-2 test-brt-delete-both brt-serialize-test brtdump test-inc-split test-del-inorder cachetable-test cachetable-test2: LDFLAGS+=-lz log-test log-test2 log-test3 log-test4 log-test5 log-test6 benchmark-test brt-test brt-test0 brt-test1 brt-test2 brt-test3 brt-test4 brt-test5 test-brt-overflow brt-test-named-db brt-test-cursor brt-test-cursor-2 test-brt-delete-both brt-serialize-test brtdump test-inc-split test-del-inorder cachetable-test cachetable-test2: LDFLAGS+=-lz
# pma: PROF_FLAGS=-fprofile-arcs -ftest-coverage
BRT_INTERNAL_H_INCLUDES = brt-internal.h cachetable.h fifo.h gpma.h brt.h brt-search.h brttypes.h yerror.h ybt.h log.h ../include/db.h kv-pair.h memory.h crc.h mempool.h leafentry.h HFILES = $(wildcard *.h)
BRT_INTERNAL_H_INCLUDES = brt-internal.h cachetable.h fifo.h omt.h brt.h brt-search.h brttypes.h yerror.h ybt.h log.h ../include/db.h kv-pair.h memory.h crc.h mempool.h leafentry.h
key.o: brttypes.h key.h key.o: brttypes.h key.h
list-test: list-test.o toku_assert.o list-test: list-test.o toku_assert.o
test-brt-delete-both: ybt.o brt.o fifo.o gpma.o memory.o leafentry.o brt-serialize.o cachetable.o ybt.o key.o primes.o toku_assert.o log.o mempool.o brt-verify.o fingerprint.o log_code.o roll.o test-brt-delete-both: ybt.o brt.o fifo.o omt.o memory.o leafentry.o brt-serialize.o cachetable.o ybt.o key.o primes.o toku_assert.o log.o mempool.o brt-verify.o fingerprint.o log_code.o roll.o
test-inc-split: $(TEST_OFILES) test-inc-split: $(TEST_OFILES)
brt-test-helpers.o: $(BRT_INTERNAL_H_INCLUDES) toku_assert.h brt-test-helpers.o: $(BRT_INTERNAL_H_INCLUDES) toku_assert.h
test-del-inorder: $(TEST_OFILES) test-del-inorder: $(TEST_OFILES)
# pma-test.o: $(BRT_INTERNAL_H_INCLUDES) pma-internal.h gpma.h list.h mempool.h omt.o: $(HFILES)
# pma-test: pma.o memory.o key.o ybt.o log.o mempool.o fingerprint.o brt-serialize.o fifo.o primes.o toku_assert.o log_code.o roll.o brt.o cachetable.o brt-verify.o
pma.o: gpma.h yerror.h pma-internal.h memory.h key.h ybt.h brttypes.h log.h ../include/db.h log_header.h
test-gpma-glassbox.o: test-gpma-glassbox.c gpma.h gpma-internal.h toku_assert.h memory.h
test-gpma-glassbox: test-gpma-glassbox.o toku_assert.o memory-debug.o gpma.o
test-gpma-blackbox: test-gpma-blackbox.o toku_assert.o memory.o gpma.o
test-gpma-worstinsert: test-gpma-worstinsert.o toku_assert.o memory.o gpma.o
test-gpma-leftmost-dup: test-gpma-leftmost-dup.o toku_assert.o memory.o gpma.o
test-gpma-worstinsert.o test-gpma-blackbox.o test-gpma-leftmost-dup.o: gpma.h memory.h toku_assert.h
: gpma.h memory.h toku_assert.h
gpma.o: gpma.c gpma.h
ybt.o: ybt.h brttypes.h ../include/db.h ybt.o: ybt.h brttypes.h ../include/db.h
ybt-test: ybt-test.o ybt.o memory.o toku_assert.o ybt-test: ybt-test.o ybt.o memory.o toku_assert.o
ybt-test.o: ybt.h ../include/db.h ybt-test.o: ybt.h ../include/db.h
cachetable.o: brttypes.h cachetable.h hashfun.h memory.h primes.h toku_assert.h $(BRT_INTERNAL_H_INCLUDES) log_header.h cachetable.o: brttypes.h cachetable.h hashfun.h memory.h primes.h toku_assert.h $(BRT_INTERNAL_H_INCLUDES) log_header.h
brt-test0 brt-test1 brt-test2 brt-test3 brt-test4 brt-test5 test-brt-overflow brt-test-named-db brt-test-cursor brt-test-cursor-2 brt-test: ybt.o brt.o fifo.o gpma.o leafentry.o memory.o brt-serialize.o cachetable.o ybt.o key.o primes.o toku_assert.o log.o mempool.o brt-verify.o fingerprint.o log_code.o roll.o brt-test0 brt-test1 brt-test2 brt-test3 brt-test4 brt-test5 test-brt-overflow brt-test-named-db brt-test-cursor brt-test-cursor-2 brt-test: ybt.o brt.o fifo.o omt.o leafentry.o memory.o brt-serialize.o cachetable.o ybt.o key.o primes.o toku_assert.o log.o mempool.o brt-verify.o fingerprint.o log_code.o roll.o
log.o: log_header.h log-internal.h log.h wbuf.h crc.h brttypes.h $(BRT_INTERNAL_H_INCLUDES) log.o: log_header.h log-internal.h log.h wbuf.h crc.h brttypes.h $(BRT_INTERNAL_H_INCLUDES)
logformat: logformat.o toku_assert.o logformat: logformat.o toku_assert.o
brt-test0.o brt-test1.o brt-test2.o brt-test3.o brt-test4.o brt-test5.o test-brt-overflow.h brt-test-named-db.o brt-test-cursor.o brt-test-cursor-2.o brt-test.o brt.o: brt.h brt-search.h ../include/db.h fifo.h gpma.h brttypes.h cachetable.h memory.h $(BRT_INTERNAL_H_INCLUDES) brt-test0.o brt-test1.o brt-test2.o brt-test3.o brt-test4.o brt-test5.o test-brt-overflow.h brt-test-named-db.o brt-test-cursor.o brt-test-cursor-2.o brt-test.o brt.o: brt.h brt-search.h ../include/db.h fifo.h omt.h brttypes.h cachetable.h memory.h $(BRT_INTERNAL_H_INCLUDES)
brt-serialize-test.o: $(BRT_INTERNAL_H_INCLUDES) brt-serialize-test.o: $(BRT_INTERNAL_H_INCLUDES)
brt.o: $(BRT_INTERNAL_H_INCLUDES) key.h log_header.h brt.o: $(BRT_INTERNAL_H_INCLUDES) key.h log_header.h
fifo.o: fifo.h brttypes.h fifo.o: fifo.h brttypes.h
...@@ -214,16 +192,18 @@ memory.o: memory.h ...@@ -214,16 +192,18 @@ memory.o: memory.h
primes.o: primes.h toku_assert.h primes.o: primes.h toku_assert.h
fifo-test: fifo.o memory.o toku_assert.o ybt.o fifo-test: fifo.o memory.o toku_assert.o ybt.o
brt-serialize.o: $(BRT_INTERNAL_H_INCLUDES) key.h wbuf.h rbuf.h brt-serialize.o: $(BRT_INTERNAL_H_INCLUDES) key.h wbuf.h rbuf.h
brt-bigtest: memory.o ybt.o brt.o gpma.o cachetable.o key.o fifo.o brt-serialize.o brt-bigtest: memory.o ybt.o brt.o omt.o cachetable.o key.o fifo.o brt-serialize.o
brt-bigtest.o: brt.h brt-search.h ../include/db.h brt-bigtest.o: brt.h brt-search.h ../include/db.h
log-test6 log-test5 log-test4 log-test3 log-test2 log-test: log.o memory.o leafentry.o toku_assert.o roll.o log_code.o brt-serialize.o brt.o cachetable.o gpma.o ybt.o fifo.o key.o fingerprint.o brt-verify.o mempool.o primes.o log-test6 log-test5 log-test4 log-test3 log-test2 log-test: log.o memory.o leafentry.o toku_assert.o roll.o log_code.o brt-serialize.o brt.o cachetable.o omt.o ybt.o fifo.o key.o fingerprint.o brt-verify.o mempool.o primes.o
brt-verify.o: $(BRT_INTERNAL_H_INCLUDES) brt-verify.o: $(BRT_INTERNAL_H_INCLUDES)
fingerprint.o: $(BRT_INTERNAL_H_INCLUDES) fingerprint.o: $(BRT_INTERNAL_H_INCLUDES)
mempool.o: toku_assert.h mempool.h mempool.o: toku_assert.h mempool.h
leafentry.o: brttypes.h crc.h leafentry.h memory.h toku_assert.h leafentry.o: brttypes.h crc.h leafentry.h memory.h toku_assert.h
toku_assert.o: toku_assert.h toku_assert.o: toku_assert.h
omt-test.o: toku_assert.h memory.h toku_assert.h ../include/db.h brttypes.h
omt-test: omt-test.o omt.o memory.o toku_assert.o
brt-serialize-test: brt-serialize-test.o brt-serialize.o leafentry.o memory.o fifo.o gpma.o key.o ybt.o brt.o cachetable.o primes.o toku_assert.o log.o mempool.o brt-verify.o fingerprint.o log_code.o roll.o brt-serialize-test: brt-serialize-test.o brt-serialize.o leafentry.o memory.o fifo.o omt.o key.o ybt.o brt.o cachetable.o primes.o toku_assert.o log.o mempool.o brt-verify.o fingerprint.o log_code.o roll.o
test_toku_malloc_plain_free: memory.o toku_assert.o test_toku_malloc_plain_free: memory.o toku_assert.o
......
...@@ -7,7 +7,6 @@ ...@@ -7,7 +7,6 @@
#include "cachetable.h" #include "cachetable.h"
#include "fifo.h" #include "fifo.h"
#include "yerror.h" #include "yerror.h"
#include "gpma.h"
#include "brt.h" #include "brt.h"
#include "crc.h" #include "crc.h"
#include "list.h" #include "list.h"
...@@ -15,15 +14,21 @@ ...@@ -15,15 +14,21 @@
#include "kv-pair.h" #include "kv-pair.h"
#include "leafentry.h" #include "leafentry.h"
typedef LEAFENTRY OMTVALUE;
#include "omt.h"
#ifndef BRT_FANOUT #ifndef BRT_FANOUT
#define BRT_FANOUT 16 #define BRT_FANOUT 16
#endif #endif
enum { TREE_FANOUT = BRT_FANOUT }; enum { TREE_FANOUT = BRT_FANOUT };
enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */ enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */
enum { PMA_ITEM_OVERHEAD = 4 }; enum { OMT_ITEM_OVERHEAD = 0 }; /* No overhead for the OMT item. The PMA needed to know the idx, but the OMT doesn't. */
enum { BRT_CMD_OVERHEAD = (1 // the type enum { BRT_CMD_OVERHEAD = (1 // the type
+ 8) // the xid + 8) // the xid
}; };
enum { LE_OVERHEAD_BOUND = 9 }; // the type and xid
enum { BRT_DEFAULT_NODE_SIZE = 1 << 20 }; enum { BRT_DEFAULT_NODE_SIZE = 1 << 20 };
struct nodeheader_in_file { struct nodeheader_in_file {
...@@ -57,7 +62,7 @@ struct brtnode { ...@@ -57,7 +62,7 @@ struct brtnode {
// When we checkpoint: Create a checkpoint record, and cause every dirty node to be written to disk. The new checkpoint record is *not* incorporated into the disk_lsn of the written nodes. // When we checkpoint: Create a checkpoint record, and cause every dirty node to be written to disk. The new checkpoint record is *not* incorporated into the disk_lsn of the written nodes.
// While we are checkpointing, someone may modify a dirty node that has not yet been written. In that case, when we unpin the node, we make the new copy (because the disk_lsn<checkpoint_lsn), just as we would usually. // While we are checkpointing, someone may modify a dirty node that has not yet been written. In that case, when we unpin the node, we make the new copy (because the disk_lsn<checkpoint_lsn), just as we would usually.
// //
int layout_version; // What version of the data structure? (version 2 adds the xid to the brt cmds) int layout_version; // What version of the data structure? (version 2 adds the xid to the brt cmds)
int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */ int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */
u_int32_t rand4fingerprint; u_int32_t rand4fingerprint;
u_int32_t local_fingerprint; /* For leaves this is everything in the buffer. For nonleaves, this is everything in the buffers, but does not include child subtree fingerprints. */ u_int32_t local_fingerprint; /* For leaves this is everything in the buffer. For nonleaves, this is everything in the buffers, but does not include child subtree fingerprints. */
...@@ -82,7 +87,7 @@ struct brtnode { ...@@ -82,7 +87,7 @@ struct brtnode {
However, in the absence of duplicate keys, child 1's keys *are* > childkeys[0]. */ However, in the absence of duplicate keys, child 1's keys *are* > childkeys[0]. */
} n; } n;
struct leaf { struct leaf {
GPMA buffer; OMT buffer;
unsigned int n_bytes_in_buffer; /* How many bytes to represent the PMA (including the per-key overheads, but not including the overheads for the node. */ unsigned int n_bytes_in_buffer; /* How many bytes to represent the PMA (including the per-key overheads, but not including the overheads for the node. */
struct mempool buffer_mempool; struct mempool buffer_mempool;
} l; } l;
...@@ -186,6 +191,7 @@ struct brt_cursor { ...@@ -186,6 +191,7 @@ struct brt_cursor {
void *skey, *sval; void *skey, *sval;
}; };
// logs the memory allocation, but not the creation of the new node
int toku_create_new_brtnode (BRT t, BRTNODE *result, int height, TOKULOGGER logger); int toku_create_new_brtnode (BRT t, BRTNODE *result, int height, TOKULOGGER logger);
int toku_unpin_brtnode (BRT brt, BRTNODE node) ; int toku_unpin_brtnode (BRT brt, BRTNODE node) ;
unsigned int toku_brtnode_which_child (BRTNODE node , DBT *k, DBT *d, BRT t); unsigned int toku_brtnode_which_child (BRTNODE node , DBT *k, DBT *d, BRT t);
...@@ -206,12 +212,12 @@ struct cmd_leafval_bessel_extra { ...@@ -206,12 +212,12 @@ struct cmd_leafval_bessel_extra {
BRT_CMD cmd; BRT_CMD cmd;
int compare_both_keys; // Set to 1 for DUPSORT databases that are not doing a DELETE_BOTH int compare_both_keys; // Set to 1 for DUPSORT databases that are not doing a DELETE_BOTH
}; };
int toku_cmd_leafval_bessel (u_int32_t dlen, void *leafentry, void *extra); int toku_cmd_leafval_bessel (LEAFENTRY leafentry, void *extra);
int toku_brt_root_put_cmd(BRT brt, BRT_CMD cmd, TOKULOGGER logger); int toku_brt_root_put_cmd(BRT brt, BRT_CMD cmd, TOKULOGGER logger);
int toku_cachefile_root_put_cmd (CACHEFILE cf, BRT_CMD cmd, TOKULOGGER logger); int toku_cachefile_root_put_cmd (CACHEFILE cf, BRT_CMD cmd, TOKULOGGER logger);
int toku_gpma_compress_kvspace (GPMA pma, struct mempool *memp); int toku_omt_compress_kvspace (OMT omt, struct mempool *memp);
void *mempool_malloc_from_gpma(GPMA pma, struct mempool *mp, size_t size); void *mempool_malloc_from_omt(OMT omt, struct mempool *mp, size_t size);
#endif #endif
...@@ -27,7 +27,7 @@ static void test_serialize(void) { ...@@ -27,7 +27,7 @@ static void test_serialize(void) {
sn.thisnodename = sn.nodesize*20; sn.thisnodename = sn.nodesize*20;
sn.disk_lsn.lsn = 789; sn.disk_lsn.lsn = 789;
sn.log_lsn.lsn = 123456; sn.log_lsn.lsn = 123456;
sn.layout_version = 4; sn.layout_version = 5;
sn.height = 1; sn.height = 1;
sn.rand4fingerprint = randval; sn.rand4fingerprint = randval;
sn.local_fingerprint = 0; sn.local_fingerprint = 0;
...@@ -57,7 +57,7 @@ static void test_serialize(void) { ...@@ -57,7 +57,7 @@ static void test_serialize(void) {
assert(dn->thisnodename==nodesize*20); assert(dn->thisnodename==nodesize*20);
assert(dn->disk_lsn.lsn==123456); assert(dn->disk_lsn.lsn==123456);
assert(dn->layout_version ==4); assert(dn->layout_version ==5);
assert(dn->height == 1); assert(dn->height == 1);
assert(dn->rand4fingerprint==randval); assert(dn->rand4fingerprint==randval);
assert(dn->u.n.n_children==2); assert(dn->u.n.n_children==2);
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
#define _XOPEN_SOURCE 500 #define _XOPEN_SOURCE 500
//#include "pma.h"
#include "toku_assert.h" #include "toku_assert.h"
#include "brt-internal.h" #include "brt-internal.h"
#include "key.h" #include "key.h"
...@@ -56,14 +55,15 @@ static unsigned int toku_serialize_brtnode_size_slow (BRTNODE node) { ...@@ -56,14 +55,15 @@ static unsigned int toku_serialize_brtnode_size_slow (BRTNODE node) {
return size+hsize+csize; return size+hsize+csize;
} else { } else {
unsigned int hsize=0; unsigned int hsize=0;
GPMA_ITERATE(node->u.l.buffer, int addupsize (LEAFENTRY le, u_int32_t UU(idx), void *vp) {
idx, vlen, vdata, unsigned int *ip=vp;
({ (*ip) += OMT_ITEM_OVERHEAD + leafentry_disksize(le);
LEAFENTRY le=vdata; return 0;
hsize+= PMA_ITEM_OVERHEAD + leafentry_disksize(le); }
})); toku_omt_iterate(node->u.l.buffer,
addupsize,
&hsize);
assert(hsize<=node->u.l.n_bytes_in_buffer); assert(hsize<=node->u.l.n_bytes_in_buffer);
hsize+=4; /* the PMA size */
hsize+=4; /* add n entries in buffer table. */ hsize+=4; /* add n entries in buffer table. */
return size+hsize; return size+hsize;
} }
...@@ -81,8 +81,7 @@ unsigned int toku_serialize_brtnode_size (BRTNODE node) { ...@@ -81,8 +81,7 @@ unsigned int toku_serialize_brtnode_size (BRTNODE node) {
result+=(8+4+4)*(node->u.n.n_children); /* For each child, a child offset, a count for the number of hash table entries, and the subtree fingerprint. */ result+=(8+4+4)*(node->u.n.n_children); /* For each child, a child offset, a count for the number of hash table entries, and the subtree fingerprint. */
result+=node->u.n.n_bytes_in_buffers; result+=node->u.n.n_bytes_in_buffers;
} else { } else {
result+=(4 /* n_entries in buffer table. */ result+=4; /* n_entries in buffer table. */
+4); /* the pma size */
result+=node->u.l.n_bytes_in_buffer; result+=node->u.l.n_bytes_in_buffer;
if (toku_memory_check) { if (toku_memory_check) {
unsigned int slowresult = toku_serialize_brtnode_size_slow(node); unsigned int slowresult = toku_serialize_brtnode_size_slow(node);
...@@ -177,14 +176,13 @@ void toku_serialize_brtnode_to (int fd, DISKOFF off, BRTNODE node) { ...@@ -177,14 +176,13 @@ void toku_serialize_brtnode_to (int fd, DISKOFF off, BRTNODE node) {
} }
} else { } else {
//printf("%s:%d writing node %lld n_entries=%d\n", __FILE__, __LINE__, node->thisnodename, toku_gpma_n_entries(node->u.l.buffer)); //printf("%s:%d writing node %lld n_entries=%d\n", __FILE__, __LINE__, node->thisnodename, toku_gpma_n_entries(node->u.l.buffer));
wbuf_uint(&w, toku_gpma_n_entries(node->u.l.buffer)); wbuf_uint(&w, toku_omt_size(node->u.l.buffer));
wbuf_uint(&w, toku_gpma_index_limit(node->u.l.buffer)); int wbufwriteleafentry (LEAFENTRY le, u_int32_t UU(idx), void *v) {
GPMA_ITERATE(node->u.l.buffer, idx, vlen, vdata, struct wbuf *thisw=v;
({ wbuf_LEAFENTRY(thisw, le);
//printf(" %s:%d idx=%d\n", __FILE__, __LINE__, idx); return 0;
wbuf_uint(&w, idx); }
wbuf_LEAFENTRY(&w, vdata); toku_omt_iterate(node->u.l.buffer, wbufwriteleafentry, &w);
}));
} }
assert(w.ndone<=w.size); assert(w.ndone<=w.size);
#ifdef CRC_ATEND #ifdef CRC_ATEND
...@@ -266,7 +264,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode) { ...@@ -266,7 +264,7 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode) {
} }
} }
result->layout_version = rbuf_int(&rc); result->layout_version = rbuf_int(&rc);
if (result->layout_version!=4) { if (result->layout_version!=5) {
r=DB_BADFORMAT; r=DB_BADFORMAT;
goto died1; goto died1;
} }
...@@ -368,11 +366,10 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode) { ...@@ -368,11 +366,10 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode) {
} }
} else { } else {
int n_in_buf = rbuf_int(&rc); int n_in_buf = rbuf_int(&rc);
int index_limit = rbuf_int(&rc);
result->u.l.n_bytes_in_buffer = 0; result->u.l.n_bytes_in_buffer = 0;
r=toku_gpma_create(&result->u.l.buffer, index_limit); r=toku_omt_create(&result->u.l.buffer);
if (r!=0) { if (r!=0) {
if (0) { died_21: toku_gpma_free(&result->u.l.buffer, 0, 0); } if (0) { died_21: toku_omt_destroy(&result->u.l.buffer); }
goto died1; goto died1;
} }
//printf("%s:%d r PMA= %p\n", __FILE__, __LINE__, result->u.l.buffer); //printf("%s:%d r PMA= %p\n", __FILE__, __LINE__, result->u.l.buffer);
...@@ -388,18 +385,15 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode) { ...@@ -388,18 +385,15 @@ int toku_deserialize_brtnode_from (int fd, DISKOFF off, BRTNODE *brtnode) {
for (i=0; i<n_in_buf; i++) { for (i=0; i<n_in_buf; i++) {
LEAFENTRY tmp_le; LEAFENTRY tmp_le;
//printf("%s:%d reading %dth item\n", __FILE__, __LINE__, i); //printf("%s:%d reading %dth item\n", __FILE__, __LINE__, i);
int idx = rbuf_int(&rc);
//printf("%s:%d idx=%d\n", __FILE__, __LINE__, idx);
u_int32_t memsize, disksize; u_int32_t memsize, disksize;
rbuf_LEAFENTRY(&rc, &memsize, &disksize, &tmp_le); rbuf_LEAFENTRY(&rc, &memsize, &disksize, &tmp_le);
LEAFENTRY le = mempool_malloc_from_gpma(result->u.l.buffer, &result->u.l.buffer_mempool, memsize); LEAFENTRY le = mempool_malloc_from_omt(result->u.l.buffer, &result->u.l.buffer_mempool, memsize);
assert(le); assert(le);
memcpy(le, tmp_le, memsize); memcpy(le, tmp_le, memsize);
toku_free(tmp_le); toku_free(tmp_le);
assert(disksize==leafentry_disksize(le)); assert(disksize==leafentry_disksize(le));
result->u.l.n_bytes_in_buffer += disksize + PMA_ITEM_OVERHEAD; result->u.l.n_bytes_in_buffer += disksize + OMT_ITEM_OVERHEAD;
//printf("idx=%d\n", idx); toku_omt_insert_at(result->u.l.buffer, le, i);
toku_gpma_set_at_index(result->u.l.buffer, idx, memsize, le);
actual_sum += result->rand4fingerprint*toku_le_crc(le); actual_sum += result->rand4fingerprint*toku_le_crc(le);
//printf("%s:%d rand4=%08x fp=%08x \n", __FILE__, __LINE__, result->rand4fingerprint, actual_sum); //printf("%s:%d rand4=%08x fp=%08x \n", __FILE__, __LINE__, result->rand4fingerprint, actual_sum);
} }
...@@ -440,18 +434,26 @@ void toku_verify_counts (BRTNODE node) { ...@@ -440,18 +434,26 @@ void toku_verify_counts (BRTNODE node) {
/*foo*/ /*foo*/
if (node->height==0) { if (node->height==0) {
assert(node->u.l.buffer); assert(node->u.l.buffer);
unsigned int sum=0; struct sum_info {
unsigned int count=0; unsigned int dsum;
u_int32_t fp=0; unsigned int msum;
GPMA_ITERATE(node->u.l.buffer, idx, dlen, ddata, unsigned int count;
({ u_int32_t fp;
count++; } sum_info = {0,0,0,0};
sum+= PMA_ITEM_OVERHEAD + leafentry_disksize(ddata); // use the disk size, not the memory size. int sum_item (LEAFENTRY le, u_int32_t UU(idx), void *vsi) {
fp += toku_le_crc(ddata); struct sum_info *si = vsi;
})); si->count++;
assert(count==toku_gpma_n_entries(node->u.l.buffer)); si->dsum += OMT_ITEM_OVERHEAD + leafentry_disksize(le);
assert(sum==node->u.l.n_bytes_in_buffer); si->msum += leafentry_memsize(le);
u_int32_t fps = node->rand4fingerprint *fp; si->fp += toku_le_crc(le);
return 0;
}
toku_omt_iterate(node->u.l.buffer, sum_item, &sum_info);
assert(sum_info.count==toku_omt_size(node->u.l.buffer));
assert(sum_info.dsum==node->u.l.n_bytes_in_buffer);
assert(sum_info.msum == node->u.l.buffer_mempool.free_offset - node->u.l.buffer_mempool.frag_size);
u_int32_t fps = node->rand4fingerprint * sum_info.fp;
assert(fps==node->local_fingerprint); assert(fps==node->local_fingerprint);
} else { } else {
unsigned int sum = 0; unsigned int sum = 0;
......
...@@ -80,33 +80,32 @@ int toku_testsetup_insert_to_leaf (BRT brt, DISKOFF diskoff, char *key, int keyl ...@@ -80,33 +80,32 @@ int toku_testsetup_insert_to_leaf (BRT brt, DISKOFF diskoff, char *key, int keyl
LEAFENTRY tmp_leafentry; LEAFENTRY tmp_leafentry;
r = le_committed(keylen, key, vallen, val, &lesize, &disksize, &tmp_leafentry); r = le_committed(keylen, key, vallen, val, &lesize, &disksize, &tmp_leafentry);
LEAFENTRY leafentry = mempool_malloc_from_gpma(node->u.l.buffer, &node->u.l.buffer_mempool, lesize); LEAFENTRY leafentry = mempool_malloc_from_omt(node->u.l.buffer, &node->u.l.buffer_mempool, lesize);
memcpy(leafentry, tmp_leafentry, lesize); memcpy(leafentry, tmp_leafentry, lesize);
toku_free(tmp_leafentry); toku_free(tmp_leafentry);
u_int32_t storedlen; LEAFENTRY storeddata;
void *storeddata;
u_int32_t idx; u_int32_t idx;
DBT keydbt,valdbt; DBT keydbt,valdbt;
BRT_CMD_S cmd = {BRT_INSERT, 0, .u.id={toku_fill_dbt(&keydbt, key, keylen), BRT_CMD_S cmd = {BRT_INSERT, 0, .u.id={toku_fill_dbt(&keydbt, key, keylen),
toku_fill_dbt(&valdbt, val, vallen)}}; toku_fill_dbt(&valdbt, val, vallen)}};
struct cmd_leafval_bessel_extra be = {brt, &cmd, node->flags & TOKU_DB_DUPSORT}; struct cmd_leafval_bessel_extra be = {brt, &cmd, node->flags & TOKU_DB_DUPSORT};
r = toku_gpma_lookup_bessel(node->u.l.buffer, toku_cmd_leafval_bessel, 0, &be, &storedlen, &storeddata, &idx); r = toku_omt_find_zero(node->u.l.buffer, toku_cmd_leafval_bessel, &be, &storeddata, &idx);
if (r==0) { if (r==0) {
// It's already there. So now we have to remove it and put the new one back in. // It's already there. So now we have to remove it and put the new one back in.
node->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + leafentry_disksize(storeddata); node->u.l.n_bytes_in_buffer -= OMT_ITEM_OVERHEAD + leafentry_disksize(storeddata);
node->local_fingerprint -= node->rand4fingerprint*toku_le_crc(storeddata); node->local_fingerprint -= node->rand4fingerprint*toku_le_crc(storeddata);
toku_mempool_mfree(&node->u.l.buffer_mempool, storeddata, storedlen); toku_mempool_mfree(&node->u.l.buffer_mempool, storeddata, leafentry_memsize(storeddata));
// Now put the new kv in. // Now put the new kv in.
toku_gpma_set_at_index(node->u.l.buffer, idx, lesize, leafentry); toku_omt_set_at(node->u.l.buffer, leafentry, idx);
} else { } else {
r = toku_gpma_insert_bessel(node->u.l.buffer, lesize, leafentry, toku_cmd_leafval_bessel, &be, 0, 0, 0); r = toku_omt_insert(node->u.l.buffer, leafentry, toku_cmd_leafval_bessel, &be, 0);
assert(r==0); assert(r==0);
} }
node->u.l.n_bytes_in_buffer += PMA_ITEM_OVERHEAD + disksize; node->u.l.n_bytes_in_buffer += OMT_ITEM_OVERHEAD + disksize;
node->local_fingerprint += node->rand4fingerprint*toku_le_crc(leafentry); node->local_fingerprint += node->rand4fingerprint*toku_le_crc(leafentry);
node->dirty=1; node->dirty=1;
......
...@@ -3,7 +3,6 @@ ...@@ -3,7 +3,6 @@
#include "brt.h" #include "brt.h"
#include "key.h" #include "key.h"
#include "gpma.h"
#include "brt-internal.h" #include "brt-internal.h"
#include "memory.h" #include "memory.h"
#include "toku_assert.h" #include "toku_assert.h"
......
...@@ -30,6 +30,7 @@ static void test2 (int memcheck, int limit) { ...@@ -30,6 +30,7 @@ static void test2 (int memcheck, int limit) {
snprintf(key,100,"hello%d",i); snprintf(key,100,"hello%d",i);
snprintf(val,100,"there%d",i); snprintf(val,100,"there%d",i);
toku_brt_insert(t, toku_fill_dbt(&k, key, 1+strlen(key)), toku_fill_dbt(&v, val, 1+strlen(val)), null_txn); toku_brt_insert(t, toku_fill_dbt(&k, key, 1+strlen(key)), toku_fill_dbt(&v, val, 1+strlen(val)), null_txn);
r = toku_verify_brt(t); assert(r==0);
//printf("%s:%d did insert %d\n", __FILE__, __LINE__, i); //printf("%s:%d did insert %d\n", __FILE__, __LINE__, i);
if (0) { if (0) {
brt_flush(t); brt_flush(t);
......
...@@ -127,14 +127,15 @@ int toku_verify_brtnode (BRT brt, DISKOFF off, bytevec lorange, ITEMLEN lolen, b ...@@ -127,14 +127,15 @@ int toku_verify_brtnode (BRT brt, DISKOFF off, bytevec lorange, ITEMLEN lolen, b
} }
} else { } else {
// Make sure that they are in increasing order. // Make sure that they are in increasing order.
void *prev=0; int check_increasing (LEAFENTRY v, u_int32_t idx, void *vprevp) {
GPMA_ITERATE(node->u.l.buffer, idx, dlen, data, LEAFENTRY *prevp = vprevp;
({ if (idx>0)
if (prev==0) assert(compare_leafentries(brt, *prevp, v)<0);
prev=data; *prevp=v;
else return 0;
assert(compare_leafentries(brt, prev, data)<0); }
})); LEAFENTRY prev=0;
toku_omt_iterate(node->u.l.buffer, check_increasing, &prev);
} }
if ((r = toku_cachetable_unpin(brt->cf, off, 0, 0))) return r; if ((r = toku_cachetable_unpin(brt->cf, off, 0, 0))) return r;
return result; return result;
......
...@@ -43,7 +43,8 @@ ...@@ -43,7 +43,8 @@
//#define SLOW //#define SLOW
#ifdef SLOW #ifdef SLOW
#define VERIFY_NODE(n) toku_verify_counts(n) #define VERIFY_NODE(n) (toku_verify_counts(n), verify_all_in_mempool(n))
#else #else
#define VERIFY_NODE(n) ((void)0) #define VERIFY_NODE(n) ((void)0)
#endif #endif
...@@ -71,7 +72,7 @@ void toku_brtnode_free (BRTNODE *nodep) { ...@@ -71,7 +72,7 @@ void toku_brtnode_free (BRTNODE *nodep) {
toku_free(node->u.n.childinfos); toku_free(node->u.n.childinfos);
} else { } else {
if (node->u.l.buffer) // The buffer may have been freed already, in some cases. if (node->u.l.buffer) // The buffer may have been freed already, in some cases.
toku_gpma_free(&node->u.l.buffer, 0, 0); toku_omt_destroy(&node->u.l.buffer);
void *mpbase = toku_mempool_get_base(&node->u.l.buffer_mempool); void *mpbase = toku_mempool_get_base(&node->u.l.buffer_mempool);
toku_mempool_fini(&node->u.l.buffer_mempool); toku_mempool_fini(&node->u.l.buffer_mempool);
...@@ -87,12 +88,19 @@ static long brtnode_size(BRTNODE node) { ...@@ -87,12 +88,19 @@ static long brtnode_size(BRTNODE node) {
return toku_serialize_brtnode_size(node); return toku_serialize_brtnode_size(node);
} }
static void toku_update_brtnode_loggerlsn(BRTNODE node, TOKULOGGER logger) {
if (logger) { static int verify_in_mempool(LEAFENTRY le, u_int32_t UU(idx), void *vmp) {
node->log_lsn = toku_logger_last_lsn(logger); struct mempool *mp=vmp;
assert(toku_mempool_inrange(mp, le, leafentry_memsize(le)));
return 0;
}
static void verify_all_in_mempool(BRTNODE node) {
if (node->height==0) {
toku_omt_iterate(node->u.l.buffer, verify_in_mempool, &node->u.l.buffer_mempool);
} }
} }
static void fixup_child_fingerprint(BRTNODE node, int childnum_of_node, BRTNODE child, BRT brt, TOKULOGGER logger) { static void fixup_child_fingerprint(BRTNODE node, int childnum_of_node, BRTNODE child, BRT brt, TOKULOGGER logger) {
u_int32_t old_fingerprint = BNC_SUBTREE_FINGERPRINT(node,childnum_of_node); u_int32_t old_fingerprint = BNC_SUBTREE_FINGERPRINT(node,childnum_of_node);
u_int32_t sum = child->local_fingerprint; u_int32_t sum = child->local_fingerprint;
...@@ -124,7 +132,6 @@ static int brt_compare_pivot(BRT brt, DBT *key, DBT *data, bytevec ck) { ...@@ -124,7 +132,6 @@ static int brt_compare_pivot(BRT brt, DBT *key, DBT *data, bytevec ck) {
return cmp; return cmp;
} }
void toku_brtnode_flush_callback (CACHEFILE cachefile, DISKOFF nodename, void *brtnode_v, long size __attribute((unused)), BOOL write_me, BOOL keep_me, LSN modified_lsn __attribute__((__unused__)) , BOOL rename_p __attribute__((__unused__))) { void toku_brtnode_flush_callback (CACHEFILE cachefile, DISKOFF nodename, void *brtnode_v, long size __attribute((unused)), BOOL write_me, BOOL keep_me, LSN modified_lsn __attribute__((__unused__)) , BOOL rename_p __attribute__((__unused__))) {
BRTNODE brtnode = brtnode_v; BRTNODE brtnode = brtnode_v;
// if ((write_me || keep_me) && (brtnode->height==0)) { // if ((write_me || keep_me) && (brtnode->height==0)) {
...@@ -285,7 +292,7 @@ static void initialize_brtnode (BRT t, BRTNODE n, DISKOFF nodename, int height) ...@@ -285,7 +292,7 @@ static void initialize_brtnode (BRT t, BRTNODE n, DISKOFF nodename, int height)
n->thisnodename = nodename; n->thisnodename = nodename;
n->disk_lsn.lsn = 0; // a new one can always be 0. n->disk_lsn.lsn = 0; // a new one can always be 0.
n->log_lsn = n->disk_lsn; n->log_lsn = n->disk_lsn;
n->layout_version = 4; n->layout_version = 5;
n->height = height; n->height = height;
n->rand4fingerprint = random(); n->rand4fingerprint = random();
n->local_fingerprint = 0; n->local_fingerprint = 0;
...@@ -298,7 +305,7 @@ static void initialize_brtnode (BRT t, BRTNODE n, DISKOFF nodename, int height) ...@@ -298,7 +305,7 @@ static void initialize_brtnode (BRT t, BRTNODE n, DISKOFF nodename, int height)
n->u.n.childinfos=0; n->u.n.childinfos=0;
n->u.n.childkeys=0; n->u.n.childkeys=0;
} else { } else {
int r = toku_gpma_create(&n->u.l.buffer, 0); int r = toku_omt_create(&n->u.l.buffer);
assert(r==0); assert(r==0);
{ {
u_int32_t mpsize = mp_pool_size_for_nodesize(n->nodesize); u_int32_t mpsize = mp_pool_size_for_nodesize(n->nodesize);
...@@ -314,6 +321,7 @@ static void initialize_brtnode (BRT t, BRTNODE n, DISKOFF nodename, int height) ...@@ -314,6 +321,7 @@ static void initialize_brtnode (BRT t, BRTNODE n, DISKOFF nodename, int height)
} }
} }
// logs the memory allocation, but not the creation of the new node
int toku_create_new_brtnode (BRT t, BRTNODE *result, int height, TOKULOGGER logger) { int toku_create_new_brtnode (BRT t, BRTNODE *result, int height, TOKULOGGER logger) {
TAGMALLOC(BRTNODE, n); TAGMALLOC(BRTNODE, n);
int r; int r;
...@@ -331,9 +339,6 @@ int toku_create_new_brtnode (BRT t, BRTNODE *result, int height, TOKULOGGER logg ...@@ -331,9 +339,6 @@ int toku_create_new_brtnode (BRT t, BRTNODE *result, int height, TOKULOGGER logg
r=toku_cachetable_put(t->cf, n->thisnodename, n, brtnode_size(n), r=toku_cachetable_put(t->cf, n->thisnodename, n, brtnode_size(n),
toku_brtnode_flush_callback, toku_brtnode_fetch_callback, t); toku_brtnode_flush_callback, toku_brtnode_fetch_callback, t);
assert(r==0); assert(r==0);
r=toku_log_newbrtnode(logger, (LSN*)0, 0, toku_cachefile_filenum(t->cf), n->thisnodename, height, n->nodesize, (t->flags&TOKU_DB_DUPSORT)!=0, n->rand4fingerprint);
assert(r==0);
toku_update_brtnode_loggerlsn(n, logger);
return 0; return 0;
} }
...@@ -350,90 +355,11 @@ static int insert_to_buffer_in_nonleaf (BRTNODE node, int childnum, DBT *k, DBT ...@@ -350,90 +355,11 @@ static int insert_to_buffer_in_nonleaf (BRTNODE node, int childnum, DBT *k, DBT
return 0; return 0;
} }
struct move_struct { static int fill_buf (LEAFENTRY le, u_int32_t idx, void *varray) {
TOKULOGGER logger; LEAFENTRY *array=varray;
FILENUM filenum; array[idx]=le;
BRTNODE from,to;
struct gitem last_pair_remaining_in_from;
};
int move_between_mempools (u_int32_t len, void *odata, void **ndata, void *extra) {
struct move_struct *ms=extra;
assert(ms->from->height==0);
assert(ms->to->height==0);
assert(len==(unsigned)leafentry_memsize(odata));
void *newitem=mempool_malloc_from_gpma(ms->to->u.l.buffer, &ms->to->u.l.buffer_mempool, len);
assert(newitem);
memcpy(newitem, odata, len);
toku_mempool_mfree(&ms->from->u.l.buffer_mempool, odata, len);
*ndata = newitem;
assert(len==(unsigned)leafentry_memsize(newitem));
return 0;
}
int note_move_items_within_or_between (u_int32_t nitems, u_int32_t *froms, u_int32_t *tos, BRTNODE from, BRTNODE to, FILENUM filenum, TOKULOGGER logger, u_int32_t old_N, u_int32_t new_N) {
INTPAIRARRAY ipa;
MALLOC_N(nitems, ipa.array);
if (ipa.array==0) return errno;
u_int32_t i;
for (i=0; i<nitems; i++) {
ipa.array[i].a=froms[i];
ipa.array[i].b=tos [i];
}
ipa.size=nitems;
if (logger) {
LSN lsn;
int r = toku_log_pmadistribute(logger, &lsn, 0, filenum, from->thisnodename, to->thisnodename, ipa, old_N, new_N);
if (r!=0) return r;
from->log_lsn=lsn;
to->log_lsn =lsn;
}
toku_free(ipa.array);
return 0;
}
static int note_move_items_within (u_int32_t nitems, u_int32_t *froms, u_int32_t *tos, struct gitem *items __attribute__((__unused__)), u_int32_t old_N, u_int32_t new_N, void *extra) {
struct move_struct *ms=extra;
assert(nitems>0);
ms->last_pair_remaining_in_from=items[nitems-1];
return note_move_items_within_or_between(nitems, froms, tos, ms->from, ms->from, ms->filenum, ms->logger, old_N, new_N);
}
static int note_move_items_between (u_int32_t nitems, u_int32_t *froms, u_int32_t *tos, struct gitem *items, u_int32_t old_N, u_int32_t new_N, void *extra) {
struct move_struct *ms=extra;
int r = note_move_items_within_or_between(nitems, froms, tos, ms->from, ms->to, ms->filenum, ms->logger, old_N, new_N);
if (r!=0) return r;
u_int32_t i;
u_int32_t diffsize = 0;
u_int32_t diff_fp = 0;
for (i=0; i<nitems; i++) {
diffsize += PMA_ITEM_OVERHEAD + leafentry_disksize(items[i].data);
diff_fp += toku_le_crc(items[i].data);
}
ms->from->local_fingerprint -= ms->from->rand4fingerprint * diff_fp;
ms->to->local_fingerprint += ms->to->rand4fingerprint * diff_fp;
ms->from->u.l.n_bytes_in_buffer -= diffsize;
ms->to->u.l.n_bytes_in_buffer += diffsize;
return 0;
}
struct delete_struct {
BRTNODE node;
};
#if 0
static int brt_leaf_delete_callback (u_int32_t slotnum, u_int32_t len, void *data, void *extra) {
struct delete_struct *d = extra;
d->node->local_fingerprint -= d->node->rand4fingerprint*toku_calccrc32_kvpair_struct(data);
d->node->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + leafentry_disksize(data);
toku_mempool_mfree(&d->node->u.l.buffer_mempool, data, len);
d->node->dirty=1;
// Should use slotnum for logging
slotnum=slotnum; //????
return 0; return 0;
} }
#endif
static int brtleaf_split (TOKULOGGER logger, FILENUM filenum, BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk) { static int brtleaf_split (TOKULOGGER logger, FILENUM filenum, BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, DBT *splitk) {
BRTNODE B; BRTNODE B;
...@@ -448,37 +374,74 @@ static int brtleaf_split (TOKULOGGER logger, FILENUM filenum, BRT t, BRTNODE nod ...@@ -448,37 +374,74 @@ static int brtleaf_split (TOKULOGGER logger, FILENUM filenum, BRT t, BRTNODE nod
//printf("%s:%d A is at %lld\n", __FILE__, __LINE__, A->thisnodename); //printf("%s:%d A is at %lld\n", __FILE__, __LINE__, A->thisnodename);
//printf("%s:%d B is at %lld nodesize=%d\n", __FILE__, __LINE__, B->thisnodename, B->nodesize); //printf("%s:%d B is at %lld nodesize=%d\n", __FILE__, __LINE__, B->thisnodename, B->nodesize);
assert(node->height>0 || node->u.l.buffer!=0); assert(node->height>0 || node->u.l.buffer!=0);
verify_all_in_mempool(node);
LEAFENTRY *MALLOC_N(toku_omt_size(node->u.l.buffer), leafentries);
u_int32_t n_leafentries = toku_omt_size(node->u.l.buffer);
toku_omt_iterate(node->u.l.buffer, fill_buf, leafentries);
u_int32_t break_at = 0;
{
u_int32_t i;
u_int32_t sumlesizes=0;
for (i=0; i<n_leafentries; i++) sumlesizes += leafentry_disksize(leafentries[i]);
u_int32_t sumsofar=0;
for (i=0; i<n_leafentries; i++) {
assert(toku_mempool_inrange(&node->u.l.buffer_mempool, leafentries[i], leafentry_memsize(leafentries[i])));
sumsofar += leafentry_disksize(leafentries[i]);
if (sumsofar*2 >= sumlesizes) {
break_at = i;
break;
}
}
}
// Now we know where we are going to break it
OMT old_omt = node->u.l.buffer;
toku_omt_destroy(&B->u.l.buffer); // Destroy B's empty OMT, so I can rebuild it from an array
{
u_int32_t i;
u_int32_t diff_fp = 0;
u_int32_t diff_size = 0;
for (i=break_at; i<n_leafentries; i++) {
LEAFENTRY oldle = leafentries[i];
LEAFENTRY newle = toku_mempool_malloc(&B->u.l.buffer_mempool, leafentry_memsize(oldle), 1);
assert(newle!=0); // it's a fresh mpool, so this should always work.
diff_fp += toku_le_crc(oldle);
diff_size += OMT_ITEM_OVERHEAD + leafentry_disksize(oldle);
memcpy(newle, oldle, leafentry_memsize(oldle));
toku_mempool_mfree(&node->u.l.buffer_mempool, oldle, leafentry_memsize(oldle));
leafentries[i] = newle;
}
node->local_fingerprint -= node->rand4fingerprint * diff_fp;
B ->local_fingerprint += B ->rand4fingerprint * diff_fp;
node->u.l.n_bytes_in_buffer -= diff_size;
B ->u.l.n_bytes_in_buffer += diff_size;
}
int r; int r;
struct move_struct ms = {.logger=logger, .filenum=filenum, .from=node, .to=B}; if ((r = toku_omt_create_from_sorted_array(&B->u.l.buffer, leafentries+break_at, n_leafentries-break_at))) return r;
//toku_verify_gpma(node->u.l.buffer); if ((r = toku_omt_create_from_sorted_array(&node->u.l.buffer, leafentries, break_at))) return r;
GPMA_ITERATE(node->u.l.buffer, idx, vlen, vdata,
({ toku_free(leafentries);
char *p=vdata;
//printf("%s:%d %d:%p ", __FILE__, __LINE__, idx, p); verify_all_in_mempool(node);
assert((char*)node->u.l.buffer_mempool.base<= p && p < (char*)node->u.l.buffer_mempool.base+node->u.l.buffer_mempool.size ); verify_all_in_mempool(B);
}));
r = toku_gpma_split(node->u.l.buffer, B->u.l.buffer, PMA_ITEM_OVERHEAD, toku_omt_destroy(&old_omt);
move_between_mempools, &ms,
note_move_items_within, &ms, LSN lsn;
note_move_items_between, &ms); r = toku_log_leafsplit(logger, &lsn, 0, filenum, node->thisnodename, B->thisnodename, n_leafentries, break_at, node->nodesize, B->rand4fingerprint, (t->flags&TOKU_DB_DUPSORT)!=0);
GPMA_ITERATE(node->u.l.buffer, idx, vlen, vdata, if (logger) {
({ node->log_lsn = lsn;
char *p=vdata; B->log_lsn = lsn;
//printf("%s:%d %d:%p ", __FILE__, __LINE__, idx, p); }
assert((char*)node->u.l.buffer_mempool.base<= p && p < (char*)node->u.l.buffer_mempool.base+node->u.l.buffer_mempool.size );
}));
GPMA_ITERATE(B->u.l.buffer, idx, vlen, vdata,
({
char *p=vdata;
//printf("%s:%d %d:%p\n", __FILE__, __LINE__, idx, p);
assert((char*)B->u.l.buffer_mempool.base<= p && p < (char*)B->u.l.buffer_mempool.base+node->u.l.buffer_mempool.size );
}));
//toku_verify_gpma(node->u.l.buffer); //toku_verify_gpma(node->u.l.buffer);
//toku_verify_gpma(B->u.l.buffer); //toku_verify_gpma(B->u.l.buffer);
if (splitk) { if (splitk) {
memset(splitk, 0, sizeof *splitk); memset(splitk, 0, sizeof *splitk);
LEAFENTRY le=ms.last_pair_remaining_in_from.data; LEAFENTRY le;
r=toku_omt_fetch(node->u.l.buffer, toku_omt_size(node->u.l.buffer)-1, &le);
assert(r==0); // that fetch should have worked.
if (node->flags&TOKU_DB_DUPSORT) { if (node->flags&TOKU_DB_DUPSORT) {
splitk->size = le_any_keylen(le)+le_any_vallen(le); splitk->size = le_any_keylen(le)+le_any_vallen(le);
splitk->data = kv_pair_malloc(le_any_key(le), le_any_keylen(le), le_any_val(le), le_any_vallen(le)); splitk->data = kv_pair_malloc(le_any_key(le), le_any_keylen(le), le_any_val(le), le_any_vallen(le));
...@@ -669,8 +632,8 @@ static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRT ...@@ -669,8 +632,8 @@ static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRT
assert(node->height>0); /* Not a leaf. */ assert(node->height>0); /* Not a leaf. */
DBT *k = cmd->u.id.key; DBT *k = cmd->u.id.key;
DBT *v = cmd->u.id.val; DBT *v = cmd->u.id.val;
unsigned int newsize = toku_serialize_brtnode_size(child) + k->size + v->size + KEY_VALUE_OVERHEAD; unsigned int newsize_bounded = toku_serialize_brtnode_size(child) + k->size + v->size + KEY_VALUE_OVERHEAD + LE_OVERHEAD_BOUND;
newsize += (child->height > 0) ? BRT_CMD_OVERHEAD : PMA_ITEM_OVERHEAD; newsize_bounded += (child->height > 0) ? BRT_CMD_OVERHEAD : OMT_ITEM_OVERHEAD;
#if 0 #if 0
// This stuff is wrong. And we don't have a test to differentiate this from the previous line of code. // This stuff is wrong. And we don't have a test to differentiate this from the previous line of code.
unsigned int additionaloverhead = (child->height > 0) ? BRT_CMD_OVERHEAD : PMA_ITEM_OVERHEAD; unsigned int additionaloverhead = (child->height > 0) ? BRT_CMD_OVERHEAD : PMA_ITEM_OVERHEAD;
...@@ -680,8 +643,9 @@ static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRT ...@@ -680,8 +643,9 @@ static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRT
printf("%s:%d\n", __FILE__, __LINE__); printf("%s:%d\n", __FILE__, __LINE__);
} }
#endif #endif
int to_child = newsize <= child->nodesize;
if (toku_brt_debug_mode) { int to_child = newsize_bounded <= child->nodesize;
if (0) {
printf("%s:%d pushing %s to %s %d", __FILE__, __LINE__, (char*)k->data, to_child? "child" : "hash", childnum_of_node); printf("%s:%d pushing %s to %s %d", __FILE__, __LINE__, (char*)k->data, to_child? "child" : "hash", childnum_of_node);
if (childnum_of_node+1<node->u.n.n_children) { if (childnum_of_node+1<node->u.n.n_children) {
DBT k2; DBT k2;
...@@ -706,6 +670,7 @@ static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRT ...@@ -706,6 +670,7 @@ static int push_brt_cmd_down_only_if_it_wont_push_more_else_put_here (BRT t, BRT
} else { } else {
r=insert_to_buffer_in_nonleaf(node, childnum_of_node, k, v, cmd->type, cmd->xid); r=insert_to_buffer_in_nonleaf(node, childnum_of_node, k, v, cmd->type, cmd->xid);
} }
assert(newsize_bounded >= toku_serialize_brtnode_size(child));
fixup_child_fingerprint(node, childnum_of_node, child, t, logger); fixup_child_fingerprint(node, childnum_of_node, child, t, logger);
return r; return r;
} }
...@@ -1142,9 +1107,8 @@ int leafval_bessel_le_provpair (TXNID xid __attribute__((__unused__)), ...@@ -1142,9 +1107,8 @@ int leafval_bessel_le_provpair (TXNID xid __attribute__((__unused__)),
return leafval_bessel_le_committed(klen, kval, plen, pval, be); return leafval_bessel_le_committed(klen, kval, plen, pval, be);
} }
int toku_cmd_leafval_bessel (u_int32_t dlen __attribute__((__unused__)), void *dval, void *extra) { int toku_cmd_leafval_bessel (LEAFENTRY le, void *extra) {
struct cmd_leafval_bessel_extra *be = extra; struct cmd_leafval_bessel_extra *be = extra;
LEAFENTRY le = dval;
LESWITCHCALL(le, leafval_bessel, be); LESWITCHCALL(le, leafval_bessel, be);
} }
...@@ -1290,7 +1254,7 @@ static int apply_cmd_to_le_provpair (TXNID xid __attribute__((__unused__)), ...@@ -1290,7 +1254,7 @@ static int apply_cmd_to_le_provpair (TXNID xid __attribute__((__unused__)),
} }
static int apply_cmd_to_leaf (BRT_CMD cmd, static int apply_cmd_to_leaf (BRT_CMD cmd,
u_int32_t oldlen, void *stored_data, // NULL if there was no stored data. void *stored_data, // NULL if there was no stored data.
u_int32_t *newlen, u_int32_t *disksize, LEAFENTRY *new_data) { u_int32_t *newlen, u_int32_t *disksize, LEAFENTRY *new_data) {
if (stored_data==0) { if (stored_data==0) {
switch (cmd->type) { switch (cmd->type) {
...@@ -1318,7 +1282,6 @@ static int apply_cmd_to_leaf (BRT_CMD cmd, ...@@ -1318,7 +1282,6 @@ static int apply_cmd_to_leaf (BRT_CMD cmd,
assert(0); assert(0);
return 0; return 0;
} else { } else {
assert(oldlen==leafentry_memsize(stored_data));
LESWITCHCALL(stored_data, apply_cmd_to, cmd, LESWITCHCALL(stored_data, apply_cmd_to, cmd,
newlen, disksize, new_data); newlen, disksize, new_data);
} }
...@@ -1345,42 +1308,37 @@ int should_compare_both_keys (BRTNODE node, BRT_CMD cmd) { ...@@ -1345,42 +1308,37 @@ int should_compare_both_keys (BRTNODE node, BRT_CMD cmd) {
} }
static int brt_leaf_apply_cmd_once (BRT t, BRTNODE node, BRT_CMD cmd, TOKULOGGER logger, static int brt_leaf_apply_cmd_once (BRT t, BRTNODE node, BRT_CMD cmd, TOKULOGGER logger,
u_int32_t idx, u_int32_t storedlen, LEAFENTRY le) { u_int32_t idx, LEAFENTRY le) {
FILENUM filenum = toku_cachefile_filenum(t->cf); FILENUM filenum = toku_cachefile_filenum(t->cf);
u_int32_t newlen, newdisksize; u_int32_t newlen, newdisksize;
LEAFENTRY newdata; LEAFENTRY newdata;
int r = apply_cmd_to_leaf(cmd, storedlen, le, &newlen, &newdisksize, &newdata); int r = apply_cmd_to_leaf(cmd, le, &newlen, &newdisksize, &newdata);
if (r!=0) return r; if (r!=0) return r;
if (newdata) assert(newdisksize == leafentry_disksize(newdata)); if (newdata) assert(newdisksize == leafentry_disksize(newdata));
if (le) { if (le) {
// It's there, note that it's gone and remove it from the mempool // It's there, note that it's gone and remove it from the mempool
r = toku_log_deleteleafentry(logger, &node->log_lsn, 0, filenum, node->thisnodename, idx, le); if ((r = toku_log_deleteleafentry(logger, &node->log_lsn, 0, filenum, node->thisnodename, idx, le))) return r;
if (r!=0) return r;
struct move_struct ms = {.logger=logger, .filenum=filenum, .from=node, .to=node}; if ((r = toku_omt_delete_at(node->u.l.buffer, idx))) return r;
r = toku_gpma_delete_at_index(node->u.l.buffer, idx, note_move_items_within, &ms);
if (r!=0) return r;
node->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + leafentry_disksize(le); node->u.l.n_bytes_in_buffer -= OMT_ITEM_OVERHEAD + leafentry_disksize(le);
node->local_fingerprint -= node->rand4fingerprint * toku_le_crc(le); node->local_fingerprint -= node->rand4fingerprint * toku_le_crc(le);
toku_mempool_mfree(&node->u.l.buffer_mempool, 0, storedlen); // Must pass 0, since le may be no good any more. toku_mempool_mfree(&node->u.l.buffer_mempool, 0, leafentry_memsize(le)); // Must pass 0, since le may be no good any more.
} }
if (newdata) { if (newdata) {
struct move_struct ms = {.logger=logger, .filenum=filenum, .from=node, .to=node}; LEAFENTRY new_le = mempool_malloc_from_omt(node->u.l.buffer, &node->u.l.buffer_mempool, newlen);
struct cmd_leafval_bessel_extra be = {t, cmd, node->flags & TOKU_DB_DUPSORT}; assert(new_le);
LEAFENTRY new_le = mempool_malloc_from_gpma(node->u.l.buffer, &node->u.l.buffer_mempool, newlen);
memcpy(new_le, newdata, newlen); memcpy(new_le, newdata, newlen);
r = toku_gpma_insert_bessel(node->u.l.buffer, newlen, new_le, toku_cmd_leafval_bessel, &be, note_move_items_within, &ms, &idx); if ((r = toku_omt_insert_at(node->u.l.buffer, new_le, idx))) return r;
if (r!=0) return r;
r = toku_log_insertleafentry(logger, &node->log_lsn, 0, toku_cachefile_filenum(t->cf), node->thisnodename, idx, newdata); if ((r = toku_log_insertleafentry(logger, &node->log_lsn, 0, toku_cachefile_filenum(t->cf), node->thisnodename, idx, newdata))) return r;
if (r!=0) return r;
assert(newdisksize == leafentry_disksize(newdata)); assert(newdisksize == leafentry_disksize(newdata));
node->u.l.n_bytes_in_buffer += PMA_ITEM_OVERHEAD + newdisksize; node->u.l.n_bytes_in_buffer += OMT_ITEM_OVERHEAD + newdisksize;
node->local_fingerprint += node->rand4fingerprint*toku_le_crc(newdata); node->local_fingerprint += node->rand4fingerprint*toku_le_crc(newdata);
toku_free(newdata); toku_free(newdata);
} }
...@@ -1397,8 +1355,7 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD cmd, ...@@ -1397,8 +1355,7 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD cmd,
assert(node->height==0); assert(node->height==0);
FILENUM filenum = toku_cachefile_filenum(t->cf); FILENUM filenum = toku_cachefile_filenum(t->cf);
u_int32_t storedlen; LEAFENTRY storeddata;
void *storeddata;
u_int32_t idx; u_int32_t idx;
int r; int r;
int compare_both = should_compare_both_keys(node, cmd); int compare_both = should_compare_both_keys(node, cmd);
...@@ -1406,14 +1363,15 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD cmd, ...@@ -1406,14 +1363,15 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD cmd,
switch (cmd->type) { switch (cmd->type) {
case BRT_INSERT: case BRT_INSERT:
r = toku_gpma_lookup_bessel(node->u.l.buffer, toku_cmd_leafval_bessel, 0, &be,
&storedlen, &storeddata, &idx); r = toku_omt_find_zero(node->u.l.buffer, toku_cmd_leafval_bessel, &be,
&storeddata, &idx);
if (r==DB_NOTFOUND) { if (r==DB_NOTFOUND) {
storeddata = 0; storeddata = 0;
} else if (r!=0) } else if (r!=0)
return r; return r;
r = brt_leaf_apply_cmd_once(t, node, cmd, logger, idx, storedlen, storeddata); r = brt_leaf_apply_cmd_once(t, node, cmd, logger, idx, storeddata);
if (r!=0) return r; if (r!=0) return r;
break; break;
case BRT_DELETE_BOTH: case BRT_DELETE_BOTH:
...@@ -1421,8 +1379,8 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD cmd, ...@@ -1421,8 +1379,8 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD cmd,
case BRT_COMMIT_BOTH: case BRT_COMMIT_BOTH:
// Delete the one item // Delete the one item
r = toku_gpma_lookup_bessel(node->u.l.buffer, toku_cmd_leafval_bessel, 0, &be, r = toku_omt_find_zero(node->u.l.buffer, toku_cmd_leafval_bessel, &be,
&storedlen, &storeddata, &idx); &storeddata, &idx);
if (r == DB_NOTFOUND) break; if (r == DB_NOTFOUND) break;
if (r != 0) return r; if (r != 0) return r;
...@@ -1430,7 +1388,7 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD cmd, ...@@ -1430,7 +1388,7 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD cmd,
static int count=0; static int count=0;
count++; count++;
r = brt_leaf_apply_cmd_once(t, node, cmd, logger, idx, storedlen, storeddata); r = brt_leaf_apply_cmd_once(t, node, cmd, logger, idx, storeddata);
if (r!=0) return r; if (r!=0) return r;
VERIFY_NODE(node); VERIFY_NODE(node);
...@@ -1441,8 +1399,8 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD cmd, ...@@ -1441,8 +1399,8 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD cmd,
case BRT_COMMIT_ANY: case BRT_COMMIT_ANY:
// Delete all the matches // Delete all the matches
r = toku_gpma_lookup_bessel(node->u.l.buffer, toku_cmd_leafval_bessel, 0, &be, r = toku_omt_find_zero(node->u.l.buffer, toku_cmd_leafval_bessel, &be,
&storedlen, &storeddata, &idx); &storeddata, &idx);
if (r == DB_NOTFOUND) break; if (r == DB_NOTFOUND) break;
if (r != 0) return r; if (r != 0) return r;
...@@ -1450,15 +1408,15 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD cmd, ...@@ -1450,15 +1408,15 @@ static int brt_leaf_put_cmd (BRT t, BRTNODE node, BRT_CMD cmd,
int vallen = le_any_vallen(storeddata); int vallen = le_any_vallen(storeddata);
void *save_val = toku_memdup(le_any_val(storeddata), vallen); void *save_val = toku_memdup(le_any_val(storeddata), vallen);
r = brt_leaf_apply_cmd_once(t, node, cmd, logger, idx, storedlen, storeddata); r = brt_leaf_apply_cmd_once(t, node, cmd, logger, idx, storeddata);
if (r!=0) return r; if (r!=0) return r;
// Now we must find the next one. // Now we must find the next one.
DBT valdbt; DBT valdbt;
BRT_CMD_S ncmd = { cmd->type, cmd->xid, .u.id={cmd->u.id.key, toku_fill_dbt(&valdbt, save_val, vallen)}}; BRT_CMD_S ncmd = { cmd->type, cmd->xid, .u.id={cmd->u.id.key, toku_fill_dbt(&valdbt, save_val, vallen)}};
struct cmd_leafval_bessel_extra nbe = {t, &ncmd, 1}; struct cmd_leafval_bessel_extra nbe = {t, &ncmd, 1};
r = toku_gpma_lookup_bessel(node->u.l.buffer, toku_cmd_leafval_bessel, +1, &nbe, r = toku_omt_find(node->u.l.buffer, toku_cmd_leafval_bessel, &nbe, +1,
&storedlen, &storeddata, &idx); &storeddata, &idx);
toku_free(save_val); toku_free(save_val);
if (r!=0) break; if (r!=0) break;
...@@ -1812,8 +1770,7 @@ static int setup_initial_brt_root_node (BRT t, DISKOFF offset, TOKULOGGER logger ...@@ -1812,8 +1770,7 @@ static int setup_initial_brt_root_node (BRT t, DISKOFF offset, TOKULOGGER logger
return r; return r;
} }
// verify_local_fingerprint_nonleaf(node); // verify_local_fingerprint_nonleaf(node);
toku_log_newbrtnode(logger, (LSN*)0, 0, toku_cachefile_filenum(t->cf), offset, 0, t->h->nodesize, (t->flags&TOKU_DB_DUPSORT)!=0, node->rand4fingerprint); toku_log_newbrtnode(logger, &node->log_lsn, 0, toku_cachefile_filenum(t->cf), offset, 0, t->h->nodesize, (t->flags&TOKU_DB_DUPSORT)!=0, node->rand4fingerprint);
toku_update_brtnode_loggerlsn(node, logger);
r = toku_unpin_brtnode(t, node); r = toku_unpin_brtnode(t, node);
if (r!=0) { if (r!=0) {
toku_free(node); toku_free(node);
...@@ -2211,9 +2168,8 @@ static int brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk, ...@@ -2211,9 +2168,8 @@ static int brt_init_new_root(BRT brt, BRTNODE nodea, BRTNODE nodeb, DBT splitk,
{ {
BYTESTRING bs = { .len = kv_pair_keylen(newroot->u.n.childkeys[0]), BYTESTRING bs = { .len = kv_pair_keylen(newroot->u.n.childkeys[0]),
.data = kv_pair_key(newroot->u.n.childkeys[0]) }; .data = kv_pair_key(newroot->u.n.childkeys[0]) };
r=toku_log_setpivot(logger, (LSN*)0, 0, toku_cachefile_filenum(brt->cf), newroot_diskoff, 0, bs); r=toku_log_setpivot(logger, &newroot->log_lsn, 0, toku_cachefile_filenum(brt->cf), newroot_diskoff, 0, bs);
if (r!=0) return r; if (r!=0) return r;
toku_update_brtnode_loggerlsn(newroot, logger);
} }
r = toku_unpin_brtnode(brt, nodea); r = toku_unpin_brtnode(brt, nodea);
if (r!=0) return r; if (r!=0) return r;
...@@ -2607,7 +2563,7 @@ int pair_leafval_bessel_le_provpair (TXNID xid __attribute__((__unused__)), ...@@ -2607,7 +2563,7 @@ int pair_leafval_bessel_le_provpair (TXNID xid __attribute__((__unused__)),
} }
static int bessel_from_search_t (u_int32_t dlen __attribute__((__unused__)), void *leafval, void *extra) { static int bessel_from_search_t (LEAFENTRY leafval, void *extra) {
brt_search_t *search = extra; brt_search_t *search = extra;
LESWITCHCALL(leafval, pair_leafval_bessel, search); LESWITCHCALL(leafval, pair_leafval_bessel, search);
} }
...@@ -2621,14 +2577,13 @@ static int brt_search_leaf_node(BRT brt, BRTNODE node, brt_search_t *search, DBT ...@@ -2621,14 +2577,13 @@ static int brt_search_leaf_node(BRT brt, BRTNODE node, brt_search_t *search, DBT
} }
return EINVAL; // This return and the goto are a hack to get both compile-time and run-time checking on enum return EINVAL; // This return and the goto are a hack to get both compile-time and run-time checking on enum
ok: ; ok: ;
u_int32_t len; LEAFENTRY data;
void * data;
u_int32_t idx; u_int32_t idx;
int r = toku_gpma_lookup_bessel(node->u.l.buffer, int r = toku_omt_find(node->u.l.buffer,
bessel_from_search_t, bessel_from_search_t,
direction, search,
search, direction,
&len, &data, &idx); &data, &idx);
if (r!=0) return r; if (r!=0) return r;
LEAFENTRY le = data; LEAFENTRY le = data;
...@@ -2639,15 +2594,15 @@ static int brt_search_leaf_node(BRT brt, BRTNODE node, brt_search_t *search, DBT ...@@ -2639,15 +2594,15 @@ static int brt_search_leaf_node(BRT brt, BRTNODE node, brt_search_t *search, DBT
switch (search->direction) { switch (search->direction) {
case BRT_SEARCH_LEFT: case BRT_SEARCH_LEFT:
idx++; idx++;
if (idx>toku_gpma_index_limit(node->u.l.buffer)) return DB_NOTFOUND; if (idx>=toku_omt_size(node->u.l.buffer)) return DB_NOTFOUND;
break; break;
case BRT_SEARCH_RIGHT: case BRT_SEARCH_RIGHT:
if (idx==0) return DB_NOTFOUND; if (idx==0) return DB_NOTFOUND;
idx--; idx--;
break; break;
} }
if (!toku_gpma_valididx(node->u.l.buffer, idx)) continue; if (idx>=toku_omt_size(node->u.l.buffer)) continue;
r = toku_gpma_get_from_index(node->u.l.buffer, idx, &len, &data); r = toku_omt_fetch(node->u.l.buffer, idx, &data);
assert(r==0); // we just validated the index assert(r==0); // we just validated the index
le = data; le = data;
if (!le_is_provdel(le)) goto got_a_good_value; if (!le_is_provdel(le)) goto got_a_good_value;
...@@ -3088,10 +3043,22 @@ int toku_brt_height_of_root(BRT brt, int *height) { ...@@ -3088,10 +3043,22 @@ int toku_brt_height_of_root(BRT brt, int *height) {
return 0; return 0;
} }
int toku_gpma_compress_kvspace (GPMA pma, struct mempool *memp); struct omt_compressor_state {
void *mempool_malloc_from_gpma(GPMA pma, struct mempool *mp, size_t size); struct mempool *new_kvspace;
OMT omt;
};
int toku_gpma_compress_kvspace (GPMA pma, struct mempool *memp) { static int move_it (LEAFENTRY le, u_int32_t idx, void *v) {
struct omt_compressor_state *oc = v;
u_int32_t size = leafentry_memsize(le);
LEAFENTRY newdata = toku_mempool_malloc(oc->new_kvspace, size, 1);
assert(newdata); // we do this on a fresh mempool, so nothing bad shouldhapepn
memcpy(newdata, le, size);
toku_omt_set_at(oc->omt, newdata, idx);
return 0;
}
int toku_omt_compress_kvspace (OMT omt, struct mempool *memp) {
if (toku_mempool_get_frag_size(memp) == 0) if (toku_mempool_get_frag_size(memp) == 0)
return -1; return -1;
void *newmem = toku_malloc(memp->size); void *newmem = toku_malloc(memp->size);
...@@ -3099,25 +3066,19 @@ int toku_gpma_compress_kvspace (GPMA pma, struct mempool *memp) { ...@@ -3099,25 +3066,19 @@ int toku_gpma_compress_kvspace (GPMA pma, struct mempool *memp) {
return -2; return -2;
struct mempool new_kvspace; struct mempool new_kvspace;
toku_mempool_init(&new_kvspace, newmem, memp->size); toku_mempool_init(&new_kvspace, newmem, memp->size);
GPMA_ITERATE(pma, idx, len, data, struct omt_compressor_state oc = { &new_kvspace, omt };
({ toku_omt_iterate(omt, move_it, &oc);
void *newdata = toku_mempool_malloc(&new_kvspace, (size_t)len, 4);
assert(newdata);
memcpy(newdata, data, (size_t)len);
toku_gpma_set_at_index(pma, idx, len, newdata);
// toku_verify_gpma(pma);
}));
toku_free(memp->base); toku_free(memp->base);
*memp = new_kvspace; *memp = new_kvspace;
// toku_verify_gpma(pma);
return 0; return 0;
} }
void *mempool_malloc_from_gpma(GPMA pma, struct mempool *mp, size_t size) { void *mempool_malloc_from_omt(OMT omt, struct mempool *mp, size_t size) {
void *v = toku_mempool_malloc(mp, size, 4); void *v = toku_mempool_malloc(mp, size, 1);
if (v==0) { if (v==0) {
if (0 == toku_gpma_compress_kvspace(pma, mp)) { if (0 == toku_omt_compress_kvspace(omt, mp)) {
v = toku_mempool_malloc(mp, size, 4); v = toku_mempool_malloc(mp, size, 1);
assert(v); assert(v);
} }
} }
......
...@@ -219,7 +219,7 @@ static void initialize_brtnode (BRT t, BRTNODE n, DISKOFF nodename, int height) ...@@ -219,7 +219,7 @@ static void initialize_brtnode (BRT t, BRTNODE n, DISKOFF nodename, int height)
n->thisnodename = nodename; n->thisnodename = nodename;
n->disk_lsn.lsn = 0; // a new one can always be 0. n->disk_lsn.lsn = 0; // a new one can always be 0.
n->log_lsn = n->disk_lsn; n->log_lsn = n->disk_lsn;
n->layout_version = 4; n->layout_version = 5;
n->height = height; n->height = height;
n->rand4fingerprint = random(); n->rand4fingerprint = random();
n->local_fingerprint = 0; n->local_fingerprint = 0;
......
...@@ -107,12 +107,13 @@ void dump_node (int f, DISKOFF off) { ...@@ -107,12 +107,13 @@ void dump_node (int f, DISKOFF off) {
} }
} else { } else {
printf(" n_bytes_in_buffer=%d\n", n->u.l.n_bytes_in_buffer); printf(" n_bytes_in_buffer=%d\n", n->u.l.n_bytes_in_buffer);
printf(" items_in_buffer =%d\n", toku_gpma_n_entries(n->u.l.buffer)); printf(" items_in_buffer =%d\n", toku_omt_size(n->u.l.buffer));
GPMA_ITERATE(n->u.l.buffer, idx, len, data, int print_le(LEAFENTRY le, u_int32_t UU(idx), void *UU(v)) {
({ print_leafentry(stdout, le);
print_leafentry(stdout, data); printf("\n");
printf("\n"); return 0;
})); }
toku_omt_iterate(n->u.l.buffer, print_le, 0);
} }
} }
......
...@@ -98,4 +98,6 @@ typedef struct brt_cmd BRT_CMD_S, *BRT_CMD; ...@@ -98,4 +98,6 @@ typedef struct brt_cmd BRT_CMD_S, *BRT_CMD;
#define UU(x) x __attribute__((__unused__)) #define UU(x) x __attribute__((__unused__))
typedef struct leafentry *LEAFENTRY;
#endif #endif
#include "memory.h"
/* In-memory state of a general PMA (GPMA): the item array, its occupancy
 * count, and the per-doubling density-threshold steps used when deciding
 * how full a region of the array may be. */
struct gpma {
enum typ_tag tag; /* Type tag.  NOTE(review): presumably checked for run-time type validation -- confirm against the definition of enum typ_tag. */
unsigned int N; /* How long is the array? Always a power of two >= 4. */
u_int32_t n_items_present; /* How many array elements are non-null. */
struct gitem *items; /* A malloced array. If any item's DATA is null, then it's not in use. */
double udt_step; /* upper density threshold step */
/* Each doubling decreases the density by density step.
 * For example if array_len=256 and uplgN=8 then there are 5 doublings.
 * Regions of size 8 are full. Regions of size 16 are 90% full.
 * Regions of size 32 are 80% full. Regions of size 64 are 70% full.
 * Regions of size 128 are 60% full. Regions of size 256 are 50% full.
 * The density step is 0.10. */
double ldt_step; /* lower density threshold step */
};
/* Array length used by toku_gpma_create when the caller passes an initial limit of 0. */
#define GPMA_MIN_ARRAY_SIZE 4
/* density thresholds */
/* LDT = lower density threshold, UDT = upper density threshold.
 * NOTE(review): GPMA_LDT_HIGH (0.25) is numerically smaller than GPMA_LDT_LOW (0.40), so
 * (GPMA_LDT_HIGH - GPMA_LDT_LOW) is negative wherever it is used as a step; the HIGH/LOW
 * suffixes presumably refer to tree level rather than to magnitude -- confirm. */
#define GPMA_LDT_HIGH 0.25
#define GPMA_LDT_LOW 0.40
#define GPMA_UDT_HIGH 1.00
#define GPMA_UDT_LOW 0.50
/* Expose these for testing purposes */
u_int32_t toku_gpma_find_index_bes (GPMA pma, gpma_besselfun_t besf, int direction, void *extra, int *found);
u_int32_t toku_gpma_find_index (GPMA pma, u_int32_t len, void *data, gpma_compare_fun_t compare, void *extra, int *found);
int toku_lg (unsigned int n);
u_int32_t toku_hyperceil (u_int32_t v);
int toku_max_int (int, int);
int toku_gpma_smooth_region (GPMA pma,
u_int32_t lo, u_int32_t hi,
u_int32_t count, // The number of nonnull values
u_int32_t idx, u_int32_t *newidxp, gpma_renumber_callback_t rcall, void *extra,
u_int32_t old_N);
int toku_make_space_at (GPMA pma, u_int32_t idx, u_int32_t *newidx, gpma_renumber_callback_t rcall, void *extra);
/* Spread COUNT items over the (empty) slots lo..hi-1 of PMA, recording each destination index in TOS. */
void toku_gpma_distribute (GPMA pma,
u_int32_t lo, u_int32_t hi,
u_int32_t count,
struct gitem *items, // some of these may be NULL data, but we leave space for them anyway.
/*out*/ u_int32_t *tos // the indices where the values end up (we fill this in)
);
int toku_smooth_deleted_region (GPMA pma, u_int32_t minidx, u_int32_t maxidx, gpma_renumber_callback_t renumberf, void *extra_for_renumberf);
/* General PMA. */
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#include <errno.h>
#include "gpma.h"
#include "yerror.h"
#include "toku_assert.h"
#include "memory.h"
// Need this for DB_KEYEXIST
#include "../include/db.h"
#include "gpma-internal.h"
/* Return the ceiling of the base-2 logarithm of n, i.e. the smallest r with 2^r >= n.
 * By convention toku_lg(0) == toku_lg(1) == 0.
 *
 * The result is the bit length of n-1.  Counting bits by shifting right cannot
 * wrap around, unlike repeatedly doubling a power of two, which overflowed to 0
 * and looped forever for n > 2^31. */
int toku_lg (unsigned int n) {
    int result = 0;
    if (n <= 1) return 0;
    unsigned int rest = n - 1;
    while (rest != 0) {
        result++;
        rest >>= 1;
    }
    return result;
}
/* Find the smallest power of 2 >= v.  toku_hyperceil(0) is 1.
 *
 * Requires v <= 2^31: no larger power of two fits in 32 bits.  Previously such
 * an argument made the doubling wrap to 0 and the loop spin forever; now it
 * fails the assertion instead.
 *
 * The bare `inline` specifier was dropped: whether a plain `inline` definition
 * produces an external symbol differs between gnu89 and C99 semantics, and the
 * compiler is free to inline this regardless. */
u_int32_t toku_hyperceil (u_int32_t v) {
    assert(v <= ((u_int32_t)1 << 31));
    u_int32_t n = 1;
    while (n < v)
        n <<= 1;
    return n;
}
/* Recompute the upper and lower density-threshold steps from the current array length.
 * Each threshold range is divided evenly into ceil(lg N) steps; N must be at least 2
 * so that the divisor is positive. */
static void calculate_parameters (GPMA pma) {
    int n_divisions = toku_lg(toku_gpma_index_limit(pma));
    assert(n_divisions>0);
    double upper_range = GPMA_UDT_HIGH - GPMA_UDT_LOW;
    double lower_range = GPMA_LDT_HIGH - GPMA_LDT_LOW;
    pma->udt_step = upper_range/n_divisions;
    pma->ldt_step = lower_range/n_divisions;
}
/* Allocate an empty GPMA and store it in *gpma.
 *
 * initial_index_limit is the initial array length.  0 selects GPMA_MIN_ARRAY_SIZE;
 * otherwise it must be a power of two greater than 1.
 *
 * Returns 0 on success, EINVAL for an unusable limit, or errno if allocation fails
 * (in which case *gpma is left untouched). */
int toku_gpma_create(GPMA*gpma, int initial_index_limit) {
    // Negative limits are meaningless (and INT_MIN would overflow in the x&(x-1) test below).
    if (initial_index_limit<0) return EINVAL;
    // A one-slot array has lg(N)==0, which calculate_parameters rejects with an assertion;
    // report it as a bad argument instead of aborting.
    if (initial_index_limit==1) return EINVAL;
    if (initial_index_limit&(initial_index_limit-1)) return EINVAL; // must be a power of two.
    TAGMALLOC(GPMA, result);
    if (result==0) return errno;
    result->N = initial_index_limit ? initial_index_limit : GPMA_MIN_ARRAY_SIZE;
    result->n_items_present=0;
    calculate_parameters(result);
    MALLOC_N(result->N, result->items);
    if (result->items==0) { int r=errno; toku_free(result); return r; }
    {
        // Every slot starts out as a gap.
        u_int32_t i;
        for (i=0; i<result->N; i++) result->items[i].data=0;
    }
    *gpma=result;
    return 0;
}
/* Destroy the GPMA referenced by *gpmap and set *gpmap to NULL.
 * If freeme is non-NULL it is invoked as freeme(len, data, extra) on every occupied slot
 * before the slot is cleared. */
void toku_gpma_free(GPMA*gpmap, gpma_free_callback_t freeme,void*extra) {
    GPMA pma=*gpmap;
    u_int32_t slotnum;
    for (slotnum=0; slotnum<pma->N; slotnum++) {
        struct gitem *slot = &pma->items[slotnum];
        if (slot->data==0) continue; // a gap: nothing to release
        if (freeme) {
            freeme(slot->len, slot->data, extra);
        }
        slot->data=0;
    }
    toku_free(pma->items);
    toku_free(pma);
    *gpmap=0;
}
/* Report how many slots of the array currently hold an item. */
u_int32_t toku_gpma_n_entries(GPMA pma) {
    u_int32_t occupied = pma->n_items_present;
    return occupied;
}
/* Report the length of the slot array; valid indices are 0 .. limit-1. */
u_int32_t toku_gpma_index_limit(GPMA pma) {
    u_int32_t limit = pma->N;
    return limit;
}
// Binary-search the sparse array using the bessel function BESF, which is called as
// besf(len, data, extra) on occupied slots only.  Empty slots (data==0) are skipped by a
// linear scan from the midpoint, so each probe lands on an occupied slot or proves that
// part of the current range is empty.
// If direction==0 then find any match for which the bessel gives 0. *found is set to 1 iff something with 0. The return value is the place where the zero is (if found), or the place where it would go (if there's a value there, then that value goes after the zero.)
// If more than one value returns 0, return the left most such value.
// If direction>0 then find the first match for which bessel gives >0. *found is set to 1 iff something with >0. The return value is the index of the leftmost such value (if found). In the not-found case, all items are <=0 and the return value is pma->N.
// If direction<0 then find the last match for which bessel gives <0. *found is set to 1 iff something with <0. The return value is the index of the rightmost such value (if found). In the not-found case, all items are >=0 and the return value is 0.
u_int32_t toku_gpma_find_index_bes (GPMA pma, gpma_besselfun_t besf, int direction, void *extra, int *found) {
if (direction==0) {
int lo=0, hi=pma->N;
int foundone = 0;
u_int32_t foundidx = 0;
while (lo<hi) {
int mi = (lo+hi)/2;
int look = mi;
// Scan right from the midpoint to the first occupied slot below hi.
while (look<hi && pma->items[look].data==0) look++;
if (look>=hi) {
// went too far, so mi is new hi
hi=mi;
} else {
int cmp = besf(pma->items[look].len, pma->items[look].data, extra);
if (cmp==0) {
/* We found a match. */
foundone = 1;
foundidx=look;
/* But keep looking to the left. */
hi=mi;
} else if (cmp>0) {
hi=mi;
} else {
lo=look+1;
}
}
}
*found = foundone;
if (foundone) return foundidx;
else return lo;
} else if (direction<0) {
// Find the rightmost negative value.
#if 0
// Linear-time code, for ease of reading
u_int32_t i;
for (i=pma->N; i>0; i--) {
if (pma->items[i-1].data) {
int cmp = besf(pma->items[i-1].len, pma->items[i-1].data, extra);
if (cmp<0) {
*found=1;
return i-1;
}
}
}
*found=0;
return 0;
#else
// direction<0. Log-time code. For performance.
int lo=0, hi=pma->N;
int foundone=0;
int answer=lo;
while (lo<hi) {
int mi = (lo+hi)/2;
int look = mi;
// Scan right from the midpoint to the first occupied slot below hi.
while (look<hi && pma->items[look].data==0) look++;
if (look>=hi) {
// there was nothing in the right half
hi=mi;
} else {
int cmp = besf(pma->items[look].len, pma->items[look].data, extra);
if (cmp>=0) {
// look is too big.
hi=mi;
} else {
// look is a good answer, so set lo just past it. From now on we can only change the answer if we find another good one further right.
answer=look;
foundone=1;
lo=look+1;
}
}
}
*found = foundone;
return answer;
#endif
} else {
// Find the leftmost positive value.
#if 0
// Linear-time code, for ease of reading
u_int32_t i;
for (i=0; i<pma->N; i++) {
if (pma->items[i].data) {
int cmp = besf(pma->items[i].len, pma->items[i].data, extra);
if (cmp>0) {
*found=1;
return i;
}
}
}
*found=0;
return pma->N;
#else
// direction>0. Log-time code. For performance.
// The loop invariant is that if we found one, then hi is a good answer.
int lo=0, hi=pma->N;
int foundone=0;
while (lo<hi) {
int mi = (lo+hi)/2;
int look = mi;
// This branch scans left (not right) from the midpoint, stopping at lo.
while (look>lo && pma->items[look].data==0) look--;
if (look==lo && pma->items[look].data==0) {
// There was nothing in the left half.
lo = mi+1;
} else {
int cmp = besf(pma->items[look].len, pma->items[look].data, extra);
if (cmp<=0) {
// look is too small. That means mi is too small.
lo = mi+1;
} else {
// look is a good answer, so set hi to that. From now on we only change hi if we find another good answer.
hi = look;
foundone=1;
}
}
}
*found = foundone;
return hi;
#endif
}
}
// Convert a comparison function against a particular item to a besselfun.
// This struct is the context handed to bessel_from_compare through its `extra` argument.
struct convert_extra {
gpma_compare_fun_t comparef; // the two-item comparison function being adapted
u_int32_t dlen; // length of the fixed item that every slot is compared against
void *dval; // data of that fixed item
void *extra; // caller context forwarded unchanged to comparef
};
/* Bessel-function adapter: compare the fixed item described by the struct convert_extra in
 * `extra` (passed as the FIRST pair of arguments to its comparef) against the slot item
 * (dlen,dval), and return the negated result. */
static int bessel_from_compare (u_int32_t dlen, void *dval, void *extra) {
    struct convert_extra *target = extra;
    int order = target->comparef(target->dlen, target->dval, dlen, dval, target->extra);
    return -order;
}
// Locate (dlen,dval) according to comparef.  *found is set to 0 iff no item compares equal.
// The result is the index of the item when found, and otherwise the index where it would go;
// it can be anything from 0 to N inclusive.
u_int32_t toku_gpma_find_index (GPMA pma, u_int32_t dlen, void *dval, gpma_compare_fun_t comparef, void *extra, int *found) {
    struct convert_extra wanted = {
        .comparef = comparef,
        .dlen     = dlen,
        .dval     = dval,
        .extra    = extra,
    };
    // direction 0: search for an exact match.
    u_int32_t where = toku_gpma_find_index_bes(pma, bessel_from_compare, 0, &wanted, found);
    return where;
}
// The region from lo (inclusive) to hi (exclusive) is all empty.
// Distribute the COUNT entries of ITEMS evenly across it, in order.
//   items: the entries to place; some of these may have NULL data, but we leave space for them anyway.
//   tos:   (out) tos[k] receives the index at which items[k] was stored.
// Requires hi<=pma->N and count<=hi-lo.
void toku_gpma_distribute (GPMA pma,
                           u_int32_t lo, u_int32_t hi,
                           u_int32_t count,
                           struct gitem *items,
                           /*out*/ u_int32_t *tos)
{
    u_int64_t width = hi-lo;
    u_int32_t nplaced=0;
    u_int32_t i;
    assert(hi<=pma->N);
    // Stop as soon as everything is placed so that items[] and tos[] are never indexed
    // past COUNT (in particular, nothing is touched when count==0).
    for (i=lo; i<hi && nplaced<count; i++) {
        // if nplaced/(i-lo) <= count/width then place something here
        // But don't do floating point divisions.
        // The offset must be measured from lo, not from 0: using the absolute index
        // over-fills the front of any region that does not start at slot 0.
        if (nplaced*width <= count*(u_int64_t)(i-lo)) {
            tos[nplaced] = i;
            pma->items[i] = items[nplaced];
            nplaced++;
        }
    }
    assert(nplaced==count);
}
// Rebalance the slots lo (inclusive) .. hi (exclusive): compact every occupied slot into a
// temporary array, then spread the items evenly back over the region.
// If newidxp is non-null, one extra empty slot is reserved at the position corresponding to
// idx (so a new item can be stored there) and its index is returned in *newidxp.
// If rcall is non-null it is told where everything moved from and to.
// Returns 0 if count==0, ENOMEM if a temporary array cannot be allocated, and otherwise the
// value returned by rcall (0 if there is no rcall).
int toku_gpma_smooth_region (GPMA pma,
u_int32_t lo, u_int32_t hi,
u_int32_t count, // The number of nonnull values
u_int32_t idx, u_int32_t *newidxp, // set newidxp to 0 if you don't want to track a particular index
gpma_renumber_callback_t rcall, void *extra,
u_int32_t old_N) {
if (count==0) return 0;
int width = hi-lo;
// froms/tos are sized by the caller-supplied count, so count must be at least the number of
// occupied slots actually found in lo..hi-1.
u_int32_t *MALLOC_N(count, froms); if (!froms) return ENOMEM;
u_int32_t *MALLOC_N(count, tos); if (!tos) { toku_free(froms); return ENOMEM; }
u_int32_t nitems=0;
struct gitem *MALLOC_N(width, temp); if (!temp) { toku_free(tos); toku_free(froms); return ENOMEM; }
u_int32_t i;
u_int32_t idx_goes_to_tmp=pma->N+1; // too big, so we will notice a problem
u_int32_t newidx=pma->N+1;
// Pass 1: pull every item out of the region, remembering how many items precede idx.
for (i=lo; i<hi; i++) {
if (newidxp && idx==i) idx_goes_to_tmp=nitems;
if (pma->items[i].data) {
//printf("froms[%d]=%d (count=%d)\n", nitems, i, count);
froms[nitems]=i;
temp [nitems]=pma->items[i];
pma->items[i].data=0;
nitems++;
}
}
// Here i==hi: this handles an idx that sits just past the end of the region.
if (newidxp && idx==i) idx_goes_to_tmp = nitems;
// Now they are all compacted into temp. Spread them out again
u_int32_t nplaced=0;
u_int32_t nused =0;
u_int64_t nitems_to_place = newidxp ? (nitems+1) : nitems;
for (i=lo; i<hi; i++) {
// if nused/i < (nitems+1)/width then place something here
// But don't do floating point divisions
if (nused*(u_int64_t)width < nitems_to_place*(u_int64_t)(i-lo)) {
if (newidxp && nused==idx_goes_to_tmp) {
// This slot is the hole reserved for the caller's new item; leave it empty.
newidx=i;
} else {
tos[nplaced] = i;
pma->items[i] = temp[nplaced++];
}
nused++;
}
}
assert((newidxp ? nplaced+1 : nplaced) ==nused);
assert(nplaced==nitems);
int r = 0;
if (rcall) {
r = rcall(nitems, froms, tos, temp, old_N, pma->N, extra);
}
toku_free(temp);
toku_free(froms);
toku_free(tos);
if (newidxp) {
assert(newidx<pma->N);
*newidxp = newidx;
}
return r;
}
/* Double the length of the slot array and respread every item over it, reserving an empty
 * slot for the item that is about to be inserted at idx (returned in *newidx).
 *
 * Returns 0 on success, errno if the array cannot be grown, or whatever
 * toku_gpma_smooth_region returns.  When smoothing reports ENOMEM the logical size AND the
 * density steps are put back to their previous values; earlier only N was restored, which
 * left udt_step/ldt_step describing an array twice as long as the one in use. */
static int double_array (GPMA pma, u_int32_t idx, u_int32_t *newidx, gpma_renumber_callback_t rcall, void *extra) {
    u_int32_t old_N = pma->N;
    u_int32_t new_N = old_N*2;
    double old_udt_step = pma->udt_step;
    double old_ldt_step = pma->ldt_step;
    {
        void *olditems = pma->items;
        REALLOC_N(new_N, pma->items);
        if (pma->items==0) { pma->items=olditems; return errno; }
    }
    // The freshly added upper half starts out as gaps.
    u_int32_t i;
    for (i=old_N; i<new_N; i++) pma->items[i].data=0;
    pma->N = new_N;
    calculate_parameters(pma);
    int r = toku_gpma_smooth_region(pma, 0, pma->N, pma->n_items_present, idx, newidx, rcall, extra, old_N);
    if (r==ENOMEM) {
        // Don't reallocate the memory downward. We'll just hope that the current memory array is OK.
        pma->N = old_N;
        pma->udt_step = old_udt_step;
        pma->ldt_step = old_ldt_step;
    }
    return r;
}
// Open up an empty slot for a new item that belongs at idx, where idx is either an occupied
// slot or pma->N (one past the end).  Starting from the single slot at idx, the candidate
// region is doubled until its density (counting the new item) is no more than the upper
// density threshold for that size; that region is then smoothed.  If even the whole array is
// too dense, the array is doubled instead.
// On success *newidx is the index of the empty slot reserved for the new item.
// Returns 0 on success or the error from toku_gpma_smooth_region / double_array.
int toku_make_space_at (GPMA pma, u_int32_t idx, u_int32_t *newidx, gpma_renumber_callback_t rcall, void *extra) {
if (idx!=pma->N) assert(pma->items[idx].data);
u_int32_t lo=idx;
u_int32_t hi=idx+1;
// When inserting past the end, start from the last real slot instead.
if (idx==pma->N) { lo--; hi--; }
double udt=GPMA_UDT_HIGH;
u_int32_t count = 2; // one for the item that is there, plus one for the new item.
u_int32_t width=1;
double one_over_width = 1.0;
while (1) {
assert(lo<hi); assert(hi<=pma->N); // Make those separate asserts so that we don't get false complaints from gcov.
double density = count*one_over_width;
//printf("%s:%d %d..%d density=%f udt=%f\n", __FILE__, __LINE__, lo, hi, density, udt);
if (density<=udt) break; // found a region that is good enough
// Otherwise the density isn't good.
u_int32_t N = pma->N;
assert(width<=N);
if (width<N) {
// Extend toward whichever side keeps the region aligned to a multiple of 2*width.
if (idx==N || width&idx) { // Grow the array downward.
u_int32_t i;
assert(lo>=width);
lo -= width;
for (i=0; i<width; i++) {
if (pma->items[lo+i].data) count++;
}
} else { // Grow the array upward.
u_int32_t i;
for (i=0; i<width; i++) {
if (pma->items[hi+i].data) count++;
}
hi += width;
}
width*=2;
one_over_width*=0.5;
udt -= pma->udt_step;
} else {
// The array must be resized.
assert(0==lo); assert(hi==pma->N);
return double_array(pma, idx, newidx, rcall, extra);
}
}
return toku_gpma_smooth_region (pma, lo, hi, count, idx, newidx, rcall, extra, pma->N);
}
/* Store (len,data) at position idx, first making room if that position is occupied or is
 * one past the end of the array.
 *   rcall, extra_for_rcall: told about any items that get renumbered while making room.
 *   idxp: if non-NULL, receives the final index, but only when the insert succeeds.
 * Returns 0 on success, or the error from toku_make_space_at. */
static int finish_insert (GPMA pma,
                          u_int32_t len, void*data,
                          gpma_renumber_callback_t rcall, void*extra_for_rcall,
                          u_int32_t idx,
                          u_int32_t *idxp
                          ) {
    u_int32_t limit = toku_gpma_index_limit(pma);
    assert(idx<=limit);
    // Test idx==limit first so that we never read the slot past the end.
    int need_room = (idx==limit) || (pma->items[idx].data!=0);
    if (need_room) {
        u_int32_t opened;
        int r = toku_make_space_at(pma, idx, &opened, rcall, extra_for_rcall);
        if (r!=0) return r;
        idx = opened;
        assert(pma->items[idx].data==0);
    }
    struct gitem *slot = &pma->items[idx];
    slot->data = data;
    slot->len  = len;
    pma->n_items_present++;
    if (idxp) *idxp = idx;
    return 0;
}
/* Insert (len,data), positioned by the comparison function.
 * Returns DB_KEYEXIST if an item comparing equal is already present; otherwise the result
 * of finish_insert.  rcall learns about renumbered items; *idxp (if non-NULL) receives the
 * slot that was used. */
int toku_gpma_insert(GPMA pma,
                     u_int32_t len, void*data,
                     gpma_compare_fun_t compare, void *extra_for_compare,
                     gpma_renumber_callback_t rcall, void*extra_for_rcall,
                     u_int32_t *idxp
                     ) {
    int already_there;
    u_int32_t where = toku_gpma_find_index(pma, len, data, compare, extra_for_compare, &already_there);
    return already_there
        ? DB_KEYEXIST
        : finish_insert(pma, len, data, rcall, extra_for_rcall, where, idxp);
}
/* Insert (len,data), positioned by a bessel function (direction 0 search).
 * Returns DB_KEYEXIST if the bessel function yields 0 for some existing item; otherwise the
 * result of finish_insert.  renumberf learns about renumbered items; *indexp (if non-NULL)
 * receives the slot that was used. */
int toku_gpma_insert_bessel (GPMA pma,
                             u_int32_t len, void *data,
                             gpma_besselfun_t besf, void *extra_for_besself,
                             gpma_renumber_callback_t renumberf, void*extra_for_renumberf,
                             u_int32_t *indexp
                             ) {
    int already_there;
    u_int32_t where = toku_gpma_find_index_bes(pma, besf, 0, extra_for_besself, &already_there);
    return already_there
        ? DB_KEYEXIST
        : finish_insert(pma, len, data, renumberf, extra_for_renumberf, where, indexp);
}
/* Return the larger of two ints.
 * Defined without the bare `inline` specifier so that an external definition is emitted
 * under both gnu89 and C99 inline semantics, independent of which prototypes happen to
 * precede it. */
int toku_max_int (int a, int b) {
    return a<b ? b : a;
}
/* Return the larger of two unsigned ints.
 * The bare `inline` specifier was dropped: under C99 inline semantics a plain `inline`
 * definition with no non-inline declaration provides no external definition, so any call
 * the compiler chooses not to inline (e.g. in an unoptimized build) fails to link. */
unsigned int toku_max_uint (unsigned int a, unsigned int b) {
    return a<b ? b : a;
}
/* Halve the slot array (or cut it to 8 slots if it holds nothing) and redistribute the
 * surviving items evenly over the smaller array.
 *
 * renumberf, if non-NULL, is told where every item moved from and to.
 * Returns 0 on success, errno on allocation failure, or the value returned by renumberf.
 *
 * On any allocation failure the array is left exactly as it was: previously a failed
 * shrinking realloc discarded the collected items after their slots had been cleared,
 * and the empty-array path changed N before knowing whether the realloc succeeded. */
static int shrink_pma (GPMA pma, gpma_renumber_callback_t renumberf, void *extra_for_renumberf) {
    u_int32_t old_N = pma->N;
    if (pma->n_items_present==0) {
        u_int32_t empty_N = 8;
        void *olditems = pma->items;
        REALLOC_N(empty_N, pma->items);
        if (pma->items==0) { pma->items = olditems; return errno; }
        pma->N = empty_N;
        return 0;
    }
    int r;
    u_int32_t i;
    u_int32_t nplaced = 0;
    u_int32_t new_N = old_N/2;
    u_int32_t *froms;
    u_int32_t *tos;
    struct gitem *items;
    MALLOC_N(pma->n_items_present, froms); if (froms==0) { r=errno; goto done; }
    MALLOC_N(pma->n_items_present, tos);   if (tos==0)   { r=errno; goto free_froms; }
    MALLOC_N(pma->n_items_present, items); if (items==0) { r=errno; goto free_tos; }
    // Lift every item out of the array, remembering where it came from.
    for (i=0; i<old_N; i++) {
        if (pma->items[i].data) {
            froms[nplaced] = i;
            items[nplaced++] = pma->items[i];
            pma->items[i].data = 0;
        }
    }
    {
        void *olditems = pma->items;
        REALLOC_N(new_N, pma->items);
        if (pma->items==0) {
            r = errno;
            pma->items = olditems;
            // Put everything back where it was so that no data is lost.
            for (i=0; i<nplaced; i++) pma->items[froms[i]] = items[i];
            goto free_items;
        }
    }
    pma->N = new_N;
    //printf("Shrunk to %d\n", pma->N);
    toku_gpma_distribute(pma, 0, pma->N, pma->n_items_present, items, tos);
    if (renumberf) {
        r = renumberf(pma->n_items_present, froms, tos, items, old_N, new_N, extra_for_renumberf);
    } else {
        r = 0;
    }
 free_items:
    toku_free(items);
 free_tos:
    toku_free(tos);
 free_froms:
    toku_free(froms);
 done:
    return r;
}
// if minidx (inclusive) to maxidx (inclusive) gives a range of empty slots, find a big enough region and renumber everything.
// Starting from the emptied range, the region is widened (upward first, then downward once
// the end of the array is reached) to successive powers of two until the number of items in
// it reaches the target density; that region is then smoothed.  If the whole array is still
// too sparse, the array is shrunk.  Arrays of 8 or fewer slots are never touched.
// Returns 0, or the error from toku_gpma_smooth_region / shrink_pma.
int toku_smooth_deleted_region (GPMA pma, u_int32_t minidx, u_int32_t maxidx, gpma_renumber_callback_t renumberf, void *extra_for_renumberf) {
if (pma->N<=8) return 0;
u_int32_t lgN = toku_lg(pma->N);
u_int32_t lglgN = toku_lg(lgN);
u_int32_t n_steps = toku_max_uint(1, lgN-lglgN);
// GPMA_LDT_HIGH is 0.25 and GPMA_LDT_LOW is 0.40, so this increment is negative.
double increment = (GPMA_LDT_HIGH-GPMA_LDT_LOW)/n_steps;
u_int32_t initial_width = maxidx+1-minidx;
u_int32_t lg_initw = toku_lg(initial_width);
u_int32_t next_width = 1<<lg_initw;
// NOTE(review): target is computed once for the initial width and is not adjusted when
// next_width doubles in the loop below -- confirm whether that is intended.
double target = GPMA_LDT_LOW+increment*lg_initw;
// count covers only the slots added beyond minidx..maxidx, which are assumed empty.
u_int32_t count = 0;
u_int32_t lo=minidx;
u_int32_t hi=maxidx+1;
while (1) {
assert(next_width<=pma->N);
while (hi-lo < next_width) {
if (hi<pma->N) {
if (pma->items[hi].data) count++;
hi++;
} else {
assert(lo>0);
lo--;
if (pma->items[lo].data) count++;
}
}
// if count/(hi-lo) >= target then we are happy
if (count >= target*(hi-lo)) {
// we are happy with this width, spread things out.
return toku_gpma_smooth_region(pma, lo, hi, count, lo, 0, renumberf, extra_for_renumberf, pma->N);
}
if (next_width==pma->N) {
return shrink_pma(pma, renumberf, extra_for_renumberf);
}
next_width*=2;
}
}
/* Empty the slot at `index` and then rebalance around the hole.
 * renumberf (with extra_for_renumberf) is told about any items that move.
 * Returns the result of toku_smooth_deleted_region. */
int toku_gpma_delete_at_index (GPMA pma, u_int32_t index,
                               gpma_renumber_callback_t renumberf,
                               void *extra_for_renumberf) {
    int r;
    toku_gpma_clear_at_index(pma, index);
    r = toku_smooth_deleted_region(pma, index, index, renumberf, extra_for_renumberf);
    return r;
}
/* Delete every item for which besself returns 0.
 *
 *   deletef:   if non-NULL, called as deletef(slot, len, data, extra_for_deletef) for each
 *              deleted item; a nonzero return aborts the deletion and is returned.
 *   renumberf: told about any items that move during the rebalance that follows.
 *
 * Returns DB_NOTFOUND if nothing matches, the error from deletef, or the result of
 * toku_smooth_deleted_region.
 *
 * n_items_present is kept accurate even when deletef fails part-way: previously the full
 * match count stayed subtracted although the remaining matches were still in the array. */
int toku_gpma_delete_bessel (GPMA pma,
                             gpma_besselfun_t besself, void*extra_for_besself,
                             gpma_delete_callback_t deletef, void*extra_for_deletef,
                             gpma_renumber_callback_t renumberf, void*extra_for_renumberf
                             ) {
    int r;
    u_int32_t len;
    void *data;
    u_int32_t idx;
    r = toku_gpma_lookup_bessel(pma, besself, 0, extra_for_besself, &len, &data, &idx);
    if (r!=0) return DB_NOTFOUND;
    // Find how many items there are to delete.  Scan forward and back from the match.
    u_int32_t i;
    u_int32_t nitems=1;
    u_int32_t maxidx=idx, minidx=idx;
    for (i=idx+1; i<pma->N; i++) {
        if (pma->items[i].data) {
            if (besself(pma->items[i].len, pma->items[i].data, extra_for_besself)!=0)
                break;
            nitems++;
            maxidx=i;
        }
    }
    for (i=idx; i>0 ; i--) {
        if (pma->items[i-1].data) {
            if (besself(pma->items[i-1].len, pma->items[i-1].data, extra_for_besself)!=0)
                break;
            nitems++;
            minidx=i-1;
        }
    }
    // Now we know the range and how many items will be deleted.
    pma->n_items_present -= nitems;
    u_int32_t ncleared=0;
    for (i=minidx; i<=maxidx; i++) {
        if (pma->items[i].data==0) continue;
        r = deletef ? deletef(i, pma->items[i].len, pma->items[i].data, extra_for_deletef) : 0;
        // The slot is cleared even if the callback failed.
        pma->items[i].data = 0;
        ncleared++;
        if (r!=0) {
            // The matches we never reached are still present; count them again.
            pma->n_items_present += nitems-ncleared;
            return r;
        }
    }
    // Now we must find a region that is sufficiently densely packed and spread things out.
    return toku_smooth_deleted_region(pma, minidx, maxidx, renumberf, extra_for_renumberf);
}
/* Delete every item that compares equal to (len,data) under comparef, by adapting the
 * comparison into a bessel function and delegating to toku_gpma_delete_bessel.
 * deletef is invoked per deleted item, renumberf for items that move. */
int toku_gpma_delete_item (GPMA pma,
                           u_int32_t len, void *data,
                           gpma_compare_fun_t comparef, void *extra_for_comparef,
                           gpma_delete_callback_t deletef, void *extra_for_deletef,
                           gpma_renumber_callback_t renumberf, void *extra_for_renumberf) {
    struct convert_extra victim = {
        .comparef = comparef,
        .dlen     = len,
        .dval     = data,
        .extra    = extra_for_comparef,
    };
    int r = toku_gpma_delete_bessel(pma, bessel_from_compare, &victim,
                                    deletef, extra_for_deletef,
                                    renumberf, extra_for_renumberf);
    return r;
}
#if 0
// Disabled, unimplemented stub (compiled out by the surrounding #if 0).
// Delete anything for which the besselfun is zero.
// If things go wrong (e.g., the renumber_callback returns nonzero, or memory runs out),
// the intended behavior was never written down here.
int toku_gpma_delete(GPMA pma,
gpma_besselfun_t besf,
gpma_delete_callback_t delcall, // call this on each deleted object
gpma_renumber_callback_t rcall, // if anything gets renumbered, let the caller know
void*extra) {
}
#endif
/* Look up the item that compares equal to (len,data) under comparef.
 * On success returns 0 and fills in *resultlen, *resultdata and, if idxp is non-NULL, *idxp.
 * Returns DB_NOTFOUND (leaving the outputs untouched) when there is no such item. */
int toku_gpma_lookup_item (GPMA pma,
                           u_int32_t len, void *data, gpma_compare_fun_t comparef, void *extra, u_int32_t *resultlen, void **resultdata, u_int32_t *idxp) {
    int present;
    u_int32_t where = toku_gpma_find_index(pma, len, data, comparef, extra, &present);
    if (present) {
        const struct gitem *hit = &pma->items[where];
        *resultlen  = hit->len;
        *resultdata = hit->data;
        if (idxp) *idxp = where;
        return 0;
    }
    return DB_NOTFOUND;
}
/* Look up an item with a bessel function; `direction` selects the search mode of
 * toku_gpma_find_index_bes.
 * *idxp (if non-NULL) is always set to the index that search returned, found or not.
 * On success returns 0 and fills in *resultlen and *resultdata; otherwise DB_NOTFOUND. */
int toku_gpma_lookup_bessel(GPMA pma, gpma_besselfun_t besf, int direction, void*extra, u_int32_t *resultlen, void **resultdata, u_int32_t *idxp) {
    int present;
    u_int32_t where = toku_gpma_find_index_bes(pma, besf, direction, extra, &present);
    if (idxp) *idxp = where;
    if (!present) return DB_NOTFOUND;
    *resultlen  = pma->items[where].len;
    *resultdata = pma->items[where].data;
    return 0;
}
// Split the pma, putting some right suffix into newpma. Try to split up so sum(lengths)+ overhead*N is equal.
// Move at least one element (if there is one)
// newpma is an empty pma
//   realloc_data:      called on every item that moves to newpma to obtain its new storage.
//   rcall:             told how the items that stay in pma were renumbered.
//   rcall_across_pmas: told about everything that moved to newpma; it is called first.
// Returns 0 on success (also when pma holds nothing), errno on allocation failure, or the
// error returned by realloc_data or by one of the callbacks.
// If an error code is returned, then the pmas are likely to be all messed up. Probably all you can do is close them.
//
// Fixes relative to the previous version:
//  * newpma is sized from the number of items it receives (n_right), not from n_left.
//  * The result of reallocating newpma->items is checked on newpma, not on pma.
//  * A failed realloc no longer leaks the six temporary arrays or loses the old array pointer.
//  * Each item is weighed as overhead+len, matching how totalweight is computed; with 1+len
//    the running weight could stay below half of totalweight and run off the end of the array.
//  * The split is expressed as "first slot that moves right", so an item in slot 0 is no
//    longer moved left by mistake when nothing is supposed to stay on the left.
int toku_gpma_split (GPMA pma, GPMA newpma, u_int32_t overhead,
                     int (*realloc_data)(u_int32_t olen, void *odata, void **ndata, void *extra),
                     void *extra_realloc,
                     gpma_renumber_callback_t rcall,
                     void *extra_rcall,
                     gpma_renumber_callback_t rcall_across_pmas, // This one is called for everything that moved
                     void *extra_rcall_across) {
    unsigned long totalweight=0;
    u_int32_t old_N = pma->N;
    u_int32_t i;
    for (i=0; i<old_N; i++) {
        if (pma->items[i].data) totalweight += (unsigned long)overhead + pma->items[i].len;
    }
    //toku_verify_gpma(pma);
    if (totalweight==0) return 0; // Nothing there

    // Walk from the left until adding one more item would exceed half of the total weight.
    // That item, at split_at, is the first one to go to newpma.
    unsigned long weight=0;
    u_int32_t n_left=0;
    u_int32_t split_at;
    for (split_at=0; 1; split_at++) {
        assert(split_at<old_N);
        if (pma->items[split_at].data) {
            unsigned long delta = (unsigned long)overhead + pma->items[split_at].len;
            if (weight+delta > totalweight/2) break;
            weight += delta;
            n_left++;
        }
    }
    u_int32_t n_right = pma->n_items_present - n_left;

    // Never ask for a zero-length allocation: it may legitimately come back as NULL.
    u_int32_t left_alloc  = n_left  ? n_left  : 1;
    u_int32_t right_alloc = n_right ? n_right : 1;
    int r;
    struct gitem *leftitems, *rightitems;
    u_int32_t *leftfroms, *rightfroms, *lefttos, *righttos;
    MALLOC_N(left_alloc,  leftitems);  if (!leftitems)  { r=errno; goto done; }
    MALLOC_N(right_alloc, rightitems); if (!rightitems) { r=errno; goto free_leftitems; }
    MALLOC_N(left_alloc,  leftfroms);  if (!leftfroms)  { r=errno; goto free_rightitems; }
    MALLOC_N(right_alloc, rightfroms); if (!rightfroms) { r=errno; goto free_leftfroms; }
    MALLOC_N(left_alloc,  lefttos);    if (!lefttos)    { r=errno; goto free_rightfroms; }
    MALLOC_N(right_alloc, righttos);   if (!righttos)   { r=errno; goto free_lefttos; }

    {
        // Everything before split_at stays in pma.
        u_int32_t n_moved=0;
        for (i=0; i<split_at; i++) {
            if (pma->items[i].data) {
                leftfroms[n_moved] = i;
                leftitems[n_moved++] = pma->items[i];
                pma->items[i].data = 0;
            }
        }
        assert(n_moved==n_left);
    }
    {
        // Everything from split_at onward goes to newpma.
        u_int32_t n_moved=0;
        for (i=split_at; i<old_N; i++) {
            if (pma->items[i].data) {
                rightfroms[n_moved] = i;
                rightitems[n_moved++] = pma->items[i];
                pma->items[i].data = 0;
            }
        }
        assert(n_moved==n_right);
    }
    for (i=0; i<n_right; i++) {
        void *ndata;
        //printf("%s:%d len=%d\n", __FILE__, __LINE__, rightitems[i].len);
        r = realloc_data (rightitems[i].len, rightitems[i].data, &ndata, extra_realloc);
        if (r!=0) { goto free_righttos; } // At this point the PMA is all messed up, and there is no easy way to put it all back together again.
        rightitems[i].data=ndata;
    }
    // Now we have split out the left and right stuff. All we have to do is put it back.
    {
        u_int32_t new_left_N  = toku_hyperceil(2*n_left);
        u_int32_t new_right_N = toku_hyperceil(2*n_right);
        void *olditems;

        olditems = pma->items;
        REALLOC_N(new_left_N, pma->items);
        if (!pma->items) { r=errno; pma->items=olditems; goto free_righttos; }
        pma->N = new_left_N;
        for (i=0; i<pma->N; i++) pma->items[i].data=0;

        olditems = newpma->items;
        REALLOC_N(new_right_N, newpma->items);
        if (!newpma->items) { r=errno; newpma->items=olditems; goto free_righttos; }
        newpma->N = new_right_N;
        for (i=0; i<newpma->N; i++) newpma->items[i].data=0;
    }
    // Skip the call for an empty side: there is nothing to place.
    if (n_left)  toku_gpma_distribute(pma,    0, pma->N,    n_left,  leftitems,  lefttos);
    if (n_right) toku_gpma_distribute(newpma, 0, newpma->N, n_right, rightitems, righttos);
    pma->n_items_present = n_left;
    newpma->n_items_present = n_right;
    //toku_verify_gpma(pma);
    //toku_verify_gpma(newpma);
    r = rcall_across_pmas(n_right, rightfroms, righttos, rightitems, old_N, newpma->N, extra_rcall_across);
    if (r==0) {
        r = rcall(n_left, leftfroms, lefttos, leftitems, old_N, pma->N, extra_rcall);
    }
    // Fall through: free all that stuff.
 free_righttos:
    toku_free(righttos);
 free_lefttos:
    toku_free(lefttos);
 free_rightfroms:
    toku_free(rightfroms);
 free_leftfroms:
    toku_free(leftfroms);
 free_rightitems:
    toku_free(rightitems);
 free_leftitems:
    toku_free(leftitems);
 done:
    return r;
}
/* Return 1 if idx is inside the array and its slot holds an item, and 0 otherwise. */
int toku_gpma_valididx (GPMA pma, u_int32_t idx) {
    if (idx>=pma->N) return 0;
    return pma->items[idx].data!=0;
}
/* Fetch the item stored at slot idx into *len and *data.
 * Returns EINVAL if idx is out of range, DB_NOTFOUND if the slot is empty (the outputs are
 * untouched in both cases), and 0 on success. */
int toku_gpma_get_from_index(GPMA pma, u_int32_t idx, u_int32_t *len, void **data) {
    if (idx>=pma->N) return EINVAL;
    const struct gitem *slot = &pma->items[idx];
    if (slot->data==0) return DB_NOTFOUND;
    *data = slot->data;
    *len  = slot->len;
    return 0;
}
/* Overwrite slot idx with (len,data).  The item count goes up only if the slot was empty
 * beforehand; whatever was there is simply replaced, not freed. */
void toku_gpma_set_at_index (GPMA pma, u_int32_t idx, u_int32_t len, void *data) {
    assert(idx<pma->N);
    struct gitem *slot = &pma->items[idx];
    int was_empty = (slot->data==0);
    if (was_empty) {
        pma->n_items_present++;
    }
    slot->data = data;
    slot->len  = len;
}
/* Mark slot idx as empty without rebalancing.  The item count goes down only if the slot
 * was occupied; the item itself is not freed. */
void toku_gpma_clear_at_index (GPMA pma, u_int32_t idx) {
    assert(idx<pma->N);
    struct gitem *slot = &pma->items[idx];
    if (slot->data!=0) {
        pma->n_items_present--;
    }
    slot->data = 0;
}
/* Consistency check: assert that n_items_present equals the number of occupied slots. */
void toku_verify_gpma (GPMA pma) {
    // The only thing we can really verify is that the n_items_present is OK.
    u_int32_t occupied=0;
    u_int32_t slotnum;
    for (slotnum=0; slotnum<pma->N; slotnum++) {
        occupied += (pma->items[slotnum].data!=0);
    }
    assert(occupied==pma->n_items_present);
#if 0
    // We can also check that the lengths match up, but that's really brt-specific.
    for (i=0; i<pma->N; i++) {
        if (pma->items[i].data) {
            struct foo {unsigned int a,b;} *foop = pma->items[i].data;
            assert(sizeof(*foop)+foop->a+foop->b==pma->items[i].len);
        }
    }
#endif
}
/* Change the length of the slot array to exactly newsize, without rebalancing.
 * When growing, the new slots are cleared.  When shrinking, the slots at newsize and beyond
 * are discarded (their items are not freed).
 *
 * Returns 0 on success, or errno if the reallocation fails (the array is then unchanged).
 *
 * n_items_present is reduced by the number of occupied slots that were discarded;
 * previously it was left stale, so the count no longer matched the array contents. */
int toku_resize_gpma_exactly (GPMA pma, u_int32_t newsize) {
    u_int32_t i;
    // Count what is about to be cut off while those slots are still readable.
    u_int32_t n_discarded = 0;
    for (i=newsize; i<pma->N; i++) {
        if (pma->items[i].data) n_discarded++;
    }
    void *old = pma->items;
    REALLOC_N(newsize, pma->items);
    if (pma->items==0) {
        pma->items = old;
        return errno;
    }
    for (i=pma->N; i<newsize; i++) pma->items[i].data=0;
    pma->N = newsize;
    pma->n_items_present -= n_discarded;
    return 0;
}
#ifndef GPMA_H
#define GPMA_H
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
// Need this to get the u_int32_t types and so forth
#include <sys/types.h>
typedef struct gpma *GPMA;
/* One slot of the array: a length-tagged pointer to caller-owned data. */
struct gitem {
u_int32_t len; /* Length recorded for the item; handed back to callbacks together with data. */
void *data; /* The item itself. A null pointer marks the slot as empty. */
};
typedef int (*gpma_compare_fun_t)(u_int32_t alen, void *aval, u_int32_t blen, void *bval, void*extra);
typedef int (*gpma_besselfun_t)(u_int32_t dlen, void *dval, void *extra); // return a number, not an error code.
typedef int (*gpma_delete_callback_t)(u_int32_t slotnum, u_int32_t deletelen, void*deletedata, void*extra); // return 0 if OK.
// If the pma moves things around and/or changes the size of the pma, it calls this function to indicate what happened.
// The callback returns 0 if OK; a nonzero value is passed back to the caller of the pma operation.
typedef int (*gpma_renumber_callback_t)(u_int32_t nitems, // How many things moved
u_int32_t *froms, // An array of indices indicating where things moved from
u_int32_t *tos, // An array of indices indicating where things moved to
struct gitem *items, // The actual items that were moved
u_int32_t old_N, // The old size of the target array
u_int32_t new_N, // The new size of the target array
void *extra); // Context
typedef void (*gpma_free_callback_t)(u_int32_t len, void*freeme, void*extra);
// initial_index_limit must be zero or a power of two.
int toku_gpma_create (GPMA*, int initial_index_limit);
/* Frees the GPMA, calling the callback (if non-NULL) on each item still present, and sets the referenced GPMA to NULL. Returns nothing. */
void toku_gpma_free (GPMA*, gpma_free_callback_t, void*);
// How many items are present
u_int32_t toku_gpma_n_entries (GPMA);
// What is the maximum index limit
u_int32_t toku_gpma_index_limit (GPMA);
// Requires that the item not already be present, according to the compare function (otherwise DB_KEYEXIST is returned).
// The data in the DBT is passed in.
int toku_gpma_insert (GPMA,
u_int32_t len, void*data,
gpma_compare_fun_t comparef, void*extra_for_comparef,
gpma_renumber_callback_t renumberf, void*extra_for_renumberf, // if anything gets renumbered, let the caller know
u_int32_t *indexp // Where did the item get stored?
);
// Use a bessel function to determine where to insert the data.
// Puts the new value between the rightmost -1 and the leftmost +1.
// Requires: Nothing in the pma returns 0.
int toku_gpma_insert_bessel (GPMA pma,
u_int32_t len, void *data,
gpma_besselfun_t, void *extra_for_besself,
gpma_renumber_callback_t renumberf, void*extra_for_renumberf, // if anything gets renumbered, let the caller know
u_int32_t *indexp // Where did the item get stored?
);
// Delete a particular index, and rebalance the tree.
int toku_gpma_delete_at_index (GPMA pma, u_int32_t index,
gpma_renumber_callback_t renumberf,
void *extra_for_renumberf);
// Delete anything for which the besselfun is zero. The besselfun must be monotonically increasing compared to the comparison function.
// That is, if two things compare to be < then their besselfuns must yield <=, and if they compare to be = their besselfuns must be =, and if they are > then their besselfuns must be >=
// Note the delete_callback would be responsible for calling free on the object.
int toku_gpma_delete_bessel (GPMA,
gpma_besselfun_t,
void*extra_for_besself,
gpma_delete_callback_t,
void*extra_for_deletef,
gpma_renumber_callback_t, // if anything gets renumbered, let the caller know
void*extra_for_renumberf);
// Delete any items for which the compare function says things are zero.
// For each item deleted, invoke deletef.
// For any items moved around, invoke renumberf.
int toku_gpma_delete_item (GPMA,
u_int32_t len, void *data,
gpma_compare_fun_t comparef, void *extra_for_comparef,
gpma_delete_callback_t deletef, void *extra_for_deletef,
gpma_renumber_callback_t renumberf, void *extra_for_renumberf);
// Look up a particular item, using the compare function. Find some X such that compf(len,data, X.len, X.data)==0
// (Note that the len and data passed here are always passed as the first pair of arguments to compf. )
// The item being looked up is the second pair of arguments.
int toku_gpma_lookup_item (GPMA, u_int32_t len, void *data, gpma_compare_fun_t compf, void*extra, u_int32_t *resultlen, void **resultdata, u_int32_t *idx);
// Lookup something according to the besselfun.
// If direction==0 then return something for which the besselfun is zero (or return DB_NOTFOUND and set the idx to point at the spot where the item would go. That spot may already have an element in it, or it may be off the end.)
// If more than one value is zero, return the leftmost such value.
// If direction>0 then return the first thing for which the besselfun is positive (or return DB_NOTFOUND).
// If direction<0 then return the last thing for which the besselfun is negative (or return DB_NOTFOUND).
int toku_gpma_lookup_bessel (GPMA, gpma_besselfun_t, int direction, void*extra, u_int32_t *len, void **data, u_int32_t *idx);
void toku_gpma_iterate (GPMA, void(*)(u_int32_t len, void*data, void*extra), void*extra);
// Run BODY once for every occupied slot of TABLE, in index order.
// IDX, VALLEN and VAL are identifiers that the macro itself declares (u_int32_t, u_int32_t and
// void* respectively) and that BODY may refer to.
// The expansion is wrapped in ({ ... }), a GNU C statement expression, so it needs a compiler
// that supports that extension.
#define GPMA_ITERATE(table,idx,vallen,val,body) ({ \
u_int32_t idx; \
for (idx=0; idx<toku_gpma_index_limit(table); idx++) { \
u_int32_t vallen; void*val; \
if (0==toku_gpma_get_from_index(table, idx, &vallen, &val)) { \
body; \
} } })
int toku_gpma_valididx (GPMA, u_int32_t idx);
int toku_gpma_get_from_index (GPMA, u_int32_t idx, u_int32_t *len, void **data);
// Whatever is in the slot gets overwritten. Watch out that you free the thing before overwriting it.
void toku_gpma_set_at_index (GPMA, u_int32_t idx, u_int32_t len, void*data);
// Clears the item at a particular index without rebalancing the PMA.
void toku_gpma_clear_at_index (GPMA, u_int32_t idx);
int toku_gpma_move_inside_pma_by_renumbering (GPMA,
u_int32_t nitems,
u_int32_t *froms, u_int32_t *tos);
int toku_gpma_split (GPMA pma, GPMA newpma, u_int32_t overhead,
int (*realloc_data)(u_int32_t len, void *odata, void **ndata, void *extra),
void *extra_realloc,
gpma_renumber_callback_t rcall,
void *extra_rcall,
gpma_renumber_callback_t rcall_across_pmas, // This one is called for everything that moved. It is called first (before the rcall). The old_N is the size of pma before resizing.
void *extra_rcall_across);
void toku_verify_gpma (GPMA pma);
// Change the size of the PMA. Anything at or beyond the newsize is discarded (if the newsize is smaller), and the added slots are zeroed (if the newsize is larger).
int toku_resize_gpma_exactly (GPMA pma, u_int32_t newsize);
#endif
...@@ -27,13 +27,9 @@ ...@@ -27,13 +27,9 @@
* The case of a committed pair and a provisional pair can be represented by a committed pair, since it doesn't matter whether the transction aborts or commits, the value is the same. * The case of a committed pair and a provisional pair can be represented by a committed pair, since it doesn't matter whether the transction aborts or commits, the value is the same.
*/ */
#include "mempool.h"
#include "brttypes.h" #include "brttypes.h"
#include "gpma.h"
#include "rbuf.h" #include "rbuf.h"
typedef struct leafentry *LEAFENTRY;
u_int32_t toku_le_crc(LEAFENTRY v); u_int32_t toku_le_crc(LEAFENTRY v);
int le_committed (u_int32_t klen, void* kval, u_int32_t dlen, void* dval, u_int32_t *resultsize, u_int32_t *disksize, LEAFENTRY *result); int le_committed (u_int32_t klen, void* kval, u_int32_t dlen, void* dval, u_int32_t *resultsize, u_int32_t *disksize, LEAFENTRY *result);
......
...@@ -203,33 +203,23 @@ const struct logtype logtypes[] = { ...@@ -203,33 +203,23 @@ const struct logtype logtypes[] = {
NULLFIELD}}, NULLFIELD}},
{"insertleafentry", 'I', FA{{"FILENUM", "filenum", 0}, {"insertleafentry", 'I', FA{{"FILENUM", "filenum", 0},
{"DISKOFF", "diskoff", 0}, {"DISKOFF", "diskoff", 0},
{"u_int32_t", "pmaidx", 0}, {"u_int32_t", "idx", 0},
{"LEAFENTRY", "newleafentry", 0}, {"LEAFENTRY", "newleafentry", 0},
NULLFIELD}}, NULLFIELD}},
{"deleteleafentry", 'D', FA{{"FILENUM", "filenum", 0}, {"deleteleafentry", 'D', FA{{"FILENUM", "filenum", 0},
{"DISKOFF", "diskoff", 0}, {"DISKOFF", "diskoff", 0},
{"u_int32_t", "pmaidx", 0}, {"u_int32_t", "idx", 0},
{"LEAFENTRY", "oldleafentry", 0}, {"LEAFENTRY", "oldleafentry", 0},
NULLFIELD}}, NULLFIELD}},
{"deleteinleaf", 'd', FA{{"TXNID", "txnid", 0}, {"leafsplit", 's', FA{{"FILENUM", "filenum", 0}, // log the creation of a new node by splitting stuff out of an old node
{"FILENUM", "filenum", 0}, {"DISKOFF", "old_diskoff", 0},
{"DISKOFF", "diskoff", 0}, {"DISKOFF", "new_diskoff", 0},
{"u_int32_t", "pmaidx", 0}, {"u_int32_t", "old_n", 0},
{"BYTESTRING", "key", 0}, {"u_int32_t", "split_at", 0},
{"BYTESTRING", "data", 0}, {"u_int32_t", "new_nodesize", 0},
NULLFIELD}}, {"u_int32_t", "new_rand4", "%08x"},
{"resizepma", 'R', FA{{"FILENUM", "filenum", 0}, {"u_int8_t", "is_dupsort", 0},
{"DISKOFF", "diskoff", 0}, NULLFIELD}},
{"u_int32_t", "oldsize", 0},
{"u_int32_t", "newsize", 0},
NULLFIELD}},
{"pmadistribute", 'M', FA{{"FILENUM", "filenum", 0},
{"DISKOFF", "old_diskoff", 0},
{"DISKOFF", "new_diskoff", 0},
{"INTPAIRARRAY", "fromto", 0},
{"u_int32_t", "old_N", 0},
{"u_int32_t", "new_N", 0},
NULLFIELD}},
{0,0,FA{NULLFIELD}} {0,0,FA{NULLFIELD}}
}; };
......
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#include <errno.h>
#include <sys/types.h>
typedef struct value *OMTVALUE;
#include "omt.h"
#include "../newbrt/memory.h"
#include "../newbrt/toku_assert.h"
#include "../include/db.h"
#include "../newbrt/brttypes.h"
#include <stdlib.h>
#include <stdint.h>
/* Things that would go in a omt-tests.h if we split to multiple files later. */
int verbose=0;
// Error-checking helpers for the tests.  Each is a statement expression that prints the
// file, line, error number and strerror() text to stderr on a mismatch, and then asserts.
// CKERR(r): r must be 0.
#define CKERR(r) ({ if (r!=0) fprintf(stderr, "%s:%d error %d %s\n", __FILE__, __LINE__, r, strerror(r)); assert(r==0); })
// CKERR2(r,r2): r must equal r2.
#define CKERR2(r,r2) ({ if (r!=r2) fprintf(stderr, "%s:%d error %d %s, expected %d\n", __FILE__, __LINE__, r, strerror(r), r2); assert(r==r2); })
// CKERR2s(r,r2,r3): r must equal r2 or r3.
#define CKERR2s(r,r2,r3) ({ if (r!=r2 && r!=r3) fprintf(stderr, "%s:%d error %d %s, expected %d or %d\n", __FILE__, __LINE__, r, strerror(r), r2,r3); assert(r==r2||r==r3); })
#include <string.h>
/* Parse the test program's command line.
 *   -v   increase the global verbosity by one (may be repeated)
 *   -q   reset the global verbosity to 0
 *   -h   print the usage message to stderr and exit with status 0
 * Any other argument prints the usage message and exits with status 1.
 */
void parse_args (int argc, const char *argv[]) {
    const char *argv0=argv[0];
    int i;
    for (i=1; i<argc; i++) {
        if (strcmp(argv[i], "-v")==0) {
            verbose++;
        } else if (strcmp(argv[i], "-q")==0) {
            verbose = 0;
        } else {
            // "-h" is a successful request for help; anything else is a usage error.
            int resultcode = (strcmp(argv[i], "-h")==0) ? 0 : 1;
            // The usage text lists -q too, since it is an accepted flag.
            fprintf(stderr, "Usage:\n%s [-v|-q|-h]\n", argv0);
            exit(resultcode);
        }
    }
}
/* End ".h like" stuff. */
// The payload type stored in the OMT by these tests (OMTVALUE is a pointer to this struct).
struct value {
u_int32_t number;
};
// Selects how test_create_array fills the global value arrays.
enum rand_type {
TEST_RANDOM,   // init_distinct_random_values
TEST_SORTED,   // init_distinct_sorted_values
TEST_IDENTITY  // init_identity_values
};
// Tells a test whether to destroy the global omt when it finishes (see test_close).
enum close_when_done {
CLOSE_WHEN_DONE,
KEEP_WHEN_DONE
};
/* Globals */
OMT omt;
OMTVALUE* values = NULL;
struct value* nums = NULL;
u_int32_t length;
// Free the global values[] and nums[] arrays and reset both pointers to NULL.
// Asserts that each array is currently allocated.
void cleanup_globals(void) {
assert(values);
toku_free(values);
values = NULL;
assert(nums);
toku_free(nums);
nums = NULL;
}
const unsigned int random_seed = 0xFEADACBA;
// Common setup for the init_*_values functions: seed the PRNG, free the previous global
// arrays, and allocate fresh values[] and nums[] arrays of num_elements entries each.
// The new arrays are left for the caller to fill in.
void init_init_values(unsigned int seed, u_int32_t num_elements) {
srandom(seed);
// cleanup_globals asserts that the arrays exist, so init_globals must have run before the first call.
cleanup_globals();
MALLOC_N(num_elements, values);
assert(values);
MALLOC_N(num_elements, nums);
assert(nums);
length = num_elements;
}
/* Fill the globals so that element k holds the number k:
 * nums[k].number == k and values[k] points at nums[k], for every k in [0, length).
 */
void init_identity_values(unsigned int seed, u_int32_t num_elements) {
    init_init_values(seed, num_elements);
    u_int32_t slot = 0;
    while (slot < length) {
        nums[slot].number = slot;
        values[slot]      = (OMTVALUE)&nums[slot];
        slot++;
    }
}
/* Fill the globals with strictly increasing numbers.
 * Each number exceeds its predecessor by a pseudo-random gap in [1, 32],
 * so all numbers are distinct.  values[k] points at nums[k].
 */
void init_distinct_sorted_values(unsigned int seed, u_int32_t num_elements) {
    init_init_values(seed, num_elements);
    u_int32_t running = 0;
    u_int32_t slot;
    for (slot = 0; slot < length; slot++) {
        u_int32_t gap = (u_int32_t)(random() % 32) + 1;
        running += gap;
        nums[slot].number = running;
        values[slot]      = (OMTVALUE)&nums[slot];
    }
}
void init_distinct_random_values(unsigned int seed, u_int32_t num_elements) {
init_distinct_sorted_values(seed, num_elements);
u_int32_t i;
u_int32_t choice;
u_int32_t choices;
struct value temp;
for (i = 0; i < length - 1; i++) {
choices = length - i;
choice = random() % choices;
if (choice != i) {
temp = nums[i];
nums[i] = nums[choice];
nums[choice] = temp;
}
}
}
// Allocate one-element placeholder arrays for values[] and nums[] and set length to 1,
// so that the first cleanup_globals() call (made by init_init_values) finds something to free.
void init_globals(void) {
MALLOC_N(1, values);
assert(values);
MALLOC_N(1, nums);
assert(nums);
length = 1;
}
/* Finish a test: destroy the global omt unless the caller asked to keep it.
 * After destruction the global omt pointer must be NULL.
 */
void test_close(enum close_when_done close) {
    if (close != KEEP_WHEN_DONE) {
        assert(close == CLOSE_WHEN_DONE);
        toku_omt_destroy(&omt);
        assert(omt==NULL);
    }
}
/* Create an empty OMT in the global omt and check that creation succeeded. */
void test_create(enum close_when_done close) {
    omt = NULL;
    int r = toku_omt_create(&omt);
    CKERR(r);
    assert(omt!=NULL);
    test_close(close);
}
/* A freshly created OMT must report size 0. */
void test_create_size(enum close_when_done close) {
    test_create(KEEP_WHEN_DONE);   // leaves the new, empty OMT in the global omt
    assert(toku_omt_size(omt) == 0);
    test_close(close);
}
/* Build the global omt from the global values[] array (length entries)
 * and check that construction succeeded.
 */
void test_create_from_sorted_array(enum close_when_done close) {
    omt = NULL;
    int r = toku_omt_create_from_sorted_array(&omt, values, length);
    CKERR(r);
    assert(omt!=NULL);
    test_close(close);
}
/* An OMT built from values[] must report exactly length elements. */
void test_create_from_sorted_array_size(enum close_when_done close) {
    test_create_from_sorted_array(KEEP_WHEN_DONE);
    assert(toku_omt_size(omt)==length);
    test_close(close);
}
void test_create_from_sorted_array_fetch_verify(enum close_when_done close) {
test_create_from_sorted_array(KEEP_WHEN_DONE);
u_int32_t i;
int r;
OMTVALUE v = (OMTVALUE)&i;
OMTVALUE oldv;
assert(length == toku_omt_size(omt));
for (i = 0; i < length; i++) {
oldv = v;
assert(oldv!=values[i]);
v = NULL;
r = toku_omt_fetch(omt, i, &v);
CKERR(r);
assert(v != NULL);
assert(v != oldv);
assert(v == values[i]);
assert(v->number == values[i]->number);
v = oldv;
r = toku_omt_fetch(omt, i, &v);
CKERR(r);
assert(v != NULL);
assert(v != oldv);
assert(v == values[i]);
assert(v->number == values[i]->number);
}
oldv = v;
for (i = length; i < length*2; i++) {
v = oldv;
r = toku_omt_fetch(omt, i, &v);
CKERR2(r, ERANGE);
assert(v == oldv);
v = NULL;
r = toku_omt_fetch(omt, i, &v);
CKERR2(r, ERANGE);
assert(v == NULL);
}
test_close(close);
}
static int iterate_helper_error_return = 1;
/* Callback for toku_omt_iterate.
 * When extra is the global omt, checks that v is values[idx] and returns 0.
 * For any other extra, returns iterate_helper_error_return without checking anything.
 */
int iterate_helper(OMTVALUE v, u_int32_t idx, void* extra) {
    if (extra == (void*)omt) {
        assert(v != NULL);
        assert(v == values[idx]);
        assert(v->number == values[idx]->number);
        return 0;
    }
    return iterate_helper_error_return;
}
/* Build the omt from values[] and check toku_omt_iterate:
 *  - with extra == omt the helper verifies every element and iteration returns 0;
 *  - with extra == NULL the helper returns a distinctive nonzero code, which
 *    toku_omt_iterate must pass back to the caller.
 */
void test_create_from_sorted_array_iterate_verify(enum close_when_done close) {
    test_create_from_sorted_array(KEEP_WHEN_DONE);

    int r;

    /* Pass 1: the helper succeeds on every element. */
    iterate_helper_error_return = 0;
    r = toku_omt_iterate(omt, iterate_helper, (void*)omt);
    CKERR(r);

    /* Pass 2: the helper fails, and its return code must come back out. */
    iterate_helper_error_return = 0xFEEDABBA;
    r = toku_omt_iterate(omt, iterate_helper, NULL);
    CKERR2(r, iterate_helper_error_return);

    test_close(close);
}
/* Fill the global arrays with 100 elements using the requested generator,
 * then run every create-from-sorted-array test against them.
 */
void test_create_array(enum rand_type rand_choice) {
    switch (rand_choice) {
    case TEST_RANDOM:
        init_distinct_random_values(random_seed, 100);
        break;
    case TEST_SORTED:
        init_distinct_sorted_values(random_seed, 100);
        break;
    case TEST_IDENTITY:
        init_identity_values(random_seed, 100);
        break;
    default:
        assert(FALSE);
    }
    /* Construction and size. */
    test_create_from_sorted_array(CLOSE_WHEN_DONE);
    test_create_from_sorted_array_size(CLOSE_WHEN_DONE);
    /* Fetch by index. */
    test_create_from_sorted_array_fetch_verify(CLOSE_WHEN_DONE);
    /* Iteration. */
    test_create_from_sorted_array_iterate_verify(CLOSE_WHEN_DONE);
}
// Parameters of the test heaviside function (see test_heaviside):
// numbers below first_zero map to -1, numbers in [first_zero, first_pos) map to 0,
// and numbers at or above first_pos map to +1.
typedef struct {
u_int32_t first_zero;
u_int32_t first_pos;
} h_extra;
/* Heaviside function used by test_find.  x points at an h_extra.
 * Returns -1 for numbers below first_zero, 0 for numbers in
 * [first_zero, first_pos), and +1 for numbers at or above first_pos.
 */
int test_heaviside(OMTVALUE v, void* x) {
    assert(v && x);
    h_extra* bounds = (h_extra*)x;
    assert(bounds->first_zero <= bounds->first_pos);

    u_int32_t number = v->number;
    return (number < bounds->first_zero) ? -1
         : (number < bounds->first_pos)  ?  0
         :                                  1;
}
/* Store the two thresholds of the test heaviside function into *extra. */
void heavy_extra(h_extra* extra, u_int32_t first_zero, u_int32_t first_pos) {
    extra->first_pos  = first_pos;
    extra->first_zero = first_zero;
}
void test_find_dir(int dir, void* extra, int (*h)(OMTVALUE, void*),
int r_expect, BOOL idx_will_change, u_int32_t idx_expect,
u_int32_t number_expect) {
u_int32_t idx = UINT32_MAX;
u_int32_t old_idx = idx;
OMTVALUE omt_val;
int r;
omt_val = NULL;
if (dir == 0) {
r = toku_omt_find_zero(omt, h, extra, &omt_val, &idx);
}
else {
r = toku_omt_find( omt, h, extra, dir, &omt_val, &idx);
}
CKERR2(r, r_expect);
if (idx_will_change) {
assert(idx == idx_expect);
}
else {
assert(idx == old_idx);
}
if (r == DB_NOTFOUND) {
assert(omt_val == NULL);
}
else {
assert(omt_val->number == number_expect);
}
/* Verify we can pass NULL value. */
omt_val = NULL;
idx = old_idx;
if (dir == 0) {
r = toku_omt_find_zero(omt, h, extra, NULL, &idx);
}
else {
r = toku_omt_find( omt, h, extra, dir, NULL, &idx);
}
CKERR2(r, r_expect);
if (idx_will_change) {
assert(idx == idx_expect);
}
else {
assert(idx == old_idx);
}
assert(omt_val == NULL);
/* Verify we can pass NULL idx. */
omt_val = NULL;
idx = old_idx;
if (dir == 0) {
r = toku_omt_find_zero(omt, h, extra, &omt_val, NULL);
}
else {
r = toku_omt_find( omt, h, extra, dir, &omt_val, NULL);
}
CKERR2(r, r_expect);
assert(idx == old_idx);
if (r == DB_NOTFOUND) {
assert(omt_val == NULL);
}
else {
assert(omt_val->number == number_expect);
}
/* Verify we can pass NULL both. */
omt_val = NULL;
idx = old_idx;
if (dir == 0) {
r = toku_omt_find_zero(omt, h, extra, NULL, NULL);
}
else {
r = toku_omt_find( omt, h, extra, dir, NULL, NULL);
}
CKERR2(r, r_expect);
assert(idx == old_idx);
assert(omt_val == NULL);
}
/* Exercise toku_omt_find (both directions) and toku_omt_find_zero on an
 * identity-valued omt of 100 elements, once for each of the seven shapes
 * a heaviside function can take.  In the pictures, A marks the element
 * found with direction -1, B the element found with direction +1, and C
 * the element found by find_zero.
 */
void test_find(enum close_when_done close) {
    h_extra extra;
    init_identity_values(random_seed, 100);
    test_create_from_sorted_array(KEEP_WHEN_DONE);

    const u_int32_t half       = length/2;
    const u_int32_t third      = length/3;
    const u_int32_t two_thirds = 2*length/3;

/*
    -...-
        A
*/
    heavy_extra(&extra, length, length);
    test_find_dir(-1, &extra, test_heaviside, 0,           TRUE,  length-1, length-1);
    test_find_dir(+1, &extra, test_heaviside, DB_NOTFOUND, FALSE, 0,        0);
    test_find_dir(0,  &extra, test_heaviside, DB_NOTFOUND, TRUE,  length,   length);

/*
    +...+
    B
*/
    heavy_extra(&extra, 0, 0);
    test_find_dir(-1, &extra, test_heaviside, DB_NOTFOUND, FALSE, 0, 0);
    test_find_dir(+1, &extra, test_heaviside, 0,           TRUE,  0, 0);
    test_find_dir(0,  &extra, test_heaviside, DB_NOTFOUND, TRUE,  0, 0);

/*
    0...0
    C
*/
    heavy_extra(&extra, 0, length);
    test_find_dir(-1, &extra, test_heaviside, DB_NOTFOUND, FALSE, 0, 0);
    test_find_dir(+1, &extra, test_heaviside, DB_NOTFOUND, FALSE, 0, 0);
    test_find_dir(0,  &extra, test_heaviside, 0,           TRUE,  0, 0);

/*
    -...-0...0
        AC
*/
    heavy_extra(&extra, half, length);
    test_find_dir(-1, &extra, test_heaviside, 0,           TRUE,  half-1, half-1);
    test_find_dir(+1, &extra, test_heaviside, DB_NOTFOUND, FALSE, 0,      0);
    test_find_dir(0,  &extra, test_heaviside, 0,           TRUE,  half,   half);

/*
    0...0+...+
    C    B
*/
    heavy_extra(&extra, 0, half);
    test_find_dir(-1, &extra, test_heaviside, DB_NOTFOUND, FALSE, 0,    0);
    test_find_dir(+1, &extra, test_heaviside, 0,           TRUE,  half, half);
    test_find_dir(0,  &extra, test_heaviside, 0,           TRUE,  0,    0);

/*
    -...-+...+
        AB
*/
    heavy_extra(&extra, half, half);
    test_find_dir(-1, &extra, test_heaviside, 0,           TRUE, half-1, half-1);
    test_find_dir(+1, &extra, test_heaviside, 0,           TRUE, half,   half);
    test_find_dir(0,  &extra, test_heaviside, DB_NOTFOUND, TRUE, half,   half);

/*
    -...-0...0+...+
        AC    B
*/
    heavy_extra(&extra, third, two_thirds);
    test_find_dir(-1, &extra, test_heaviside, 0, TRUE, third-1,    third-1);
    test_find_dir(+1, &extra, test_heaviside, 0, TRUE, two_thirds, two_thirds);
    test_find_dir(0,  &extra, test_heaviside, 0, TRUE, third,      third);

    /* Cleanup */
    test_close(close);
}
/* Test driver: parse the flags, allocate the placeholder globals, run every
 * test group, then release the globals.
 */
int main(int argc, const char *argv[]) {
    parse_args(argc, argv);
    init_globals();

    /* Empty OMT. */
    test_create(CLOSE_WHEN_DONE);
    test_create_size(CLOSE_WHEN_DONE);

    /* OMT built from an array, for each kind of input. */
    test_create_array(TEST_SORTED);
    test_create_array(TEST_RANDOM);
    test_create_array(TEST_IDENTITY);

    /* Searches. */
    test_find(CLOSE_WHEN_DONE);

    cleanup_globals();
    return 0;
}
/*
UNTESTED COMPLETELY:
int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, u_int32_t *index);
Effect:
If direction >0 then find the smallest i such that h(V_i,extra)>0.
If direction <0 then find the largest i such that h(V_i,extra)<0.
If value!=NULL then store V_i in *value
If index!=NULL then store i in *index.
Requires: The signum of h is monotonically increasing.
Returns
0 success
DB_NOTFOUND no such value is found.
On nonzero return, *value and *index are unchanged.
Performance: time=O(\log N)
Rationale:
The direction==0 is a strange case that should go away in the future.
Here's how to use the find function to find various things
Cases for find:
find first value: ( h(v)=+1, direction=+1 )
find last value ( h(v)=-1, direction=-1 )
find first X ( h(v)=(v< x) ? -1 : 1 direction=+1 )
find last X ( h(v)=(v<=x) ? -1 : 1 direction=-1 )
find X or successor to X ( same as find first X. )
Rationale: To help understand heaviside functions and behavior of find:
There are 7 kinds of heaviside functions.
The signum of h must be monotonically increasing.
Given a function of the following form, A is the element
returned for direction<0, B is the element returned
for direction>0, and C is the element returned for
direction==0 (see find_zero).
If any of A, B, or C are not found, then asking for the
associated direction will return DB_NOTFOUND.
See find_zero for more information.
Let the following represent the signum of the heaviside function.
-...-
A
+...+
B
0...0
C
-...-0...0
AC
0...0+...+
C B
-...-+...+
AB
-...-0...0+...+
AC B
int toku_omt_insert_at(OMT omt, OMTVALUE value, u_int32_t index);
// Effect: Increases indexes of all items at slot >= index by 1.
// Insert value into the position at index.
int toku_omt_set_at (OMT omt, OMTVALUE value, u_int32_t index);
// Effect: Replaces the item at index with value.
int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v, u_int32_t *index);
// Effect: Insert value into the OMT.
// If there is some i such that $h(V_i, v)=0$ then returns DB_KEYEXIST.
// Otherwise, let i be the minimum value such that $h(V_i, v)>0$.
// If no such i exists, then let i be |V|
// Then this has the same effect as
// omt_insert_at(tree, value, i);
// If index!=NULL then i is stored in *index
int toku_omt_delete_at(OMT omt, u_int32_t index);
// Effect: Delete the item in slot index.
// Decreases indexes of all items at slot >= index by 1.
int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index);
// Effect: Find the smallest i such that h(V_i, extra)>=0
// If there is such an i and h(V_i,extra)==0 then set *index=i and return 0.
// If there is such an i and h(V_i,extra)>0 then set *index=i and return DB_NOTFOUND.
// If there is no such i then set *index=toku_omt_size(V) and return DB_NOTFOUND.
int toku_omt_split_at(OMT omt, OMT *newomt, u_int32_t index);
// Effect: Create a new OMT, storing it in *newomt.
// The values to the right of index (starting at index) are moved to *newomt.
int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomt);
// Effect: Appends leftomt and rightomt to produce a new omt.
// Sets *newomt to the new omt.
// On success, leftomt and rightomt destroyed,.
// Returns 0 on success
// ENOMEM on out of memory.
// On error, nothing is modified.
// Performance: time=O(n) is acceptable, but one can imagine implementations that are O(\log n) worst-case.
*/
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
#include <errno.h>
#include <sys/types.h>
typedef struct value *OMTVALUE;
#include "omt.h"
#include "../newbrt/memory.h"
#include "../newbrt/toku_assert.h"
#include "../include/db.h"
typedef struct omt_node *OMT_NODE;
// One node of the weight-balanced binary tree that implements the OMT.
struct omt_node {
u_int32_t weight; // how many values below us (including this node)
OMT_NODE left, right; // subtrees holding the values before / after this one; NULL when empty
OMTVALUE value; // the caller's value stored at this position
};
// The OMT handle.
struct omt {
OMT_NODE root; // root of the tree; NULL when the OMT is empty
u_int32_t tmparray_size; // number of entries allocated in tmparray
OMT_NODE *tmparray; // scratch array of node pointers used while rebalancing and merging
};
/* Create an empty OMT and store it in *omtp.
 * The scratch array starts out with room for 4 node pointers.
 * Returns 0 on success.  On allocation failure returns the errno of the
 * failed allocation and leaves *omtp unmodified.
 */
int toku_omt_create (OMT *omtp) {
    OMT MALLOC(result);
    if (result==NULL) return errno;
    result->root=NULL;
    result->tmparray_size = 4;
    MALLOC_N(result->tmparray_size, result->tmparray);
    if (result->tmparray==0) {
        // Capture errno first: toku_free() is not guaranteed to preserve it.
        int r = errno;
        toku_free(result);
        return r;
    }
    *omtp = result;
    return 0;
}
/* Number of values in the subtree rooted at n; an empty (NULL) subtree weighs 0. */
static u_int32_t nweight (OMT_NODE n) {
    return (n==NULL) ? 0 : n->weight;
}
/* Write the nodes of tree into array in left-to-right (in-order) order.
 * array must have room for nweight(tree) node pointers.
 */
static void fill_array_from_omt_nodes_tree (OMT_NODE *array, OMT_NODE tree) {
    if (tree!=NULL) {
        u_int32_t nleft = nweight(tree->left);
        fill_array_from_omt_nodes_tree(array, tree->left);
        array[nleft] = tree;
        fill_array_from_omt_nodes_tree(array+nleft+1, tree->right);
    }
}
/* Relink the existing nodes nodes[0..numvalues-1] (already in order) into a
 * balanced tree and store its root in *np.  No nodes are allocated or freed;
 * only the weight, left and right fields are rewritten.
 */
static void rebuild_from_sorted_array_of_omt_nodes(OMT_NODE *np, OMT_NODE *nodes, u_int32_t numvalues) {
    if (numvalues==0) {
        *np=NULL;
        return;
    }
    u_int32_t mid  = numvalues/2;
    OMT_NODE  root = nodes[mid];
    root->weight = numvalues;
    // root->value is already in place; the middle node keeps its value.
    rebuild_from_sorted_array_of_omt_nodes(&root->left,  nodes,       mid);
    rebuild_from_sorted_array_of_omt_nodes(&root->right, nodes+mid+1, numvalues-(mid+1));
    *np = root;
}
/* Rebuild the subtree at *np into a balanced shape if one side has become
 * too light relative to the other.  Uses omt->tmparray as scratch space,
 * which must have room for nweight(*np) entries.
 */
static void maybe_rebalance (OMT omt, OMT_NODE *np) {
    OMT_NODE top = *np;
    if (top==0) return;
    u_int32_t wl = nweight(top->left);
    u_int32_t wr = nweight(top->right);
    // In each test, one of the 1's counts the root and the other rounds the halving up.
    int left_too_light  = (1+wl) < (1+1+wr)/2;
    int right_too_light = (1+wr) < (1+1+wl)/2;
    if (left_too_light || right_too_light) {
        fill_array_from_omt_nodes_tree(omt->tmparray, top);
        rebuild_from_sorted_array_of_omt_nodes(np, omt->tmparray, nweight(top));
    }
}
/* Insert value at position index of the subtree rooted at *np.
 * Returns 0 on success or the errno of a failed node allocation, in which
 * case no weight has been changed.  After a successful insert the weight of
 * every node on the path is incremented and the node is rebalanced if needed.
 */
static int insert_internal (OMT omt, OMT_NODE *np, OMTVALUE value, u_int32_t index) {
    OMT_NODE here = *np;
    if (here==0) {
        // Empty subtree: the new value becomes a leaf in this slot.
        assert(index==0);
        OMT_NODE MALLOC(leaf);
        if (leaf==0) return errno;
        leaf->weight = 1;
        leaf->left   = NULL;
        leaf->right  = NULL;
        leaf->value  = value;
        *np = leaf;
        return 0;
    }
    u_int32_t nleft = nweight(here->left);
    int r = (index <= nleft)
        ? insert_internal(omt, &here->left,  value, index)
        : insert_internal(omt, &here->right, value, index-nleft-1);
    if (r) return r;
    here->weight++;
    maybe_rebalance(omt, np);
    return 0;
}
/* Make omt->tmparray suitable for n node pointers.
 * Grows the array to 2*n entries when it is smaller than n, and shrinks it
 * to 2*n entries when more than four times n is allocated (and n>=2).
 * Returns 0 on success, or errno if the reallocation fails (the old array is kept).
 */
static int make_sure_array_is_sized_ok (OMT omt, u_int32_t n) {
    int too_small = omt->tmparray_size < n;
    int too_big   = !too_small && (omt->tmparray_size/4 > n) && (n>=2);
    if (too_small || too_big) {
        u_int32_t new_size = 2*n;
        OMT_NODE *newarray = toku_realloc(omt->tmparray, new_size * sizeof(*newarray));
        if (newarray==0) return errno;
        omt->tmparray      = newarray;
        omt->tmparray_size = new_size;
    }
    return 0;
}
/* Insert value at position index, shifting every item at slot >= index up by one.
 * Returns:
 *   0       success
 *   ERANGE  index > toku_omt_size(omt)
 *   errno   of a failed allocation
 * On error the contents of the omt are unchanged.
 */
int toku_omt_insert_at (OMT omt, OMTVALUE value, u_int32_t index) {
    int r;
    // Reject out-of-range positions here; insert_internal would otherwise trip its assert.
    if (index>nweight(omt->root)) return ERANGE;
    // The scratch array must be able to hold the tree after it has grown by one.
    if ((r=make_sure_array_is_sized_ok(omt, 1+nweight(omt->root)))) return r;
    return insert_internal(omt, &omt->root, value, index);
}
/* Replace the value at position index of the subtree rooted at n with v.
 * The caller must guarantee that index is within the subtree.
 */
static void set_at_internal (OMT_NODE n, OMTVALUE v, u_int32_t index) {
    for (;;) {
        assert(n);
        u_int32_t nleft = nweight(n->left);
        if (index == nleft) {
            n->value = v;
            return;
        }
        if (index < nleft) {
            n = n->left;
        } else {
            // Skip the left subtree and this node.
            index -= nleft+1;
            n = n->right;
        }
    }
}
/* Replace the item at position index with value.
 * Returns 0 on success, or ERANGE (omt unchanged) when index is not a valid position.
 */
int toku_omt_set_at (OMT omt, OMTVALUE value, u_int32_t index) {
    if (index < nweight(omt->root)) {
        set_at_internal(omt->root, value, index);
        return 0;
    }
    return ERANGE;
}
/* Insert value at the position chosen by the heaviside function h(., v).
 * If some element has h==0 the value is considered present: nothing is
 * inserted, the index of the first such element is stored in *index (when
 * index!=NULL) and DB_KEYEXIST is returned.
 * Otherwise value is inserted before the first element with h>0, or at the
 * end if there is none, and that position is stored in *index.
 * Requires: the signum of h is monotonically increasing.
 * Returns 0, DB_KEYEXIST, or the error from toku_omt_insert_at.
 */
int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v, u_int32_t *index) {
    int r;
    u_int32_t idx;
    // find_zero reports the smallest i with h(V_i)>=0 (or the size if there is none),
    // which is both the duplicate check and the insertion point.
    r = toku_omt_find_zero(omt, h, v, NULL, &idx);
    if (r==0) {
        if (index) *index = idx;
        return DB_KEYEXIST;
    }
    if (r!=DB_NOTFOUND) return r;
    if ((r = toku_omt_insert_at(omt, value, idx))) return r;
    if (index) *index = idx;
    return 0;
}
/* Remove the item at position index from the subtree rooted at *np and
 * store the removed value in *vp.
 * The caller must guarantee that index is within the subtree.
 * Weights along the path are decremented and each node on the path is
 * rebalanced if needed.
 */
static void delete_internal (OMT omt, OMT_NODE *np, u_int32_t index, OMTVALUE *vp) {
    OMT_NODE n=*np;
    if (index < nweight(n->left)) {
        delete_internal(omt, &n->left, index, vp);
        n->weight--;
    } else if (index == nweight(n->left)) {
        // This node holds the item.  Report its value in every case,
        // including the two-children case below.
        *vp = n->value;
        if (n->left==NULL) {
            *np = n->right;
            toku_free(n);
        } else if (n->right==NULL) {
            *np = n->left;
            toku_free(n);
        } else {
            OMTVALUE zv;
            // Delete the successor of index, take its value, and store it in this node.
            delete_internal(omt, &n->right, 0, &zv);
            n->value = zv;
            n->weight--;
        }
    } else {
        delete_internal(omt, &n->right, index-nweight(n->left)-1, vp);
        n->weight--;
    }
    maybe_rebalance(omt, np);
}
/* Delete the item at position index; items after it move down by one.
 * Returns 0 on success, ERANGE when index is not a valid position, or the
 * error from resizing the scratch array.  The deleted value is not returned.
 */
int toku_omt_delete_at(OMT omt, u_int32_t index) {
    const u_int32_t count = nweight(omt->root);
    if (index>=count) return ERANGE;
    int r = make_sure_array_is_sized_ok(omt, count-1);
    if (r) return r;
    OMTVALUE discarded;
    delete_internal(omt, &omt->root, index, &discarded);
    return 0;
}
/* Store the value at position i of the subtree rooted at n in *v and return 0.
 * Returns ERANGE, leaving *v untouched, when the descent runs off the tree.
 */
static int fetch_internal (OMT_NODE n, u_int32_t i, OMTVALUE *v) {
    while (n!=NULL) {
        u_int32_t nleft = nweight(n->left);
        if (i == nleft) {
            *v = n->value;
            return 0;
        }
        if (i < nleft) {
            n = n->left;
        } else {
            // Skip the left subtree and this node.
            i -= nleft+1;
            n = n->right;
        }
    }
    return ERANGE;
}
/* Store the i'th value of V in *v.
 * Returns 0 on success, or ERANGE (with *v untouched) when i is out of range.
 */
int toku_omt_fetch (OMT V, u_int32_t i, OMTVALUE *v) {
    OMT_NODE root = V->root;
    return fetch_internal(root, i, v);
}
/* Search the subtree rooted at n for the leftmost element with h==0.
 * On success returns 0 and stores the element in *value and its position
 * (relative to this subtree) in *index, each only when non-NULL.
 * Otherwise returns DB_NOTFOUND; *index (when non-NULL) is set to the position
 * where the descent ended, counted relative to this subtree, and *value is untouched.
 */
static int find_internal_zero (OMT_NODE n, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index) {
    if (n==NULL) {
        if (index!=NULL) *index = 0;
        return DB_NOTFOUND;
    }
    int sign = h(n->value, extra);
    if (sign<0) {
        // Everything up to and including n is negative: continue on the right
        // and shift the reported position past the left subtree and n.
        int r = find_internal_zero(n->right, h, extra, value, index);
        if (index!=NULL) *index += nweight(n->left)+1;
        return r;
    }
    // sign>=0: anything earlier can only be on the left.
    int r = find_internal_zero(n->left, h, extra, value, index);
    if (sign==0 && r==DB_NOTFOUND) {
        // No zero on the left, so n itself is the leftmost zero.
        if (index!=NULL) *index = nweight(n->left);
        if (value!=NULL) *value = n->value;
        r = 0;
    }
    return r;
}
/* Find the leftmost element of t for which h returns 0.
 * See find_internal_zero for what is stored in *value and *index.
 */
int toku_omt_find_zero (OMT t, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index) {
    OMT_NODE root = t->root;
    return find_internal_zero(root, h, extra, value, index);
}
// Search for direction<0: find the largest i such that h(V_i,extra)<0.
// Returns 0 and stores the element / its position (relative to this subtree)
// in *value / *index when they are non-NULL.  Returns DB_NOTFOUND, storing
// nothing, when no element of the subtree is negative.
static int find_internal_minus (OMT_NODE n, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index) {
    if (n==NULL) return DB_NOTFOUND;
    if (h(n->value, extra) >= 0) {
        // n is not negative, so the answer can only be on the left.
        return find_internal_minus(n->left, h, extra, value, index);
    }
    // n is negative: prefer a negative element further right, else n itself.
    int r = find_internal_minus(n->right, h, extra, value, index);
    if (r==0) {
        if (index!=NULL) (*index) += nweight(n->left)+1;
    } else if (r==DB_NOTFOUND) {
        if (index!=NULL) *index = nweight(n->left);
        if (value!=NULL) *value = n->value;
        r = 0;
    }
    return r;
}
// Search for direction>0: find the smallest i such that h(V_i,extra)>0.
// Returns 0 and stores the element / its position (relative to this subtree)
// in *value / *index when they are non-NULL.  Returns DB_NOTFOUND, storing
// nothing, when no element of the subtree is positive.
static int find_internal_plus (OMT_NODE n, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index) {
    if (n==NULL) return DB_NOTFOUND;
    if (h(n->value, extra) <= 0) {
        // n is not positive, so the answer can only be on the right;
        // shift the reported position past the left subtree and n.
        int r = find_internal_plus(n->right, h, extra, value, index);
        if (r==0 && index!=NULL) (*index) += nweight(n->left)+1;
        return r;
    }
    // n is positive: prefer a positive element further left, else n itself.
    int r = find_internal_plus(n->left, h, extra, value, index);
    if (r==DB_NOTFOUND) {
        if (index!=NULL) *index = nweight(n->left);
        if (value!=NULL) *value = n->value;
        r = 0;
    }
    return r;
}
/* Directional search.
 *   direction<0  largest i with h(V_i,extra)<0
 *   direction>0  smallest i with h(V_i,extra)>0
 *   direction==0 is not supported here and aborts; use toku_omt_find_zero.
 * Returns 0 (storing into the non-NULL outputs) or DB_NOTFOUND.
 */
int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, u_int32_t *index) {
    if (direction==0) {
        abort();
    }
    if (direction<0) {
        return find_internal_minus(V->root, h, extra, value, index);
    }
    return find_internal_plus(V->root, h, extra, value, index);
}
/* Free every node of the subtree rooted at n (children first).
 * The values stored in the nodes are not freed.
 */
static void free_omt_nodes (OMT_NODE n) {
    if (n!=0) {
        free_omt_nodes(n->left);
        free_omt_nodes(n->right);
        toku_free(n);
    }
}
// Example: numvalues=4, halfway=2, left side is values of size 2
// right side is values+3 of size 1
// numvalues=3, halfway=1, left side is values of size 1
// right side is values+2 of size 1
// numvalues=2, halfway=1, left side is values of size 1
// right side is values+2 of size 0
// numvalues=1, halfway=0, left side is values of size 0
// right side is values+1 of size 0.
/* Build a balanced tree holding values[0..numvalues-1] in order and store
 * its root in *np.  Returns 0 on success.  On allocation failure returns
 * errno, frees whatever part of this subtree was already built, and leaves
 * *np unmodified.
 */
static int create_from_sorted_array_internal(OMT_NODE *np, OMTVALUE *values, u_int32_t numvalues) {
    if (numvalues==0) {
        *np=NULL;
        return 0;
    }
    u_int32_t mid = numvalues/2;
    OMT_NODE MALLOC(root);
    if (root==NULL) return errno;
    root->weight = numvalues;
    root->value  = values[mid];

    int r = create_from_sorted_array_internal(&root->left, values, mid);
    if (r) {
        toku_free(root);
        return r;
    }
    r = create_from_sorted_array_internal(&root->right, values+mid+1, numvalues-(mid+1));
    if (r) {
        // The left side was built successfully, so it has to be torn down too.
        free_omt_nodes(root->left);
        toku_free(root);
        return r;
    }
    *np = root;
    return 0;
}
/* Create an OMT holding values[0..numvalues-1] in the given order and store
 * it in *omtp.  Returns 0 on success; on failure returns the error code and
 * leaves *omtp unmodified.
 */
int toku_omt_create_from_sorted_array(OMT *omtp, OMTVALUE *values, u_int32_t numvalues) {
    OMT fresh;
    int r = toku_omt_create(&fresh);
    if (r) return r;

    r = create_from_sorted_array_internal(&fresh->root, values, numvalues);
    if (r==0) {
        // Make the scratch array large enough for later rebalancing.
        r = make_sure_array_is_sized_ok(fresh, numvalues);
    }
    if (r) {
        toku_omt_destroy(&fresh);
        return r;
    }
    *omtp=fresh;
    return 0;
}
/* Free all nodes, the scratch array and the OMT itself, then set *omtp to NULL.
 * The values stored in the OMT are not freed.
 */
void toku_omt_destroy(OMT *omtp) {
    OMT doomed = *omtp;
    free_omt_nodes(doomed->root);
    toku_free(doomed->tmparray);
    toku_free(doomed);
    *omtp = NULL;
}
/* Number of values in V; this is the weight recorded at the root. */
u_int32_t toku_omt_size(OMT V) {
    OMT_NODE root = V->root;
    return nweight(root);
}
/* Call f(value, position, v) on every element of the subtree rooted at n,
 * left to right.  idx is the position of the subtree's first element.
 * Stops at, and returns, the first nonzero result of f; returns 0 otherwise.
 */
static int iterate_internal(OMT_NODE n, u_int32_t idx, int (*f)(OMTVALUE, u_int32_t, void*), void*v) {
    if (n==NULL) return 0;
    u_int32_t here = idx+nweight(n->left);   // position of n itself
    int r = iterate_internal(n->left, idx, f, v);
    if (r) return r;
    r = f(n->value, here, v);
    if (r) return r;
    return iterate_internal(n->right, here+1, f, v);
}
/* Call f on every value of omt from left to right, with positions starting at 0.
 * Returns the first nonzero result of f, or 0 if f never fails.
 */
int toku_omt_iterate(OMT omt, int (*f)(OMTVALUE, u_int32_t, void*), void*v) {
    OMT_NODE root = omt->root;
    return iterate_internal(root, 0, f, v);
}
/* Split omt at index: elements [0,index) stay in omt, and elements from index
 * onward move to a new OMT stored in *newomtp.
 * Returns 0 on success, ERANGE when index is not a valid position, or the
 * error of a failed allocation.  On failure the tree of omt is untouched and
 * *newomtp is not written.
 */
int toku_omt_split_at(OMT omt, OMT *newomtp, u_int32_t index) {
    if (index>=nweight(omt->root)) return ERANGE;

    int r;
    const u_int32_t oldsize = toku_omt_size(omt);
    const u_int32_t newsize = oldsize-index;

    OMT newomt;
    if ((r = toku_omt_create(&newomt))) return r;
    if ((r = make_sure_array_is_sized_ok(newomt, newsize))) {
        toku_omt_destroy(&newomt);
        return r;
    }

    // Private array holding every node of the old tree in order.
    OMT_NODE *MALLOC_N(oldsize, nodes);
    if (nodes==0) {
        r = errno;
        toku_omt_destroy(&newomt);
        return r;
    }

    // Modify omt's array at the last possible moment, since after this nothing can fail.
    if ((r = make_sure_array_is_sized_ok(omt, index))) {
        toku_free(nodes);
        toku_omt_destroy(&newomt);
        return r;
    }

    fill_array_from_omt_nodes_tree(nodes, omt->root);
    rebuild_from_sorted_array_of_omt_nodes(&newomt->root, nodes+index, newsize);
    rebuild_from_sorted_array_of_omt_nodes(&omt->root,    nodes,       index);
    toku_free(nodes);

    *newomtp = newomt;
    return 0;
}
/* Concatenate leftomt and rightomt into a new OMT stored in *newomtp.
 * On success both inputs are destroyed; their nodes are reused by the new OMT.
 * Returns 0 on success, or the error of a failed allocation, in which case
 * the inputs are left untouched and *newomtp is not written.
 */
int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomtp) {
    const u_int32_t leftsize = toku_omt_size(leftomt);
    const u_int32_t newsize  = leftsize+toku_omt_size(rightomt);

    OMT merged;
    int r = toku_omt_create(&merged);
    if (r) return r;
    r = make_sure_array_is_sized_ok(merged, newsize);
    if (r) {
        toku_omt_destroy(&merged);
        return r;
    }

    // Lay out all the nodes in order in the new scratch array, then relink them.
    fill_array_from_omt_nodes_tree(merged->tmparray,          leftomt->root);
    fill_array_from_omt_nodes_tree(merged->tmparray+leftsize, rightomt->root);
    rebuild_from_sorted_array_of_omt_nodes(&merged->root, merged->tmparray, newsize);

    // The nodes now belong to merged; detach them so destroying the inputs does not free them.
    leftomt->root  = NULL;
    rightomt->root = NULL;
    toku_omt_destroy(&leftomt);
    toku_omt_destroy(&rightomt);

    *newomtp = merged;
    return 0;
}
#if !defined(OMT_H)
#define OMT_H
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
// Order Maintenance Tree (OMT)
//
// Maintains a collection of totally ordered values, where each value has an integer weight.
// The OMT is a mutable datatype.
//
// The Abstraction:
//
// An OMT is a vector of values, $V$, where $|V|$ is the length of the vector.
// The vector is numbered from $0$ to $|V|-1$.
// Each value has a weight. The weight of the $i$th element is denoted $w(V_i)$.
//
// We can create a new OMT, which is the empty vector.
//
// We can insert a new element $x$ into slot $i$, changing $V$ into $V'$ where
// $|V'|=1+|V|$ and
//
// V'_j = V_j if $j<i$
// x if $j=i$
// V_{j-1} if $j>i$.
//
// We can specify $i$ using a kind of function instead of as an integer.
// Let $b$ be a function mapping from values to nonzero integers, such that
// the signum of $b$ is monotonically increasing.
// We can specify $i$ as the minimum integer such that $b(V_i)>0$.
//
// We look up a value using its index, or using a Heaviside function.
// For lookups, we allow $b$ to be zero for some values, and again the signum of $b$ must be monotonically increasing.
// When looking up values, we can look up
// $V_i$ where $i$ is the minimum integer such that $b(V_i)=0$. (With a special return code if no such value exists.)
// (Rationale: Ordinarily we want $i$ to be unique. But for various reasons we want to allow multiple zeros, and we want the smallest $i$ in that case.)
// $V_i$ where $i$ is the minimum integer such that $b(V_i)>0$. (Or an indication that no such value exists.)
// $V_i$ where $i$ is the maximum integer such that $b(V_i)<0$. (Or an indication that no such value exists.)
//
// When looking up a value using a Heaviside function, we get the value and its index.
//
// We can also split an OMT into two OMTs, splitting the weight of the values evenly.
// Find a value $j$ such that the values to the left of $j$ have about the same total weight as the values to the right of $j$.
// The resulting two OMTs contain the values to the left of $j$ and the values to the right of $j$ respectively.
// All of the values from the original OMT go into one of the new OMTs.
// If the weights of the values don't split exactly evenly, then the implementation has the freedom to choose whether
// the new left OMT or the new right OMT is larger.
//
// Performance:
// Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$ calls to the Heaviside function.
// The memory required is O(|V|).
//
// The programming API:
//typedef struct value *OMTVALUE; // A slight improvement over using void*.
typedef struct omt *OMT;
int toku_omt_create (OMT *omtp);
// Effect: Create an empty OMT. Stores it in *omtp.
// Requires: omtp != NULL
// Returns:
// 0 success
// ENOMEM out of memory (and doesn't modify *omtp)
// Performance: constant time.
int toku_omt_create_from_sorted_array(OMT *omtp, OMTVALUE *values, u_int32_t numvalues);
// Effect: Create a OMT containing values. The number of values is in numvalues.
// Stores the new OMT in *omtp.
// Requires: omtp != NULL
// Requires: values != NULL
// Requires: values is sorted
// Returns:
// 0 success
// ENOMEM out of memory (and doesn't modify *omtp)
// Performance: time=O(numvalues)
// Rationale: Normally to insert N values takes O(N lg N) amortized time.
// If the N values are known in advance, are sorted, and
// the structure is empty, we can batch insert them much faster.
void toku_omt_destroy(OMT *omtp);
// Effect: Destroy an OMT, freeing all its memory.
// Does not free the OMTVALUEs stored in the OMT.
// Those values may be freed before or after calling toku_omt_destroy.
// Also sets *omtp=NULL.
// Requires: omtp != NULL
// Requires: *omtp != NULL
// Rationale: The usage is to do something like
// toku_omt_destroy(&s->omt);
// and now s->omt will have a NULL pointer instead of a dangling freed pointer.
// Rationale: Returns no values since free() cannot fail.
// Rationale: Does not free the OMTVALUEs to reduce complexity.
// Performance: time=O(toku_omt_size(*omtp))
u_int32_t toku_omt_size(OMT V);
// Effect: return |V|.
// Requires: V != NULL
// Performance: time=O(1)
int toku_omt_iterate(OMT omt, int (*f)(OMTVALUE, u_int32_t, void*), void*v);
// Effect: Iterate over the values of the omt, from left to right, calling f on each value.
// The second argument passed to f is the index of the value.
// The third argument passed to f is v.
// The indices run from 0 (inclusive) to toku_omt_size(omt) (exclusive).
// Requires: omt != NULL
// Requires: f != NULL
// Returns:
// If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by toku_omt_iterate.
// If f always returns zero, then toku_omt_iterate returns 0.
// Requires: Don't modify omt while running. (E.g., f may not insert or delete values from omt.)
// Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in omt.
// Rationale: Although the functional iterator requires defining another function (as opposed to a C++-style iterator), it is much easier to read.
int toku_omt_insert_at(OMT omt, OMTVALUE value, u_int32_t index);
// Effect: Increases indexes of all items at slot >= index by 1.
// Insert value into the position at index.
//
// Returns:
// 0 success
// ERANGE if index>toku_omt_size(omt)
// ENOMEM
// On error, omt is unchanged.
// Performance: time=O(\log N) amortized time.
// Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now.
int toku_omt_set_at (OMT omt, OMTVALUE value, u_int32_t index);
// Effect: Replaces the item at index with value.
// Returns:
// 0 success
// ERANGE
// On error, omt is unchanged.
// Performance: time=O(\log N)
// Rationale: The BRT needs to be able to replace a value with another copy of the same value (allocated in a different location)
int toku_omt_insert(OMT omt, OMTVALUE value, int(*h)(OMTVALUE, void*v), void *v, u_int32_t *index);
// Effect: Insert value into the OMT.
// If there is some i such that $h(V_i, v)=0$ then returns DB_KEYEXIST.
// Otherwise, let i be the minimum value such that $h(V_i, v)>0$.
// If no such i exists, then let i be |V|
// Then this has the same effect as
// toku_omt_insert_at(omt, value, i);
// If index!=NULL then i is stored in *index
// Requires: The signum of h must be monotonically increasing.
// Returns:
// 0 success
// DB_KEYEXIST the key is present (h was equal to zero for some value)
// ENOMEM
// On nonzero return, omt is unchanged.
// On nonzero non-DB_KEYEXIST return, *index is unchanged.
// Performance: time=O(\log N) amortized.
// Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now.
int toku_omt_delete_at(OMT omt, u_int32_t index);
// Effect: Delete the item in slot index.
// Decreases indexes of all items at slot > index by 1.
// Returns
// 0 success
// ERANGE if index>=toku_omt_size(omt)
// On error, omt is unchanged.
// Rationale: To delete an item, first find its index using toku_omt_find, then delete it.
// Performance: time=O(\log N) amortized.
int toku_omt_fetch (OMT V, u_int32_t i, OMTVALUE *v);
// Effect: Set *v=V_i
// Requires: v != NULL
// Returns
// 0 success
// ERANGE if i out of range
// On nonzero return, *v is unchanged.
// Performance: time=O(\log N)
int toku_omt_find_zero(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, OMTVALUE *value, u_int32_t *index);
// Effect: Find the smallest i such that h(V_i, extra)>=0
// If there is such an i and h(V_i,extra)==0 then set *index=i and return 0.
// If there is such an i and h(V_i,extra)>0 then set *index=i and return DB_NOTFOUND.
// If there is no such i then set *index=toku_omt_size(V) and return DB_NOTFOUND.
// Requires: index!=NULL
int toku_omt_find(OMT V, int (*h)(OMTVALUE, void*extra), void*extra, int direction, OMTVALUE *value, u_int32_t *index);
/* Effect:
If direction >0 then find the smallest i such that h(V_i,extra)>0.
If direction <0 then find the largest i such that h(V_i,extra)<0.
If value!=NULL then store V_i in *value
If index!=NULL then store i in *index.
Requires: The signum of h is monotonically increasing.
Returns
0 success
DB_NOTFOUND no such value is found.
On nonzero return, *value and *index are unchanged.
Performance: time=O(\log N)
Rationale:
The direction==0 is a strange case that should go away in the future.
Here's how to use the find function to find various things
Cases for find:
find first value: ( h(v)=+1, direction=+1 )
find last value ( h(v)=-1, direction=-1 )
find first X ( h(v)=(v< x) ? -1 : 1 direction=+1 )
find last X ( h(v)=(v<=x) ? -1 : 1 direction=-1 )
find X or successor to X ( same as find first X. )
Rationale: To help understand heaviside functions and behavior of find:
There are 7 kinds of heaviside functions.
The signum of h must be monotonically increasing.
Given a function of the following form, A is the element
returned for direction<0, B is the element returned
for direction>0, and C is the element returned for
direction==0 (see find_zero).
If any of A, B, or C are not found, then asking for the
associated direction will return DB_NOTFOUND.
See find_zero for more information.
Let the following represent the signum of the heaviside function.
-...-
    A
+...+
B
0...0
C
-...-0...0
    AC
0...0+...+
C    B
-...-+...+
    AB
-...-0...0+...+
    AC    B
*/
int toku_omt_split_at(OMT omt, OMT *newomt, u_int32_t index);
// Effect: Create a new OMT, storing it in *newomt.
// The values to the right of index (starting at index) are moved to *newomt.
// Requires: omt != NULL
// Requires: newomt != NULL
// Returns
// 0 success,
// ERANGE if index > toku_omt_size(omt)
// ENOMEM
// On nonzero return, omt and *newomt are unmodified.
// Performance: time=O(n)
// Rationale: We don't need a split-evenly operation. We need to split items so that their total sizes
// are even, and other similar splitting criteria. It's easy to split evenly by calling toku_omt_size(), and dividing by two.
int toku_omt_merge(OMT leftomt, OMT rightomt, OMT *newomt);
// Effect: Appends leftomt and rightomt to produce a new omt.
// Sets *newomt to the new omt.
// On success, leftomt and rightomt are destroyed.
// Returns 0 on success
// ENOMEM on out of memory.
// On error, nothing is modified.
// Performance: time=O(n) is acceptable, but one can imagine implementations that are O(\log n) worst-case.
#endif /* #ifndef OMT_H */
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
#include "log_header.h" #include "log_header.h"
#include "toku_assert.h" #include "toku_assert.h"
#include "kv-pair.h" #include "kv-pair.h"
#include "gpma-internal.h" #include "omt.h"
#include <fcntl.h> #include <fcntl.h>
#include <stdlib.h> #include <stdlib.h>
...@@ -175,14 +175,14 @@ void toku_recover_newbrtnode (LSN lsn, FILENUM filenum,DISKOFF diskoff,u_int32_t ...@@ -175,14 +175,14 @@ void toku_recover_newbrtnode (LSN lsn, FILENUM filenum,DISKOFF diskoff,u_int32_t
n->thisnodename = diskoff; n->thisnodename = diskoff;
n->log_lsn = n->disk_lsn = lsn; n->log_lsn = n->disk_lsn = lsn;
//printf("%s:%d %p->disk_lsn=%"PRId64"\n", __FILE__, __LINE__, n, n->disk_lsn.lsn); //printf("%s:%d %p->disk_lsn=%"PRId64"\n", __FILE__, __LINE__, n, n->disk_lsn.lsn);
n->layout_version = 4; n->layout_version = 5;
n->height = height; n->height = height;
n->rand4fingerprint = rand4fingerprint; n->rand4fingerprint = rand4fingerprint;
n->flags = is_dup_sort ? TOKU_DB_DUPSORT : 0; // Don't have TOKU_DB_DUP ??? n->flags = is_dup_sort ? TOKU_DB_DUPSORT : 0; // Don't have TOKU_DB_DUP ???
n->local_fingerprint = 0; // nothing there yet n->local_fingerprint = 0; // nothing there yet
n->dirty = 1; n->dirty = 1;
if (height==0) { if (height==0) {
r=toku_gpma_create(&n->u.l.buffer, 0); r=toku_omt_create(&n->u.l.buffer);
assert(r==0); assert(r==0);
n->u.l.n_bytes_in_buffer=0; n->u.l.n_bytes_in_buffer=0;
{ {
...@@ -222,7 +222,7 @@ static void recover_setup_node (FILENUM filenum, DISKOFF diskoff, CACHEFILE *cf, ...@@ -222,7 +222,7 @@ static void recover_setup_node (FILENUM filenum, DISKOFF diskoff, CACHEFILE *cf,
*cf = pair->cf; *cf = pair->cf;
} }
void toku_recover_deqrootentry (LSN lsn __attribute__((__unused__)), FILENUM filenum, TXNID xid, u_int32_t typ, BYTESTRING key, BYTESTRING val) { static void toku_recover_deqrootentry (LSN lsn __attribute__((__unused__)), FILENUM filenum, TXNID xid, u_int32_t typ, BYTESTRING key, BYTESTRING val) {
struct cf_pair *pair = NULL; struct cf_pair *pair = NULL;
int r = find_cachefile(filenum, &pair); int r = find_cachefile(filenum, &pair);
assert(r==0); assert(r==0);
...@@ -488,7 +488,37 @@ void toku_recover_cfclose (LSN UU(lsn), BYTESTRING UU(fname), FILENUM filenum) { ...@@ -488,7 +488,37 @@ void toku_recover_cfclose (LSN UU(lsn), BYTESTRING UU(fname), FILENUM filenum) {
toku_free_BYTESTRING(fname); toku_free_BYTESTRING(fname);
} }
void toku_recover_insertleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t pmaidx, LEAFENTRY newleafentry) { // The memory for the new node should have already been allocated.
// Recovery handler for a logged leaf split.
// Effect: Pins the existing leaf at old_diskoff, allocates a new in-memory leaf node
//   for new_diskoff, fills in its header fields from the log record, and moves the
//   values of the old leaf's OMT starting at index new_n into the new leaf's OMT.
// Parameters:
//   lsn            recorded as both log_lsn and disk_lsn of the new node
//   filenum        identifies the open cachefile/brt pair
//   old_diskoff    node name of the leaf being split (must have height 0)
//   new_diskoff    node name given to the new leaf
//   old_n          number of values the old leaf must hold before the split (asserted)
//   new_n          index passed to toku_omt_split_at
//   new_node_size, new_rand4, is_dup_sort   copied into the new node's header
// Every failure is treated as fatal (assert), like the other recovery handlers in this file.
// NOTE(review): this function still looks unfinished.  It returns without unpinning
//   old_diskoff, without handing newn to the cachetable, and without adjusting
//   n_bytes_in_buffer / local_fingerprint / the mempools of either node.
//   Confirm the intended completion before relying on it.
void toku_recover_leafsplit (LSN lsn, FILENUM filenum, DISKOFF old_diskoff, DISKOFF new_diskoff, u_int32_t old_n, u_int32_t new_n, u_int32_t new_node_size, u_int32_t new_rand4, u_int8_t is_dup_sort) {
    struct cf_pair *pair = NULL;
    int r = find_cachefile(filenum, &pair);
    assert(r==0); // pair is dereferenced below, so a failed lookup must stop here
    void *nodeA_v;
    assert(pair->brt);
    r = toku_cachetable_get_and_pin(pair->cf, old_diskoff, &nodeA_v, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, pair->brt);
    assert(r==0);
    BRTNODE oldn = nodeA_v;
    assert(oldn->height==0);

    TAGMALLOC(BRTNODE, newn);
    assert(newn); // presumably TAGMALLOC yields NULL on allocation failure -- TODO confirm
    newn->nodesize = new_node_size;
    newn->thisnodename = new_diskoff;
    newn->log_lsn = newn->disk_lsn = lsn;
    //printf("%s:%d %p->disk_lsn=%"PRId64"\n", __FILE__, __LINE__, n, n->disk_lsn.lsn);
    newn->layout_version = 5; // same layout version that toku_recover_newbrtnode assigns to OMT-based nodes
    newn->height = 0;
    newn->rand4fingerprint = new_rand4;
    newn->flags = is_dup_sort ? TOKU_DB_DUPSORT : 0; // Don't have TOKU_DB_DUP ???
    newn->local_fingerprint = 0; // nothing there yet
    newn->dirty = 1;

    assert(toku_omt_size(oldn->u.l.buffer)==old_n);
    r = toku_omt_split_at(oldn->u.l.buffer, &newn->u.l.buffer, new_n);
    assert(r==0); // ERANGE/ENOMEM would leave newn->u.l.buffer unset
}
void toku_recover_insertleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t idx, LEAFENTRY newleafentry) {
struct cf_pair *pair = NULL; struct cf_pair *pair = NULL;
int r = find_cachefile(filenum, &pair); int r = find_cachefile(filenum, &pair);
assert(r==0); assert(r==0);
...@@ -502,11 +532,12 @@ void toku_recover_insertleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_ ...@@ -502,11 +532,12 @@ void toku_recover_insertleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_
node->log_lsn = lsn; node->log_lsn = lsn;
{ {
int memsize = leafentry_memsize(newleafentry); int memsize = leafentry_memsize(newleafentry);
void *mem = mempool_malloc_from_gpma(node->u.l.buffer, &node->u.l.buffer_mempool, memsize); void *mem = mempool_malloc_from_omt(node->u.l.buffer, &node->u.l.buffer_mempool, memsize);
assert(mem); assert(mem);
memcpy(mem, newleafentry, memsize); memcpy(mem, newleafentry, memsize);
toku_gpma_set_at_index(node->u.l.buffer, pmaidx, memsize, mem); r = toku_omt_insert_at(node->u.l.buffer, mem, idx);
node->u.l.n_bytes_in_buffer += PMA_ITEM_OVERHEAD + leafentry_disksize(newleafentry); assert(r==0);
node->u.l.n_bytes_in_buffer += OMT_ITEM_OVERHEAD + leafentry_disksize(newleafentry);
node->local_fingerprint += node->rand4fingerprint * toku_le_crc(newleafentry); node->local_fingerprint += node->rand4fingerprint * toku_le_crc(newleafentry);
} }
r = toku_cachetable_unpin(pair->cf, diskoff, 1, toku_serialize_brtnode_size(node)); r = toku_cachetable_unpin(pair->cf, diskoff, 1, toku_serialize_brtnode_size(node));
...@@ -514,7 +545,7 @@ void toku_recover_insertleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_ ...@@ -514,7 +545,7 @@ void toku_recover_insertleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_
toku_free_LEAFENTRY(newleafentry); toku_free_LEAFENTRY(newleafentry);
} }
void toku_recover_deleteleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t pmaidx, LEAFENTRY oldleafentry) { void toku_recover_deleteleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t idx, LEAFENTRY oldleafentry) {
struct cf_pair *pair = NULL; struct cf_pair *pair = NULL;
int r = find_cachefile(filenum, &pair); int r = find_cachefile(filenum, &pair);
assert(r==0); assert(r==0);
...@@ -527,214 +558,23 @@ void toku_recover_deleteleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_ ...@@ -527,214 +558,23 @@ void toku_recover_deleteleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_
VERIFY_COUNTS(node); VERIFY_COUNTS(node);
node->log_lsn = lsn; node->log_lsn = lsn;
{ {
u_int32_t len; void *data; LEAFENTRY data;
r=toku_gpma_get_from_index(node->u.l.buffer, pmaidx, &len, &data); r=toku_omt_fetch(node->u.l.buffer, idx, &data);
assert(r==0); assert(r==0);
assert(len==leafentry_memsize(oldleafentry)); u_int32_t len = leafentry_memsize(oldleafentry);
assert(leafentry_memsize(data)==len);
assert(memcmp(oldleafentry, data, len)==0); assert(memcmp(oldleafentry, data, len)==0);
node->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + leafentry_disksize(data); node->u.l.n_bytes_in_buffer -= OMT_ITEM_OVERHEAD + leafentry_disksize(data);
node->local_fingerprint -= node->rand4fingerprint * toku_le_crc(data); node->local_fingerprint -= node->rand4fingerprint * toku_le_crc(data);
toku_mempool_mfree(&node->u.l.buffer_mempool, data, len); toku_mempool_mfree(&node->u.l.buffer_mempool, data, len);
toku_gpma_clear_at_index(node->u.l.buffer, pmaidx); r = toku_omt_delete_at(node->u.l.buffer, idx);
assert(r==0);
} }
r = toku_cachetable_unpin(pair->cf, diskoff, 1, toku_serialize_brtnode_size(node)); r = toku_cachetable_unpin(pair->cf, diskoff, 1, toku_serialize_brtnode_size(node));
assert(r==0); assert(r==0);
toku_free_LEAFENTRY(oldleafentry); toku_free_LEAFENTRY(oldleafentry);
} }
//void toku_recover_replaceleafentry (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t pmaidx, LEAFENTRY oldleafentry, LEAFENTRY newleafentry) {
// struct cf_pair *pair = NULL;
// int r = find_cachefile(filenum, &pair);
// assert(r==0);
// void *node_v;
// assert(pair->brt);
// r = toku_cachetable_get_and_pin(pair->cf, diskoff, &node_v, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, pair->brt);
// assert(r==0);
// BRTNODE node = node_v;
// assert(node->height==0);
// VERIFY_COUNTS(node);
// node->log_lsn = lsn;
// {
// u_int32_t len; void *data;
// r=toku_gpma_get_from_index(node->u.l.buffer, pmaidx, &len, &data);
// assert(r==0);
// assert(len==leafentry_memsize(oldleafentry));
// assert(memcmp(oldleafentry, data, len)==0);
// node->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + leafentry_disksize(data);
// node->local_fingerprint -= node->rand4fingerprint * toku_le_crc(data);
// toku_mempool_mfree(&node->u.l.buffer_mempool, data, len);
// }
// {
// int memsize = leafentry_memsize(newleafentry);
// void *mem = mempool_malloc_from_gpma(node->u.l.buffer, &node->u.l.buffer_mempool, memsize);
// memcpy(mem, newleafentry, memsize);
// toku_gpma_set_at_index(node->u.l.buffer, pmaidx, memsize, mem);
// node->u.l.n_bytes_in_buffer += PMA_ITEM_OVERHEAD + leafentry_disksize(newleafentry);
// node->local_fingerprint += node->rand4fingerprint * toku_le_crc(newleafentry);
// }
// r = toku_cachetable_unpin(pair->cf, diskoff, 1, toku_serialize_brtnode_size(node));
// assert(r==0);
// toku_free_LEAFENTRY(oldleafentry);
// toku_free_LEAFENTRY(newleafentry);
//}
// Recovery handler for a logged delete of one key/value pair from a leaf.
// Pins the leaf at diskoff, returns the slot's storage to the leaf mempool when the
// slot lookup succeeds, clears slot pmaidx, and then hits the deliberate
// assert(!"kvpair") below: the fingerprint update for this record type is commented out,
// so the handler aborts rather than continue with a wrong fingerprint.
// The statements after that assert only run when assertions are compiled out.
void toku_recover_deleteinleaf (LSN lsn, TXNID UU(txnid), FILENUM filenum, DISKOFF diskoff, u_int32_t pmaidx, BYTESTRING keybs, BYTESTRING databs) {
    struct cf_pair *pair = NULL;
    void *node_v;
    u_int32_t slot_len;
    void *slot_data;

    int r = find_cachefile(filenum, &pair);
    assert(r==0);
    assert(pair->brt);

    r = toku_cachetable_get_and_pin(pair->cf, diskoff, &node_v, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, pair->brt);
    assert(r==0);

    BRTNODE node = node_v;
    assert(node->height==0);
    VERIFY_COUNTS(node);

    // Free the stored bytes only when the slot lookup reports success.
    if (toku_gpma_get_from_index(node->u.l.buffer, pmaidx, &slot_len, &slot_data)==0) {
        toku_mempool_mfree(&node->u.l.buffer_mempool, slot_data, slot_len);
    }
    // The slot is cleared whether or not anything was found there.
    toku_gpma_clear_at_index(node->u.l.buffer, pmaidx);

    assert(!"kvpair");
    //node->local_fingerprint -= node->rand4fingerprint*toku_calccrc32_kvpair(keybs.data, keybs.len, databs.data, databs.len);
    node->u.l.n_bytes_in_buffer -= PMA_ITEM_OVERHEAD + KEY_VALUE_OVERHEAD + keybs.len + databs.len;
    VERIFY_COUNTS(node);
    node->log_lsn = lsn;

    r = toku_cachetable_unpin(pair->cf, diskoff, 1, toku_serialize_brtnode_size(node));
    assert(r==0);

    // The log reader handed us ownership of both byte strings.
    toku_free_BYTESTRING(keybs);
    toku_free_BYTESTRING(databs);
}
// a newbrtnode should have been done before this
// Recovery handler for a logged PMA resize: pins the leaf at diskoff, resizes its
// buffer to exactly newsize slots, stamps the node with lsn and unpins it dirty.
// oldsize is carried by the log record but not consulted here.
void toku_recover_resizepma (LSN lsn, FILENUM filenum, DISKOFF diskoff, u_int32_t oldsize __attribute__((__unused__)), u_int32_t newsize) {
    struct cf_pair *pair = NULL;
    void *node_v;

    int r = find_cachefile(filenum, &pair);
    assert(r==0);
    assert(pair->brt);

    r = toku_cachetable_get_and_pin (pair->cf, diskoff, &node_v, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, pair->brt);
    assert(r==0);

    BRTNODE node = node_v;
    assert(node->height==0); // only leaves carry a buffer to resize

    r = toku_resize_gpma_exactly (node->u.l.buffer, newsize);
    assert(r==0);
    VERIFY_COUNTS(node);

    node->log_lsn = lsn;
    r = toku_cachetable_unpin(pair->cf, diskoff, 1, toku_serialize_brtnode_size(node));
    assert(r==0);
}
// Move the items named by fromto out of GPMA `from` and into GPMA `to`.
// For each pair in fromto, .a is the source slot in `from` and .b is the destination slot in `to`.
// `from` and `to` may be the same GPMA, which is why all source slots are emptied
// (first loop) before any destination slot is written (second loop).
// Parameters:
//   from_mempool, to_mempool  mempools backing the item data of `from` and `to`
//   a_rand, a_fp              fingerprint multiplier and fingerprint of the source node; *a_fp is decreased
//   b_rand, b_fp              fingerprint multiplier and fingerprint of the destination node; *b_fp is increased
//   a_nbytes, b_nbytes        byte counts of source and destination; adjusted by the moved items' size
//   new_N                     `to` is resized to exactly this index limit if it differs
// Returns: 0 on success, or errno if the temporary item array cannot be allocated.
int move_indices (GPMA from, struct mempool *from_mempool,
GPMA to, struct mempool *to_mempool,
INTPAIRARRAY fromto,
u_int32_t a_rand, u_int32_t *a_fp,
u_int32_t b_rand, u_int32_t *b_fp,
u_int32_t *a_nbytes, u_int32_t *b_nbytes,
u_int32_t new_N) {
toku_verify_gpma(from);
toku_verify_gpma(to);
// Scratch copy of the items being moved.
struct gitem *MALLOC_N(fromto.size, items);
if (items==0) return errno;
u_int32_t i;
// fp sums the CRCs of the moved leafentries; sizediff sums their on-disk footprint.
u_int32_t fp=0;
u_int32_t sizediff=0;
// Pass 1: lift every item out of its source slot, leaving the slot empty.
for (i=0; i<fromto.size; i++) {
int idx = fromto.array[i].a;
struct gitem item = from->items[idx];
items[i]=item;
from->items[idx].data = 0;
fp += toku_le_crc(item.data);
sizediff += PMA_ITEM_OVERHEAD + leafentry_disksize(item.data);
assert(leafentry_memsize(item.data)==item.len);
}
from->n_items_present -= fromto.size;
// Resize the destination only after the source slots have been emptied.
if (new_N!=toku_gpma_index_limit(to)) {
int r = toku_resize_gpma_exactly(to, new_N);
assert(r==0);
}
// Pass 2: drop every item into its destination slot, which must be empty.
for (i=0; i<fromto.size; i++) {
int to_idx = fromto.array[i].b;
assert(to->items[to_idx].data==0);
if (from==to) {
// Same GPMA: the data stays where it is in the mempool, only the slot changes.
to->items[to_idx] = items[i];
} else {
// Different GPMAs: copy the bytes into the destination mempool and release the source copy.
void *new_data = mempool_malloc_from_gpma(to, to_mempool, items[i].len);
memcpy(new_data, items[i].data, items[i].len);
to->items[to_idx] = (struct gitem){items[i].len, new_data};
toku_mempool_mfree(from_mempool, items[i].data, items[i].len);
}
assert(leafentry_memsize(to->items[to_idx].data)==to->items[to_idx].len);
}
to->n_items_present += fromto.size;
// Transfer the fingerprint contribution and byte count from source to destination.
*a_fp -= a_rand * fp;
*b_fp += b_rand * fp;
*a_nbytes -= sizediff;
*b_nbytes += sizediff;
toku_free(items);
//toku_verify_gpma(from);
//toku_verify_gpma(to);
return 0;
}
// Recovery handler for a logged PMA redistribution.
// Effect: Pins the leaves at old_diskoff and new_diskoff, checks that every (from,to)
//   pair in fromto is within bounds, moves the listed items from the old leaf's buffer
//   to the new leaf's buffer via move_indices() (which also transfers fingerprint and
//   byte counts), stamps both nodes with lsn, and unpins both dirty.
// Parameters:
//   old_N   index limit the old leaf's buffer must have before the move (asserted)
//   new_N   index limit the destination buffer is resized to by move_indices()
//   fromto  array of (source slot, destination slot) pairs; freed before returning
// Every failure is fatal (assert), as in the other recovery handlers.
void toku_recover_pmadistribute (LSN lsn, FILENUM filenum, DISKOFF old_diskoff, DISKOFF new_diskoff, INTPAIRARRAY fromto, u_int32_t old_N, u_int32_t new_N) {
    struct cf_pair *pair = NULL;
    int r = find_cachefile(filenum, &pair);
    assert(r==0);
    void *node_va, *node_vb;
    assert(pair->brt);
    r = toku_cachetable_get_and_pin(pair->cf, old_diskoff, &node_va, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, pair->brt);
    assert(r==0);
    r = toku_cachetable_get_and_pin(pair->cf, new_diskoff, &node_vb, NULL, toku_brtnode_flush_callback, toku_brtnode_fetch_callback, pair->brt);
    assert(r==0);
    BRTNODE nodea = node_va; assert(nodea->height==0);
    BRTNODE nodeb = node_vb; assert(nodeb->height==0);
    {
        // Bounds-check the whole move list before touching either node.
        unsigned int i;
        //printf("{");
        for (i=0; i<fromto.size; i++) {
            //printf(" {%d %d}", fromto.array[i].a, fromto.array[i].b);
            assert(fromto.array[i].a < toku_gpma_index_limit(nodea->u.l.buffer));
            assert(fromto.array[i].b < new_N);
        }
        //printf("}\n");
    }
    VERIFY_COUNTS(nodea);
    assert(toku_gpma_index_limit(nodea->u.l.buffer)==old_N);
    r = move_indices (nodea->u.l.buffer, &nodea->u.l.buffer_mempool,
                      nodeb->u.l.buffer, &nodeb->u.l.buffer_mempool,
                      fromto,
                      nodea->rand4fingerprint, &nodea->local_fingerprint,
                      nodeb->rand4fingerprint, &nodeb->local_fingerprint,
                      &nodea->u.l.n_bytes_in_buffer, &nodeb->u.l.n_bytes_in_buffer,
                      new_N
                      );
    // move_indices returns errno when it cannot allocate its scratch array; in that case
    // nothing was moved and continuing would mark unchanged nodes as recovered.
    assert(r==0);
    // The bytes in buffer and fingerprint shouldn't change
    // PMA_ITERATE_IDX(nodeb->u.l.buffer, idx, key, keylen __attribute__((__unused__)), data, datalen __attribute__((__unused__)),
    // printf("%d: %s %s\n", idx, (char*)key, (char*)data));
    VERIFY_COUNTS(nodea);
    VERIFY_COUNTS(nodeb);
    nodea->log_lsn = lsn;
    nodeb->log_lsn = lsn;
    r = toku_cachetable_unpin(pair->cf, old_diskoff, 1, toku_serialize_brtnode_size(nodea));
    assert(r==0);
    r = toku_cachetable_unpin(pair->cf, new_diskoff, 1, toku_serialize_brtnode_size(nodeb));
    assert(r==0);
    toku_free_INTPAIRARRAY(fromto);
}
void toku_recover_changeunnamedroot (LSN UU(lsn), FILENUM filenum, DISKOFF UU(oldroot), DISKOFF newroot) { void toku_recover_changeunnamedroot (LSN UU(lsn), FILENUM filenum, DISKOFF UU(oldroot), DISKOFF newroot) {
struct cf_pair *pair = NULL; struct cf_pair *pair = NULL;
int r = find_cachefile(filenum, &pair); int r = find_cachefile(filenum, &pair);
......
// Black box tester, uses only the public interfaces.
#include "gpma.h"
#include "memory.h"
#include "toku_assert.h"
#include "../include/db.h"
#include <stdio.h>
#include <string.h>
int verbose;
static int count_frees=0;
// Free callback handed to toku_gpma_free() by the tests in this file.
// Releases one stored value and bumps count_frees so a test can check how many
// values the GPMA asked to have freed.
static void free_callback (u_int32_t len __attribute__((__unused__)), void*freeme, void *extra) {
    // The tests always pass &verbose as the cookie.
    assert(extra==(void*)&verbose);
    count_frees += 1;
    toku_free(freeme);
}
// Create an empty GPMA and free it straight away; the free callback must never fire
// because nothing was inserted.
static void test_create_and_free (void) {
    GPMA pma;
    int r = toku_gpma_create(&pma, 0);
    assert(r==0);

    count_frees = 0;
    toku_gpma_free(&pma, free_callback, &verbose);
    assert(count_frees==0);
}
// Comparison callback for GPMA entries that are NUL-terminated strings.
// alen/blen must equal the string length plus the terminating NUL (asserted).
// Returns the strcmp() ordering of aval against bval; extra is ignored.
static int compare_strings(u_int32_t alen, void *aval, u_int32_t blen, void *bval, void *extra __attribute__((__unused__))) {
    assert(alen==strlen(aval)+1);
    assert(blen==strlen(bval)+1);
    const char *lhs = aval;
    const char *rhs = bval;
    return strcmp(lhs, rhs);
}
// Renumber callback for operations that are expected not to move any items:
// being called at all is a test failure.
static int rcall_never (u_int32_t nitems __attribute__((__unused__)),
                        u_int32_t *froms __attribute__((__unused__)),
                        u_int32_t *tos __attribute__((__unused__)),
                        struct gitem *items __attribute__((__unused__)),
                        u_int32_t old_N __attribute__((__unused__)),
                        u_int32_t new_N __attribute__((__unused__)),
                        void *extra __attribute__((__unused__))) {
    assert(0);
    return 0; // only reached when assertions are compiled out
}
// Renumber callback that accepts any movement of items and reports success.
static int rcall_ok (u_int32_t nitems __attribute__((__unused__)),
                     u_int32_t *froms __attribute__((__unused__)),
                     u_int32_t *tos __attribute__((__unused__)),
                     struct gitem *items __attribute__((__unused__)),
                     u_int32_t old_N __attribute__((__unused__)),
                     u_int32_t new_N __attribute__((__unused__)),
                     void *extra __attribute__((__unused__))) {
    return 0;
}
// Insert three strings ("hello", "gello", "fello") into a fresh GPMA, check that a
// duplicate insert is rejected with DB_KEYEXIST, then exercise exact-match lookups
// (hits and misses) and the "bessel" directional lookups, and finally check that
// freeing the GPMA invokes the free callback once per stored string.
static void test_insert_A (void) {
int r;
GPMA pma;
r = toku_gpma_create(&pma, 0);
assert(r==0);
char *k1,*k2,*k3;
// First insert goes into an empty GPMA, so no renumbering is expected (rcall_never).
r = toku_gpma_insert(pma, 6, k1=strdup("hello"),
compare_strings, 0,
rcall_never, "hello", 0);
assert(r==0);
assert(toku_gpma_n_entries(pma)==1);
r = toku_gpma_insert(pma, 6, k2=strdup("gello"),
compare_strings, 0,
rcall_ok, "gello", 0);
assert(r==0);
r = toku_gpma_insert(pma, 6, k3=strdup("fello"),
compare_strings, 0,
rcall_ok, "fello", 0);
assert(r==0);
// Inserting an equal key again must fail; the rejected copy stays ours to free.
void *k;
r = toku_gpma_insert(pma, 6, k=strdup("fello"),
compare_strings, 0,
rcall_ok, "fello", 0);
assert(r==DB_KEYEXIST);
toku_free(k);
//printf("size=%d\n", toku_gpma_index_limit(pma));
// Exact-match lookups must return the very pointers that were inserted.
u_int32_t resultlen;
void *resultdata;
r = toku_gpma_lookup_item(pma, 6, "hello", compare_strings, 0, &resultlen, &resultdata, 0);
assert(r==0);
assert(strcmp(resultdata, "hello")==0);
assert(resultdata==k1);
r = toku_gpma_lookup_item(pma, 6, "gello", compare_strings, 0, &resultlen, &resultdata, 0);
assert(r==0);
assert(strcmp(resultdata, "gello")==0);
assert(resultdata==k2);
// idx starts at a sentinel so we can tell that the lookup stored an index.
u_int32_t idx=999;
r = toku_gpma_lookup_item(pma, 6, "fello", compare_strings, 0, &resultlen, &resultdata, &idx);
assert(r==0);
assert(strcmp(resultdata, "fello")==0);
assert(resultdata==k3);
assert(idx!=999);
// Misses: keys before, between and after the stored strings.
r = toku_gpma_lookup_item(pma, 6, "aello", compare_strings, 0, &resultlen, &resultdata, 0);
assert(r==DB_NOTFOUND);
r = toku_gpma_lookup_item(pma, 6, "fillo", compare_strings, 0, &resultlen, &resultdata, 0);
assert(r==DB_NOTFOUND);
r = toku_gpma_lookup_item(pma, 6, "gillo", compare_strings, 0, &resultlen, &resultdata, 0);
assert(r==DB_NOTFOUND);
r = toku_gpma_lookup_item(pma, 6, "hillo", compare_strings, 0, &resultlen, &resultdata, 0);
assert(r==DB_NOTFOUND);
r = toku_gpma_lookup_item(pma, 6, "zello", compare_strings, 0, &resultlen, &resultdata, 0);
assert(r==DB_NOTFOUND);
{
// Nested function (GCC extension), local to this block.
int bes (u_int32_t dlen __attribute__((__unused__)), void *dval, void *extra __attribute__((__unused__))) {
return strcmp(dval, "a"); // This will return 1 for everything. For dir<=0 we'll have DB_NOTFOUND, for dir>0 we'll have "fello"
}
r = toku_gpma_lookup_bessel(pma, bes, -1, 0, &resultlen, &resultdata, 0);
assert(r==DB_NOTFOUND);
r = toku_gpma_lookup_bessel(pma, bes, 0, 0, &resultlen, &resultdata, 0);
assert(r==DB_NOTFOUND);
r = toku_gpma_lookup_bessel(pma, bes, +1, 0, &resultlen, &resultdata, 0);
assert(r==0);
assert(strcmp(resultdata, "fello")==0);
}
{
// A second, unrelated nested function that happens to reuse the name bes in its own block.
int bes (u_int32_t dlen __attribute__((__unused__)), void *dval, void *extra __attribute__((__unused__))) {
return strcmp(dval, "z"); // This will return -1 for everything. For dir>=0 we'll have DB_NOTFOUND, for dir<0 we'll have "hello"
}
r = toku_gpma_lookup_bessel(pma, bes, -1, 0, &resultlen, &resultdata, 0); // find the rightmost thing
assert(r==0);
assert(strcmp(resultdata, "hello")==0);
r = toku_gpma_lookup_bessel(pma, bes, 0, 0, &resultlen, &resultdata, 0);
assert(r==DB_NOTFOUND);
r = toku_gpma_lookup_bessel(pma, bes, +1, 0, &resultlen, &resultdata, 0);
assert(r==DB_NOTFOUND);
}
// Freeing the GPMA must hand each of the three stored strings to free_callback.
count_frees=0;
toku_gpma_free(&pma, free_callback, &verbose);
assert(count_frees==3);
}
// Cookie passed as `extra` to rcall1(): what the callback should expect to see, plus a
// slot where it records what it actually saw.
struct rcall1_struct {
// Number of items rcall1 must be called with (asserted).
u_int32_t expect_n_right;
// Expected source and destination indices, checked element-wise; either may be NULL to skip that check.
u_int32_t *expect_froms_right, *expect_tos_right;
// Set by rcall1 to the number of items it was called with.
u_int32_t did_n_right;
};
// Renumber callback used with a struct rcall1_struct cookie in `extra`.
// Checks that the number of moved items matches the expectation, that each move's
// from/to index matches the expected arrays (when provided), that each item's length is
// its string length plus one, and that both the indices and the strings are strictly
// increasing.  Records the item count in the cookie and returns 0.
int rcall1 (u_int32_t nitems, u_int32_t *froms, u_int32_t *tos, struct gitem *items, u_int32_t old_N __attribute__((__unused__)), u_int32_t new_N __attribute__((__unused__)), void *extra) {
    //assert(old_N==expect_old_N);
    struct rcall1_struct *s = extra;
    //printf("old_N=%d new_N=%d\n", old_N, new_N);
    assert(nitems==s->expect_n_right);
    //printf("outer moved:"); for (j=0; j<nitems; j++) printf(" %d->%d", froms[j], tos[j]); printf("\n");
    u_int32_t j = 0;
    while (j<nitems) {
        if (s->expect_froms_right) {
            assert(s->expect_froms_right[j]==froms[j]);
        }
        if (s->expect_tos_right) {
            assert(s->expect_tos_right [j]==tos[j]);
        }
        assert(items[j].len==1+strlen(items[j].data));
        if (j>0) {
            // Moves must be reported in strictly increasing slot and key order.
            assert(froms[j-1]<froms[j] && tos[j-1]<tos[j]);
            assert(strcmp(items[j-1].data, items[j].data)<0);
        }
        j++;
    }
    s->did_n_right = nitems;
    return 0;
}
// Insert every string of the NULL-terminated array `strings` into a fresh GPMA, split
// that GPMA into two, and verify the result.
// Parameters:
//   strings             NULL-terminated array of distinct strings (at most 4; asserted)
//   expect_n_left       expected number of items the split renumbers on the left side,
//                       or negative to skip that check
//   expect_froms_left / expect_tos_left
//                       expected from/to indices for the left-side renumbering (NULL skips)
//   expect_n_right, expect_froms_right, expect_tos_right
//                       same for the right side; checked by rcall1()
// Checks performed after the split: both GPMAs verify, iteration over pma1 followed by
// pma2 yields strictly increasing strings, no string appears twice on one side, and
// every input string is found on one side or the other.
// Uses GCC nested functions so the callbacks can see this function's locals.
void test_split_internal (const char *strings[],
                          int expect_n_left,
                          u_int32_t *expect_froms_left,
                          u_int32_t *expect_tos_left,
                          int expect_n_right,
                          u_int32_t *expect_froms_right,
                          u_int32_t *expect_tos_right) {
    GPMA pma1, pma2;
    int r;
    r = toku_gpma_create(&pma1, 0);
    assert(r==0);
    r = toku_gpma_create(&pma2, 0);
    assert(r==0);
    assert(0==toku_gpma_valididx(pma1, toku_gpma_index_limit(pma1))); // because it's off the end of the array
    assert(0==toku_gpma_valididx(pma1, 0)); // because nothing is there
    assert(0!=toku_gpma_get_from_index(pma1, toku_gpma_index_limit(pma1), 0, 0));

    u_int32_t i;
    // Tracks the index limit of pma1 as reported through the renumber callbacks.
    u_int32_t current_estimate_of_N = toku_gpma_index_limit(pma1);
    //printf("%s:%d N=%d\n", __FILE__, __LINE__, current_estimate_of_N);
    for (i=0; strings[i]; i++) {
        // Every renumbering must start from the size we last heard about.
        int rcall_a (u_int32_t nitems __attribute__((__unused__)), u_int32_t *froms __attribute__((__unused__)), u_int32_t *tos __attribute__((__unused__)), struct gitem *items __attribute__((__unused__)), u_int32_t old_N, u_int32_t new_N, void *extra __attribute__((__unused__))) {
            //printf("%s:%d old_N=%d new_N=%d est=%d\n", __FILE__, __LINE__, old_N, new_N, current_estimate_of_N);
            assert(old_N==current_estimate_of_N);
            current_estimate_of_N = new_N;
            //printf("est=%d\n", current_estimate_of_N);
            return 0;
        }
        u_int32_t idx, len;
        void *data;
        r = toku_gpma_insert(pma1, 1+strlen(strings[i]), (char*)strings[i], compare_strings, 0, rcall_a, (char*)strings[i], &idx);
        //printf("est=%d\n", current_estimate_of_N);
        assert(r==0);
        // The item must be retrievable at the index the insert reported.
        r = toku_gpma_get_from_index(pma1, idx, &len, &data);
        assert(r==0);
        assert(len==1+strlen(strings[i]));
        assert(data==strings[i]);
    }
    u_int32_t n_strings = i;
    {
        // Realloc callback for the split: the strings are not owned by the GPMA, so the
        // "new" copy is simply the old pointer.
        int do_realloc (u_int32_t len, void *data, void**ndata, void *extra) {
            assert(extra==0);
            assert(len==1+strlen(data)); // was `len=...`, an assignment that could never fail
            *ndata = data; // Don't have to do anything
            return 0;
        }
        int did_n_left=-1;
        // Renumber callback for the left half of the split.
        int rcall0 (u_int32_t nitems, u_int32_t *froms, u_int32_t *tos, struct gitem *items, u_int32_t old_N, u_int32_t new_N, void *extra) {
            //printf("%s:%d old_N=%d new_N=%d\n", __FILE__, __LINE__, old_N, new_N);
            assert(old_N==current_estimate_of_N);
            current_estimate_of_N = new_N;
            assert(extra==0);
            u_int32_t j;
            if (expect_n_left>=0) assert(nitems==(u_int32_t)expect_n_left);
            did_n_left=nitems;
            //printf("did_n_left=%d nitems=%d n_strings=%d\n", did_n_left, nitems, n_strings);
            assert(did_n_left+nitems==n_strings);
            //printf("inner moved:"); for (j=0; j<nitems; j++) printf(" %d->%d", froms[j], tos[j]); printf("\n");
            for (j=0; j<nitems; j++) {
                if (expect_froms_left) assert(expect_froms_left[j]==froms[j]);
                if (expect_tos_left) assert(expect_tos_left [j]==tos[j]);
                assert(items[j].len==1+strlen(items[j].data));
                if (j>0) {
                    assert(froms[j-1]<froms[j] && tos[j-1]<tos[j]);
                    assert(strcmp(items[j-1].data, items[j].data)<0);
                }
            }
            return 0;
        }
        //u_int32_t expect_old_N = toku_gpma_index_limit(pma1);
        struct rcall1_struct r1s = {expect_n_right, expect_froms_right, expect_tos_right, -1};
        r = toku_gpma_split(pma1, pma2, 1, do_realloc, 0, rcall0, 0, rcall1, &r1s);
        toku_verify_gpma(pma1);
        toku_verify_gpma(pma2);
        assert (r==0);

        char *prevval=0;
        // Index at which each input string was found on each side, or -1 if not found there.
        int foundem_left[]={-1,-1,-1,-1};
        int foundem_right[]={-1,-1,-1,-1};
        // The bookkeeping arrays are fixed-size; refuse inputs that would overrun them.
        assert(n_strings<=sizeof(foundem_left)/sizeof(*foundem_left));
        GPMA_ITERATE(pma1, idx, vallen, val,
                     ({
                         assert(toku_gpma_valididx(pma1, idx)); // was checking pma2 while iterating pma1
                         if (prevval!=0) assert(strcmp(prevval,val)<0);
                         prevval=val;
                         unsigned int j;
                         for (j=0; j<n_strings; j++) {
                             if (strings[j]==val) { // The strings are EQ
                                 assert(foundem_left[j]==-1);
                                 foundem_left[j]=idx;
                             }
                         }
                     }));
        // prevval is deliberately carried over: everything in pma2 must sort after pma1.
        GPMA_ITERATE(pma2, idx, vallen, val,
                     ({
                         assert(toku_gpma_valididx(pma2, idx));
                         if (prevval!=0) assert(strcmp(prevval,val)<0);
                         prevval=val;
                         unsigned int j;
                         for (j=0; j<n_strings; j++) {
                             if (strings[j]==val) { // The strings are EQ
                                 assert(foundem_right[j]==-1);
                                 foundem_right[j]=idx;
                             }
                         }
                     }));
        {
            // `strings` is a pointer parameter here, so sizeof(strings)/sizeof(*strings)
            // is 1; use the counted length so that every string is checked.
            unsigned int j;
            for (j=0; j<n_strings; j++) assert(foundem_left[j]>=0 || foundem_right[j]>=0);
        }
    }
    toku_gpma_free(&pma1, 0, 0);
    toku_gpma_free(&pma2, 0, 0);
}
// Split a four-string GPMA and expect two items to be renumbered on each side, with
// the exact from/to slot indices listed below.
void test_split (void) {
    const char *strings[] = {"the", "quick", "brown", "fox", 0};
    // Left half: items move from slots 1,3 to slots 0,2.
    u_int32_t expect_froms_l[] = {1,3};
    u_int32_t expect_tos_l  [] = {0,2};
    // Right half: items move from slots 5,7 to slots 0,2.
    u_int32_t expect_froms_r[] = {5,7};
    u_int32_t expect_tos_r  [] = {0,2};

    test_split_internal(strings,
                        2, expect_froms_l, expect_tos_l,
                        2, expect_froms_r, expect_tos_r);
}
// Callback run by toku_gpma_delete_item() on the item being removed.
// The tests pass the string's own pointer as `extra`, so the deleted data must be that
// same pointer, and every test string is 5 characters plus the NUL.  Frees the string
// and returns 0.
int delete_free_callback (u_int32_t slotnum __attribute__((__unused__)),
                          u_int32_t deletelen,
                          void *deletedata,
                          void *extra) {
    assert(deletelen==6);
    assert(extra==deletedata);
    //printf("Freeing %s\n", (char*)deletedata);
    toku_free(deletedata);
    return 0;
}
// Insert N five-digit strings ("00000", "00001", ...) into a GPMA and then delete them
// one at a time in random order, letting delete_free_callback free each string.
void test_delete_n (int N) {
    GPMA pma;
    int r = toku_gpma_create(&pma, 0);
    assert(r==0);

    char *strings[N];
    int i;
    for (i=0; i<N; i++) {
        char str[6];
        snprintf(str, 6, "%05d", i);
        strings[i]=strdup(str);
        r = toku_gpma_insert(pma, 6, strings[i], compare_strings, 0, rcall_ok, strings[i], 0);
        assert(r==0);
    }

    // strings[0..remaining-1] are the ones still stored; pick a victim, then move the
    // last live pointer into its place so the live prefix stays dense.
    int remaining;
    for (remaining=N; remaining>0; remaining--) {
        int victim = random()%remaining;
        //printf("deleting %s\n", strings[victim]);
        r = toku_gpma_delete_item(pma, 6, strings[victim],
                                  compare_strings, 0,
                                  delete_free_callback, strings[victim],
                                  rcall_ok, 0);
        strings[victim] = strings[remaining-1];
    }
    toku_gpma_free(&pma, 0, 0);
}
// Run the random-order delete test at three sizes: 3, 100 and 300 items.
void test_delete (void) {
    static const int sizes[] = {3, 100, 300};
    unsigned int k;
    for (k=0; k<sizeof(sizes)/sizeof(sizes[0]); k++) {
        test_delete_n(sizes[k]);
    }
}
// Insert 20 strings, then delete them in insertion order by index.  After each delete,
// check that the index limit never grows, that every deleted string is gone, and that
// every remaining string is still found intact.  At the end the index limit must have
// shrunk at least once.
void test_delete_at (void) {
    GPMA pma;
    int r = toku_gpma_create(&pma, 0);
    assert(r==0);

    int N=20;
    char *strings[N];
    int i, j;
    for (i=0; i<N; i++) {
        char str[6];
        snprintf(str, 6, "%05d", i);
        strings[i]=strdup(str);
        r = toku_gpma_insert(pma, 6, strings[i], compare_strings, 0, rcall_ok, strings[i], 0);
        assert(r==0);
        //printf("insert, N=%d\n", toku_gpma_index_limit(pma));
    }

    u_int32_t max_limit = toku_gpma_index_limit(pma);
    u_int32_t min_limit = max_limit;
    u_int32_t prev_limit = max_limit;
    u_int32_t resultlen, idx;
    void *resultdata;
    for (j=0; j<N; j++) {
        // Find string j, then remove it through the index the lookup reported.
        r = toku_gpma_lookup_item(pma, 6, strings[j], compare_strings, 0, &resultlen, &resultdata, &idx);
        assert(r==0);
        assert(resultlen==6);
        assert(0==strcmp(resultdata, strings[j]));
        r = toku_gpma_delete_at_index(pma, idx, 0, 0);
        assert(r==0);

        u_int32_t this_limit = toku_gpma_index_limit(pma);
        if (this_limit<min_limit) min_limit=this_limit;
        assert(this_limit<=prev_limit);
        prev_limit=this_limit;
        //printf("delete, N=%d\n", this_limit);

        // Strings 0..j have been deleted; strings j+1..N-1 must still be present.
        for (i=0; i<N; i++) {
            r = toku_gpma_lookup_item(pma, 6, strings[i], compare_strings, 0, &resultlen, &resultdata, &idx);
            if (i<=j) {
                assert(r==DB_NOTFOUND);
            } else {
                assert(r==0);
                assert(resultlen==6);
                assert(0==strcmp(resultdata, strings[i]));
            }
        }
    }
    assert(min_limit<max_limit);

    // delete_at_index was given no free callback, so the strings are still ours.
    for (i=0; i<N; i++) toku_free(strings[i]);
    toku_gpma_free(&pma, 0, 0);
}
/* Bessel-style comparison: order the stored string dval against the string
 * passed as extra.  dlen must be the stored string's length including its NUL. */
static int compare_this_string (u_int32_t dlen, void *dval, void *extra) {
    const char *stored = dval;
    const char *wanted = extra;
    assert(strlen(stored)+1 == dlen);
    return strcmp(stored, wanted);
}
/* Insert the keys "000".."256" and check toku_gpma_lookup_bessel():
 * with direction +1 a lookup for key k must return key k+1, and with
 * direction -1 a lookup for key k must return key k-1. */
static void test_bes (void) {
    enum { NKEYS = 257 };
    static const int directions[2] = {+1, -1};
    GPMA pma;
    char *keys[NKEYS];
    int k, d;

    int r = toku_gpma_create(&pma, 0);
    assert(r==0);

    for (k = 0; k < NKEYS; k++) {
        char text[4];
        snprintf(text, sizeof(text), "%03d", k);
        keys[k] = strdup(text);
        r = toku_gpma_insert(pma, 1+strlen(keys[k]), keys[k], compare_strings, 0, rcall_ok, keys[k], 0);
        assert(r==0);
    }

    for (d = 0; d < 2; d++) {
        const int dir = directions[d];
        // Going up, the last key has no successor; going down, the first key has no predecessor.
        const int first = (dir > 0) ? 0 : 1;
        const int end   = (dir > 0) ? NKEYS-1 : NKEYS;
        for (k = first; k < end; k++) {
            u_int32_t len, idx;
            void *data;
            char *neighbor = keys[k+dir];
            r = toku_gpma_lookup_bessel(pma, compare_this_string, dir, keys[k], &len, &data, &idx);
            assert(r==0);
            assert(len==1+strlen(neighbor));
            assert(data==neighbor);
        }
    }

    for (k = 0; k < NKEYS; k++) toku_free(keys[k]);
    toku_gpma_free(&pma, 0, 0);
}
/* Entry point of the black-box GPMA test.
 * Recognizes -v/--verbose and -q/--quiet (other arguments are ignored),
 * runs every test in order, then releases allocator bookkeeping. */
int main (int argc, const char *argv[]) {
    int argi = 1;
    while (argi < argc) {
        const char *arg = argv[argi++];
        if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose")) {
            verbose = 1;
        } else if (!strcmp(arg, "-q") || !strcmp(arg, "--quiet")) {
            verbose = 0;
        }
    }

    test_create_and_free();
    test_insert_A();
    test_split();
    test_delete();
    test_delete_at();
    test_bes();

    toku_malloc_cleanup();
    return 0;
}
// glass box tester looks inside gpma.c
#include "gpma.h"
#include "gpma-internal.h"
#include "memory.h"
#include "toku_assert.h"
#include "../include/db.h"
#include <errno.h>
#include <string.h>
int verbose=0;
/* Number of items released through free_callback() since a test last reset it. */
static int count_frees = 0;

/* Free-callback handed to toku_gpma_free(): releases one item and counts it.
 * The tests always pass &verbose as the extra pointer, which is checked here. */
static void free_callback (u_int32_t len __attribute__((__unused__)),
                           void *freeme,
                           void *extra) {
    assert((void*)&verbose == extra);
    toku_free(freeme);
    ++count_frees;
}
/* Key comparison callback: both values are NUL-terminated strings whose
 * recorded lengths include the terminator; ordering is that of strcmp(). */
static int compare_strings (u_int32_t alen, void *aval,
                            u_int32_t blen, void *bval,
                            void *extra __attribute__((__unused__))) {
    const char *lhs = aval;
    const char *rhs = bval;
    assert(strlen(lhs)+1 == alen);
    assert(strlen(rhs)+1 == blen);
    return strcmp(lhs, rhs);
}
// Spot-check the small integer helpers toku_lg(), toku_hyperceil() and toku_max_int().
static void test_lg (void) {
// The cases below pin toku_lg(n) to the ceiling of log2(n) for n in 1..8.
assert(toku_lg(1)==0);
assert(toku_lg(2)==1);
assert(toku_lg(3)==2);
assert(toku_lg(4)==2);
assert(toku_lg(5)==3);
assert(toku_lg(7)==3);
assert(toku_lg(8)==3);
// The cases below pin toku_hyperceil(n) to the smallest power of two that is >= n, with 0 mapping to 1.
assert(toku_hyperceil(0)==1);
assert(toku_hyperceil(1)==1);
assert(toku_hyperceil(2)==2);
assert(toku_hyperceil(3)==4);
assert(toku_hyperceil(4)==4);
assert(toku_hyperceil(5)==8);
assert(toku_hyperceil(7)==8);
assert(toku_hyperceil(8)==8);
// toku_max_int returns the larger argument, whichever position it is in (including a negative and equal inputs).
assert(toku_max_int(-1,2)==2);
assert(toku_max_int(2,2)==2);
assert(toku_max_int(2,3)==3);
assert(toku_max_int(3,2)==3);
}
/* toku_gpma_create() must accept an initial size of 0 and reject a size of 3
 * with EINVAL (presumably because 3 is not a power of two -- confirm in gpma.c). */
static void test_create_sizes (void) {
    GPMA pma;
    int r;

    r = toku_gpma_create(&pma, 0);
    assert(0==r);
    toku_gpma_free(&pma, free_callback, &verbose);

    r = toku_gpma_create(&pma, 3);
    assert(EINVAL==r);
}
/* There are two mallocs inside toku_gpma_create.  Fail each of them in turn
 * and make sure that creation reports ENOMEM in either case. */
static void test_create_badmalloc (void) {
    int victim;
    for (victim = 0; victim < 2; victim++) {
        GPMA pma;
        int killarray[2] = {victim, -1};
        toku_malloc_counter = 0;
        toku_dead_mallocs = killarray;
        int r = toku_gpma_create(&pma, 0);
        assert(r==ENOMEM);
        // killarray is about to go out of scope, so drop the reference to it.
        toku_dead_mallocs = 0;
    }
}
/* Glass-box test of toku_gpma_find_index(), toku_gpma_lookup_bessel() and
 * toku_gpma_get_from_index() on a 16-slot array that ends up holding the
 * single key "hello" in slot 3.
 * Uses GCC nested functions for the bessel callbacks. */
static void test_find_index (void) {
    int r;
    GPMA pma;
    r = toku_gpma_create(&pma, 16);
    assert(r==0);
    assert(toku_gpma_index_limit(pma)==16);
    int found;
    {
        u_int32_t idx;
        // Empty array: the key is not found and the reported index is 0.
        idx = toku_gpma_find_index(pma, 6, "hello", compare_strings, 0, &found);
        assert(found==0);
        assert(idx==0);

        void *k;
        toku_gpma_set_at_index(pma, 3, 6, k=toku_strdup("hello"));
        // Must be a comparison: the previous "len = 6" was an assignment, so the
        // assertion always passed and overwrote the field it was meant to verify.
        assert(pma->items[3].len == 6);
        assert(pma->items[3].data == k);

        // The stored key is found at its slot.
        idx = toku_gpma_find_index(pma, 6, "hello", compare_strings, 0, &found);
        assert(found);
        assert(idx==3);
        // A smaller key is not found; index 0 is reported.
        idx = toku_gpma_find_index(pma, 2, "a", compare_strings, 0, &found);
        assert(!found);
        assert(idx==0);
        // A larger key is not found; the index just past the stored item is reported.
        idx = toku_gpma_find_index(pma, 2, "z", compare_strings, 0, &found);
        assert(!found);
        assert(idx==4);
    }
    {
        u_int32_t resultlen; void*resultdata;
        int bes (u_int32_t dlen __attribute__((__unused__)), void *dval, void *extra __attribute__((__unused__))) {
            return strcmp(dval, "a"); // Positive for everything stored.  For dir<=0 we'll have DB_NOTFOUND, for dir>0 we'll have "hello"
        }
        r = toku_gpma_lookup_bessel(pma, bes, -1, 0, &resultlen, &resultdata, 0);
        assert(r==DB_NOTFOUND);
        r = toku_gpma_lookup_bessel(pma, bes, 0, 0, &resultlen, &resultdata, 0);
        assert(r==DB_NOTFOUND);
        r = toku_gpma_lookup_bessel(pma, bes, +1, 0, &resultlen, &resultdata, 0);
        assert(r==0);
        assert(strcmp(resultdata, "hello")==0);
    }
    {
        u_int32_t resultlen; void*resultdata;
        int bes (u_int32_t dlen __attribute__((__unused__)), void *dval, void *extra __attribute__((__unused__))) {
            return strcmp(dval, "z"); // Negative for everything stored.  For dir>=0 we'll have DB_NOTFOUND, for dir<0 we'll have "hello"
        }
        u_int32_t idx;
        r = toku_gpma_lookup_bessel(pma, bes, -1, 0, &resultlen, &resultdata, &idx); // find the rightmost thing
        assert(r==0);
        assert(strcmp(resultdata, "hello")==0);
        {
            // The index returned by the lookup must address the same item.
            u_int32_t altlen; void*altdata;
            r = toku_gpma_get_from_index(pma, idx, &altlen, &altdata);
            assert(r==0);
            assert(altlen==resultlen);
            assert(altdata==resultdata);
        }
        r = toku_gpma_lookup_bessel(pma, bes, 0, 0, &resultlen, &resultdata, 0);
        assert(r==DB_NOTFOUND);
        r = toku_gpma_lookup_bessel(pma, bes, +1, 0, &resultlen, &resultdata, 0);
        assert(r==DB_NOTFOUND);
    }
    // Exactly one item ("hello") is released on teardown.
    count_frees=0;
    toku_gpma_free(&pma, free_callback, &verbose);
    assert(count_frees==1);
}
// Context handed to rcall_0 through its "extra" pointer by test_smooth_region.
struct rcall_0_pair {
// The idx argument that test_smooth_region passed to toku_gpma_smooth_region.
int idx;
// Nonzero when test_smooth_region passed a non-null newidx pointer; selects which placement rcall_0 expects.
int use_index_case;
};
static int rcall_0 (u_int32_t nitems, u_int32_t *froms, u_int32_t *tos, struct gitem *items, u_int32_t old_N, u_int32_t new_N, void *extra) {
assert(old_N==16);
assert(new_N==16);
struct rcall_0_pair *p = extra;
assert(nitems==3);
u_int32_t i;
for (i=0; i<3; i++) assert(froms[i]==i);
for (i=0; i<2; i++) { assert(tos[i]<tos[i+1]); }
assert(strcmp(items[0].data,"a")==0);
assert(strcmp(items[1].data,"b")==0);
switch (p->use_index_case) {
case 1:
switch (p->idx) {
case 0: assert(tos[0]==5); assert(tos[1]==9); assert(tos[2]==13); break;
case 1: assert(tos[0]==1); assert(tos[1]==9); assert(tos[2]==13); break;
case 2: assert(tos[0]==1); assert(tos[1]==5); assert(tos[2]==13); break;
case 3: assert(tos[0]==1); assert(tos[1]==5); assert(tos[2]== 9); break;
case 4: assert(tos[0]==1); assert(tos[1]==5); assert(tos[2]== 9); break;
default: assert(0);
}
break;
case 0:
assert(tos[0]==1); assert(tos[1]==6); assert(tos[2]==11);
break;
default: assert(0);
}
return 0;
}
static void test_smooth_region (void) {
int r;
GPMA pma;
int use_index_case;
for (use_index_case = 0; use_index_case<2; use_index_case++) {
int malloc_failnum;
for (malloc_failnum=0; malloc_failnum<4; malloc_failnum++) {
u_int32_t idx;
for (idx=0; idx<4; idx++) {
r = toku_gpma_create(&pma, 16);
assert(r==0);
int j;
for (j=0; j<3; j++) {
char str[]={'a'+j, 0};
pma->items[j].len = 2;
pma->items[j].data = toku_strdup(str);
}
toku_malloc_counter=0;
int killarray[2]={malloc_failnum,-1};
if (malloc_failnum<3) {
toku_dead_mallocs=killarray;
}
u_int32_t newidx;
struct rcall_0_pair r0 = {idx,use_index_case};
r = toku_gpma_smooth_region(pma, 0, 16, 3, idx, use_index_case ? &newidx : 0, rcall_0, &r0, pma->N);
if (malloc_failnum<3) assert(r==ENOMEM); else assert(r==0);
toku_dead_mallocs=0;
count_frees=0;
toku_gpma_free(&pma, free_callback, &verbose);
assert(count_frees==3);
}
}
}
}
/* Renumbering callback used by test_make_space_at_up.
 * Expects exactly two items, "a" and "b", moving from slots 0 and 1 of an
 * 8-slot array to slots 3 and 6.
 * Returns 0 so the caller proceeds with the move. */
static int rcall_1 (u_int32_t nitems, u_int32_t *froms, u_int32_t *tos, struct gitem *items, u_int32_t old_N, u_int32_t new_N, void *extra __attribute__((__unused__))) {
    u_int32_t i;
    assert(old_N==8);
    assert(new_N==8);
    // Check the count up front (as rcall_2 and rcall_3 do): the checks below read
    // element [1] of each array, which is out of bounds for a shorter callback.
    assert(nitems==2);
    for (i=0; i<nitems; i++) assert(froms[i]==i);
    // "i+1<nitems" instead of "i<nitems-1": nitems is unsigned, so nitems-1 would wrap for 0.
    for (i=0; i+1<nitems; i++) { assert(tos[i]<tos[i+1]); }
    assert(tos[0]==3); assert(tos[1]==6);
    assert(strcmp(items[0].data,"a")==0);
    assert(strcmp(items[1].data,"b")==0);
    return 0;
}
/* toku_make_space_at() at index 0 of an 8-slot array whose slots 0 and 1 hold
 * "a" and "b".  Runs four times: failing malloc number 0, 1 or 2 must produce
 * ENOMEM; with no failure armed the items end up in slots 3 and 6 and the
 * new index is 1.  Both items must be freed on teardown in every case. */
static void test_make_space_at_up (void) {
    int failing_malloc;
    for (failing_malloc = 0; failing_malloc < 4; failing_malloc++) {
        GPMA pma;
        int r = toku_gpma_create(&pma, 8);
        assert(r==0);
        assert(toku_gpma_n_entries(pma)==0);

        int slot;
        for (slot = 0; slot < 2; slot++) {
            char letter[] = {'a'+slot, 0};
            pma->items[slot].len  = 2;
            pma->items[slot].data = toku_strdup(letter);
        }

        // Arm the failure injection only after the setup allocations are done.
        const int expect_failure = failing_malloc < 3;
        int killarray[2] = {failing_malloc, -1};
        u_int32_t newidx;
        toku_malloc_counter = 0;
        if (expect_failure) {
            toku_dead_mallocs = killarray;
        }
        r = toku_make_space_at(pma, 0, &newidx, rcall_1, 0);
        toku_dead_mallocs = 0;

        if (!expect_failure) {
            assert(r==0);
            assert(newidx==1);
            assert(strcmp(pma->items[3].data, "a")==0);
            assert(strcmp(pma->items[6].data, "b")==0);
        } else {
            assert(r==ENOMEM);
        }

        count_frees = 0;
        toku_gpma_free(&pma, free_callback, &verbose);
        assert(count_frees==2);
    }
}
/* Renumbering callback used by test_make_space_at_down.
 * Expects "a" and "b" to move from slots 6 and 7 of an 8-slot array to
 * slots 1 and 6. */
static int rcall_2 (u_int32_t nitems, u_int32_t *froms, u_int32_t *tos, struct gitem *items, u_int32_t old_N, u_int32_t new_N, void *extra __attribute__((__unused__))) {
    static const u_int32_t want_from[2] = {6, 7};
    static const u_int32_t want_to[2]   = {1, 6};
    static const char *const want_key[2] = {"a", "b"};
    u_int32_t k;

    assert(old_N==8);
    assert(new_N==8);
    assert(nitems==2);
    for (k = 0; k < 2; k++) assert(froms[k]==want_from[k]);
    for (k = 0; k < 2; k++) assert(tos[k]==want_to[k]);
    for (k = 0; k < 2; k++) assert(strcmp(items[k].data, want_key[k])==0);
    return 0;
}
/* toku_make_space_at() at index 7 of an 8-slot array whose last two slots hold
 * "a" and "b": the items must end up in slots 1 and 6 with the new index at 3. */
static void test_make_space_at_down (void) {
    enum { SIZE = 8 };
    GPMA pma;
    int r = toku_gpma_create(&pma, SIZE);
    assert(r==0);
    assert(toku_gpma_n_entries(pma)==0);

    // Place the two items directly into the last two slots.
    int which;
    for (which = 0; which < 2; which++) {
        char letter[] = {'a'+which, 0};
        const int slot = SIZE-2+which;
        pma->items[slot].len  = 2;
        pma->items[slot].data = toku_strdup(letter);
    }

    u_int32_t newidx;
    r = toku_make_space_at(pma, 7, &newidx, rcall_2, 0);
    assert(r==0);
    assert(newidx==3);
    assert(strcmp(pma->items[1].data, "a")==0);
    assert(strcmp(pma->items[6].data, "b")==0);

    count_frees = 0;
    toku_gpma_free(&pma, free_callback, &verbose);
    assert(count_frees==2);
}
/* Renumbering callback used by test_make_space_at_down_end.
 * Expects "a" and "b" to move from slots 6 and 7 of an 8-slot array to
 * slots 1 and 3. */
static int rcall_3 (u_int32_t nitems, u_int32_t *froms, u_int32_t *tos, struct gitem *items, u_int32_t old_N, u_int32_t new_N, void *extra __attribute__((__unused__))) {
    static const u_int32_t want_from[2] = {6, 7};
    static const u_int32_t want_to[2]   = {1, 3};
    static const char *const want_key[2] = {"a", "b"};
    u_int32_t k;

    assert(old_N==8);
    assert(new_N==8);
    assert(nitems==2);
    for (k = 0; k < 2; k++) assert(froms[k]==want_from[k]);
    for (k = 0; k < 2; k++) assert(tos[k]==want_to[k]);
    for (k = 0; k < 2; k++) assert(strcmp(items[k].data, want_key[k])==0);
    return 0;
}
static void test_make_space_at_down_end (void) {
int no_rcall;
for (no_rcall=0; no_rcall<2; no_rcall++) {
int r;
GPMA pma;
int size=8;
r = toku_gpma_create(&pma, size);
assert(r==0);
assert(toku_gpma_n_entries(pma)==0);
int j;
for (j=0; j<2; j++) {
char str[]={'a'+j, 0};
pma->items[size-2+j].len = 2;
pma->items[size-2+j].data = toku_strdup(str);
}
u_int32_t newidx;
r = toku_make_space_at(pma, 8, &newidx, no_rcall ? 0 : rcall_3, 0);
assert(r==0);
assert(newidx==6);
assert(strcmp(pma->items[1].data, "a")==0);
assert(strcmp(pma->items[3].data, "b")==0);
count_frees=0;
toku_gpma_free(&pma, free_callback, &verbose);
assert(count_frees==2);
}
}
/* Renumbering callback that accepts any move: ignores every argument and returns 0. */
static int rcall_ok (u_int32_t     nitems __attribute__((__unused__)),
                     u_int32_t    *froms  __attribute__((__unused__)),
                     u_int32_t    *tos    __attribute__((__unused__)),
                     struct gitem *items  __attribute__((__unused__)),
                     u_int32_t     old_N  __attribute__((__unused__)),
                     u_int32_t     new_N  __attribute__((__unused__)),
                     void         *extra  __attribute__((__unused__))) {
    return 0;
}
/* Callback that must never be invoked: aborts the process if it is.
 * NOTE(review): unlike the other rcall_* callbacks here it takes no
 * old_N/new_N parameters, and nothing in this test file appears to
 * reference it -- confirm whether it is still needed. */
static __attribute__((__noreturn__)) int rcall_never (u_int32_t     nitems __attribute__((__unused__)),
                                                      u_int32_t    *froms  __attribute__((__unused__)),
                                                      u_int32_t    *tos    __attribute__((__unused__)),
                                                      struct gitem *items  __attribute__((__unused__)),
                                                      void         *extra  __attribute__((__unused__))) {
    abort();
}
// Exercise toku_gpma_insert() under injected malloc failures.
// malloc_failnum selects the scenario:
//   0      no failure is armed; all four inserts succeed and the array grows to N==8.
//   1..3   malloc number malloc_failnum-1 fails during the second insert, which must return ENOMEM
//          and leave only "hello" in the array.
//   4..7   malloc number malloc_failnum-4 fails during the fourth insert, which must return ENOMEM
//          and leave N at 4.
// In every scenario the number of items freed on teardown must equal the number successfully inserted.
static void test_insert_malloc_fails (void) {
int malloc_failnum;
int killarray[2]={-1,-1};
for (malloc_failnum=0; malloc_failnum<8; malloc_failnum++) {
toku_dead_mallocs=killarray;
// presumably -1 means "fail no malloc" -- confirm against the allocator in memory.c
toku_dead_mallocs[0]=-1;
int n_inserted=0;
int r;
GPMA pma;
r = toku_gpma_create(&pma, 0);
assert(r==0);
toku_malloc_counter=0;
// First insert runs with no failure armed and must succeed.
r = toku_gpma_insert(pma, 6, strdup("hello"),
compare_strings, 0, rcall_ok, "hello", 0);
assert(r==0);
assert(toku_gpma_n_entries(pma)==1);
n_inserted++;
toku_malloc_counter=0;
// NOTE(review): this arms a failure for malloc_failnum==4 as well (index 3), yet the check below
// expects ENOMEM only for 1..3; presumably the second insert never reaches a malloc numbered 3 -- confirm.
if (1<=malloc_failnum && malloc_failnum<5) {
toku_dead_mallocs[0]=malloc_failnum-1;
}
void *k;
r = toku_gpma_insert(pma, 6, k=strdup("gello"),
compare_strings, 0, rcall_ok, "gello", 0);
if (1<=malloc_failnum && malloc_failnum<4) {
assert(r==ENOMEM);
// The failed insert did not take ownership of k, so release it here.
toku_free(k);
assert(toku_gpma_n_entries(pma)==1);
// Scan every slot: the only item present must still be "hello".
int countem=0;
u_int32_t i;
for (i=0; i<pma->N; i++) {
if (pma->items[i].data) {
countem++;
assert(strcmp("hello", pma->items[i].data)==0);
}
}
assert(countem==1);
} else {
assert(r==0);
assert(toku_gpma_n_entries(pma)==2);
n_inserted++;
// NOTE(review): the result of this third insert is stored in r but never checked.
r = toku_gpma_insert(pma, 6, k=strdup("fello"),
compare_strings, 0, rcall_ok, "fello", 0);
assert(pma->N==4);
n_inserted++;
toku_malloc_counter=0;
assert(pma->N==4);
if (4<=malloc_failnum && malloc_failnum<8) {
toku_dead_mallocs=killarray;
toku_dead_mallocs[0]=malloc_failnum-4;
}
// Fourth insert: with three items in four slots, a successful insert grows the array to N==8.
r = toku_gpma_insert(pma, 6, k=strdup("fellp"),
compare_strings, 0, rcall_ok, "fellp", 0);
if (4<=malloc_failnum && malloc_failnum<8) {
assert(r==ENOMEM);
toku_free(k);
assert(pma->N==4);
} else {
assert(r==0);
n_inserted++;
assert(pma->N==8);
}
}
count_frees=0;
toku_gpma_free(&pma, free_callback, &verbose);
assert(count_frees==n_inserted);
}
// killarray goes out of scope on return, so drop the global reference to it.
toku_dead_mallocs=0;
}
/* Smoke test: call toku_gpma_distribute() for four items over a 16-slot
 * region.  Nothing about the resulting placement is checked. */
static void test_distribute (void) {
    struct gitem four_items[4] = {{2,"a"},{2,"b"},{2,"c"},{2,"d"}};
    u_int32_t destinations[4];
    GPMA pma;

    int r = toku_gpma_create(&pma, 16);
    assert(r==0);
    toku_gpma_distribute(pma, 0, 16, 4, four_items, destinations);
    toku_gpma_free(&pma, 0, 0);
}
/* Renumbering callback for the first call in test_smooth_deleted: the array
 * shrinks from 16 to 8 slots and "a","b","c" move from 0,1,2 to 0,3,6. */
static int rcall_4a (u_int32_t nitems, u_int32_t *froms, u_int32_t *tos, struct gitem *items, u_int32_t old_N, u_int32_t new_N, void *extra __attribute__((__unused__))) {
    static const u_int32_t want_from[3] = {0, 1, 2};
    static const u_int32_t want_to[3]   = {0, 3, 6};
    static const char *const want_key[3] = {"a", "b", "c"};
    u_int32_t k;

    assert(old_N==16);
    assert(new_N==8);
    assert(nitems==3);
    for (k = 0; k < 3; k++) {
        assert(froms[k]==want_from[k]);
        assert(tos[k]==want_to[k]);
    }
    for (k = 0; k < 3; k++) assert(strcmp(items[k].data, want_key[k])==0);
    return 0;
}
/* Renumbering callback for the second call in test_smooth_deleted: the array
 * stays at 8 slots and "a","b","c" are reported as staying in slots 1, 3, 6. */
static int rcall_4b (u_int32_t nitems, u_int32_t *froms, u_int32_t *tos, struct gitem *items, u_int32_t old_N, u_int32_t new_N, void *extra __attribute__((__unused__))) {
    static const u_int32_t want_slot[3] = {1, 3, 6};
    static const char *const want_key[3] = {"a", "b", "c"};
    u_int32_t k;

    assert(old_N==8);
    assert(new_N==8);
    assert(nitems==3);
    for (k = 0; k < 3; k++) {
        assert(froms[k]==want_slot[k]);
        assert(tos[k]==want_slot[k]);
    }
    for (k = 0; k < 3; k++) assert(strcmp(items[k].data, want_key[k])==0);
    return 0;
}
/* Glass-box test of toku_smooth_deleted_region().
 * Items are planted directly in pma->items (string literals, so nothing is
 * freed on teardown) and n_items_present is set by hand.  Four scenarios:
 *   1. three items at the left of a 16-slot array: the array shrinks to 8
 *      (rcall_4a), and a second call leaves the placement alone (rcall_4b);
 *   2. one item at slot 4, deletion reported at slot 15: the array shrinks to
 *      8 with the item at slot 0;
 *   3. one item at slot 7, deletion reported at slot 0: same outcome;
 *   4. an empty 32-slot array with no callback: the call must simply succeed.
 * Every call's return value is checked; scenarios 2 and 3 previously stored
 * it in r without looking at it. */
static void test_smooth_deleted (void) {
    GPMA pma;
    int i;

    // Scenario 1
    int r = toku_gpma_create(&pma, 16);
    assert(r==0);
    pma->items[0] = (struct gitem){2, "a"};
    pma->items[1] = (struct gitem){2, "b"};
    pma->items[2] = (struct gitem){2, "c"};
    pma->n_items_present=3;
    r = toku_smooth_deleted_region(pma, 3, 3, rcall_4a, 0);
    assert(r==0);
    r = toku_smooth_deleted_region(pma, 4, 4, rcall_4b, 0);
    assert(r==0);
    toku_gpma_free(&pma, 0, 0);

    // Scenario 2
    r = toku_gpma_create(&pma, 16);
    assert(r==0);
    pma->items[4] = (struct gitem){2, "a"};
    pma->n_items_present = 1;
    r = toku_smooth_deleted_region(pma, 15, 15, rcall_ok, 0);
    assert(r==0);
    assert(pma->N==8);
    for (i=0; i<8; i++) {
        if (i==0) assert(pma->items[i].data && 0==strcmp(pma->items[i].data,"a"));
        else      assert(!pma->items[i].data);
    }
    toku_gpma_free(&pma, 0, 0);

    // Scenario 3
    r = toku_gpma_create(&pma, 16);
    assert(r==0);
    pma->items[7] = (struct gitem){2, "a"};
    pma->n_items_present = 1;
    r = toku_smooth_deleted_region(pma, 0, 0, rcall_ok, 0);
    assert(r==0);
    assert(pma->N==8);
    for (i=0; i<8; i++) {
        if (i==0) assert(pma->items[i].data && 0==strcmp(pma->items[i].data,"a"));
        else      assert(!pma->items[i].data);
    }
    toku_gpma_free(&pma, 0, 0);

    // Scenario 4
    r = toku_gpma_create(&pma, 32);
    assert(r==0);
    r = toku_smooth_deleted_region(pma, 6, 12, 0, 0);
    assert(r==0);
    toku_gpma_free(&pma, 0, 0);
}
/* Bessel function that is negative only for the item whose first character
 * is 'a' and positive for every other item.  Items must have length 2 and
 * extra must be null. */
int bes_first (u_int32_t dlen, void *dval, void *extra) {
    const char *text = dval;
    assert(dlen==2);
    assert(extra==0);
    return (text[0]=='a') ? -1 : 1;
}
// Look up with direction = -1 when the bessel function is positive for every
// element except the first.  The answer is the largest index with a negative
// value, which is index 0.
static void test_lookup_first (void) {
    char *letters[4] = {"a", "b", "c", "d"};
    GPMA pma;
    int slot;

    int r = toku_gpma_create(&pma, 4);
    assert(r==0);
    for (slot = 0; slot < 4; slot++) {
        pma->items[slot] = (struct gitem){2, letters[slot]};
    }
    // NOTE(review): four slots are filled but the count is set to 3 -- confirm this is intended.
    pma->n_items_present = 3;

    int found;
    u_int32_t idx = toku_gpma_find_index_bes(pma, bes_first, -1, 0, &found);
    // The item must be found, at index 0.
    assert(found);
    assert(idx==0);
    toku_gpma_free(&pma, 0, 0);
}
/* Bessel function that is positive only for the item whose first character
 * is 'd' and negative for every other item.  Items must have length 2 and
 * extra must be null. */
int bes_last (u_int32_t dlen, void *dval, void *extra) {
    const char *text = dval;
    assert(dlen==2);
    assert(extra==0);
    return (text[0]=='d') ? 1 : -1;
}
// Look up with direction = +1 using bes_first, which is negative for "a" and
// positive for everything else: the first index with a positive value is 1.
// NOTE(review): despite the test's name it uses bes_first, and bes_last is not
// referenced anywhere in this file -- confirm which callback was intended.
static void test_lookup_last (void) {
    char *letters[4] = {"a", "b", "c", "d"};
    GPMA pma;
    int slot;

    int r = toku_gpma_create(&pma, 4);
    assert(r==0);
    for (slot = 0; slot < 4; slot++) {
        pma->items[slot] = (struct gitem){2, letters[slot]};
    }
    // NOTE(review): four slots are filled but the count is set to 3 -- confirm this is intended.
    pma->n_items_present = 3;

    int found;
    u_int32_t idx = toku_gpma_find_index_bes(pma, bes_first, +1, 0, &found);
    // The item must be found, at index 1.
    assert(found);
    assert(idx==1);
    toku_gpma_free(&pma, 0, 0);
}
/* Entry point of the glass-box GPMA test.
 * Recognizes -v/--verbose and -q/--quiet (other arguments are ignored) and
 * runs every test in order. */
int main (int argc, const char *argv[]) {
    int i;
    for (i = 1; i < argc; i++) {
        const char *arg = argv[i];
        if (0 == strcmp(arg, "-v") || 0 == strcmp(arg, "--verbose"))
            verbose = 1;
        else if (0 == strcmp(arg, "-q") || 0 == strcmp(arg, "--quiet"))
            verbose = 0;
    }
    test_lg();
    test_create_sizes();
    test_create_badmalloc();
    test_find_index();
    test_smooth_region();
    test_make_space_at_up();
    test_make_space_at_down();
    test_make_space_at_down_end();
    test_insert_malloc_fails();
    test_distribute();
    test_smooth_deleted();
    test_lookup_last();
    test_lookup_first();
    // Clean up only after the last test: it used to run before the final three
    // tests, which still allocate.  The other gpma test programs call it last too.
    toku_malloc_cleanup();
    return 0;
}
// Find out if the leftmost value is returned when the besselfun returns 0 for more than one thing.
#include "gpma.h"
#include "memory.h"
#include "toku_assert.h"
#include "../include/db.h"
#include <stdio.h>
#include <string.h>
int verbose;
/* Key comparison callback: both values are NUL-terminated strings whose
 * recorded lengths include the terminator; ordering is that of strcmp(). */
static int compare_strings (u_int32_t alen, void *aval,
                            u_int32_t blen, void *bval,
                            void *extra __attribute__((__unused__))) {
    const char *left  = aval;
    const char *right = bval;
    assert(1+strlen(left)  == alen);
    assert(1+strlen(right) == blen);
    return strcmp(left, right);
}
/* Renumbering callback that accepts any move: ignores every argument and returns 0. */
static int rcall_ok (u_int32_t     nitems __attribute__((__unused__)),
                     u_int32_t    *froms  __attribute__((__unused__)),
                     u_int32_t    *tos    __attribute__((__unused__)),
                     struct gitem *items  __attribute__((__unused__)),
                     u_int32_t     old_N  __attribute__((__unused__)),
                     u_int32_t     new_N  __attribute__((__unused__)),
                     void         *extra  __attribute__((__unused__))) {
    return 0;
}
/* Do a direction-0 bessel lookup whose function is zero for every item whose
 * first character lies in [minc, maxc], and check that the item returned has
 * length strlens and starts with expectc.  Uses a GCC nested function so the
 * callback can see minc, maxc and strlens. */
static void lookfor (GPMA pma, u_int32_t strlens, int/*char*/ minc, int /*char*/ maxc, int /*char*/ expectc) {
    // Negative below the range, positive above it, zero inside it (bounds inclusive).
    int zero_inside_range (u_int32_t dlen, void *dval, void *extra) {
        assert(dlen==strlens);
        assert(extra==0);
        const char first = ((char*)dval)[0];
        if (first < minc) return -1;
        return (first > maxc) ? +1 : 0;
    }

    void *data;
    u_int32_t len, idx;
    int r = toku_gpma_lookup_bessel(pma, zero_inside_range, 0, 0, &len, &data, &idx);
    assert(r==0);
    assert(len==strlens);
    //printf("Got %c, expect %c\n", ((char*)data)[0], expectc);
    assert(((char*)data)[0]==expectc);
}
/* Insert the one-digit keys "0".."8" and, for every inclusive range [lo, hi]
 * of digits, check that a lookup whose bessel function is zero across the
 * whole range returns the leftmost key in it, i.e. lo. */
static void test_leftmost (void) {
    enum { N = 9, strlens=2 };
    GPMA pma;
    char *digits[N];
    int k;

    int r = toku_gpma_create(&pma, 0);
    assert(r==0);

    assert(N<10); // Or we need to fix our format string
    for (k = 0; k < N; k++) {
        char text[strlens];
        snprintf(text, strlens, "%d", k);
        digits[k] = strdup(text);
        r = toku_gpma_insert(pma, 1+strlen(digits[k]), digits[k], compare_strings, 0, rcall_ok, digits[k], 0);
        assert(r==0);
    }

    int lo;
    for (lo = 0; lo < N; lo++) {
        int hi = lo;
        while (hi < N) {
            lookfor(pma, strlens, '0'+lo, '0'+hi, '0'+lo);
            hi++;
        }
    }
    // Other tests go here.  Check when -1 for 0, 0 for 1 and 2, 1 for 3 that we get 1

    for (k = 0; k < N; k++) toku_free(digits[k]);
    toku_gpma_free(&pma, 0, 0);
}
/* Entry point of the leftmost-duplicate test.
 * Recognizes -v/--verbose and -q/--quiet (other arguments are ignored). */
int main (int argc, const char *argv[]) {
    int argi = 1;
    while (argi < argc) {
        const char *arg = argv[argi++];
        if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose")) {
            verbose = 1;
        } else if (!strcmp(arg, "-q") || !strcmp(arg, "--quiet")) {
            verbose = 0;
        }
    }

    test_leftmost();

    toku_malloc_cleanup();
    return 0;
}
/* Worst-case insert patterns. */
#include "gpma.h"
#include "toku_assert.h"
#include "memory.h"
#include <string.h>
#include <stdio.h>
int verbose;
/* Reset at the start of test_worst_insert() and asserted to be zero there.
 * Nothing in this file increments it. */
static int count_frees = 0;

/* Free-callback handed to toku_gpma_free(): releases one item.
 * The test always passes &verbose as the extra pointer, which is checked here. */
static void free_callback (u_int32_t len __attribute__((__unused__)),
                           void *freeme,
                           void *extra) {
    assert((void*)&verbose == extra);
    toku_free(freeme);
}
/* Key comparison callback: both values are NUL-terminated strings whose
 * recorded lengths include the terminator; ordering is that of strcmp(). */
static int compare_strings (u_int32_t alen, void *aval,
                            u_int32_t blen, void *bval,
                            void *extra __attribute__((__unused__))) {
    const char *first  = aval;
    const char *second = bval;
    assert(strlen(first)+1  == alen);
    assert(strlen(second)+1 == blen);
    return strcmp(first, second);
}
/* Renumbering callback that accepts any move: ignores every argument and returns 0. */
static int rcall_ok (u_int32_t     nitems __attribute__((__unused__)),
                     u_int32_t    *froms  __attribute__((__unused__)),
                     u_int32_t    *tos    __attribute__((__unused__)),
                     struct gitem *items  __attribute__((__unused__)),
                     u_int32_t     old_N  __attribute__((__unused__)),
                     u_int32_t     new_N  __attribute__((__unused__)),
                     void         *extra  __attribute__((__unused__))) {
    return 0;
}
/* Deletion callback for toku_gpma_delete_item(): checks that the stored string
 * has the recorded length and equals the string passed as extra, then frees it.
 * Always returns 0. */
static int delete_callback (u_int32_t slotnum __attribute__((__unused__)),
                            u_int32_t len,
                            void *data,
                            void *extra) {
    const char *stored   = data;
    const char *expected = extra;
    assert(strlen(stored)+1==len);
    assert(strcmp(stored, expected)==0);
    toku_free(data);
    return 0;
}
// Number of keys test_worst_insert() inserts up front, before it starts deleting.
static const int initial_N=1000;
// Step bound for test_worst_insert(): the growing phase runs up to step N and the shrinking phase up to step 2*N.
// inum() also uses it to mirror key numbers around 2*N-1.
static const int N=100000;
// Width in digits of the zero-padded decimal keys built by insert_n() and delete_n().
static const int w=6;
/* Insert the key for number n, formatted as a zero-padded decimal string of
 * width w.  The key is a fresh strdup() copy handed to the array; the insert
 * must succeed. */
static void insert_n (GPMA pma, int n) {
    char key[w+1];
    int written = snprintf(key, sizeof(key), "%0*d", w, n);
    assert(written==w);

    char *owned = strdup(key);
    int r = toku_gpma_insert(pma, strlen(key)+1, owned, compare_strings, 0, rcall_ok, 0, 0);
    assert(r==0);
}
/* Delete the key for number n (zero-padded decimal of width w).
 * delete_callback frees the stored string; no renumbering callback is given.
 * The delete must succeed; on failure the number is printed before asserting. */
static void delete_n (GPMA pma, int n) {
    char key[w+1];
    int written = snprintf(key, sizeof(key), "%0*d", w, n);
    assert(written==w);

    const u_int32_t keylen = strlen(key)+1;
    int r = toku_gpma_delete_item(pma, keylen, key, compare_strings, 0, delete_callback, key, 0, 0);
    if (r!=0) {
        printf("deleted %d\n", n);
    }
    assert(r==0);
}
/* Map the itemnum-th operation to a key number for the given pattern:
 *   direction  1: ascending keys (left to right);
 *   direction -1: descending keys starting from 2*N-1 (right to left);
 *   direction  0: alternate between the two outer edges, working inward.
 * Any other direction is a programming error. */
static int inum (int direction, int itemnum) {
    const int rightmost = 2*N-1;

    if (direction == 1) {
        return itemnum;
    }
    if (direction == -1) {
        return rightmost - itemnum;
    }
    if (direction == 0) {
        // Odd items count up from the left edge, even items count down from the right edge.
        const int half = itemnum/2;
        return (itemnum%2) ? half : rightmost - half;
    }
    assert(0);
    return 0;
}
/* Run an insert/delete workload whose keys follow the pattern selected by
 * direction (see inum), in three phases:
 *   1. initial_N plain inserts;
 *   2. up to step N: one insert per step and one delete on nine steps out of
 *      ten, so the table slowly grows;
 *   3. up to step 2*N: one delete per step and one insert on nineteen steps
 *      out of twenty, so the table slowly shrinks, while tracking the largest
 *      index limit seen.
 * Items are deleted in the same order they were inserted. */
static void test_worst_insert(int direction) {
    GPMA pma;
    int r = toku_gpma_create(&pma, 0);
    assert(r==0);
    count_frees=0;

    int step = 0;
    int n_inserted = 0;
    int n_deleted = 0;
    int peak_limit = 0;

    while (step < initial_N) {
        insert_n(pma, inum(direction, n_inserted++));
        step++;
    }

    while (step < N) {
        insert_n(pma, inum(direction, n_inserted++));
        if (step%10 != 0) { // Skipping a delete now and then makes the table get slowly larger
            delete_n(pma, inum(direction, n_deleted++));
        }
        step++;
    }

    while (step < 2*N) {
        int limit_now = toku_gpma_index_limit(pma);
        if (limit_now > peak_limit) peak_limit = limit_now;
        delete_n(pma, inum(direction, n_deleted++));
        if (step%20 != 0) { // Skipping an insert now and then makes the table get slowly smaller
            insert_n(pma, inum(direction, n_inserted++));
        }
        step++;
    }

    assert(count_frees==0);
    if (verbose) printf("size=%d max_size=%d\n", toku_gpma_index_limit(pma), peak_limit);
    toku_gpma_free(&pma, free_callback, &verbose);
}
/* Entry point of the worst-case insert test.
 * -v/--verbose and -q/--quiet control output; -a, -b or -c restrict the run to
 * the left-to-right, right-to-left or outer-edges pattern respectively.
 * With none of those, all three patterns run.  Other arguments are ignored. */
int main (int argc, const char *argv[]) {
    static const int patterns[3] = {+1, -1, 0};
    int which = 0;
    int argi = 1;
    while (argi < argc) {
        const char *arg = argv[argi++];
        if (!strcmp(arg, "-v") || !strcmp(arg, "--verbose")) {
            verbose = 1;
        } else if (!strcmp(arg, "-q") || !strcmp(arg, "--quiet")) {
            verbose = 0;
        } else if (!strcmp(arg, "-a")) {
            which = 1;
        } else if (!strcmp(arg, "-b")) {
            which = 2;
        } else if (!strcmp(arg, "-c")) {
            which = 3;
        }
    }

    int k;
    for (k = 0; k < 3; k++) {
        if (which==0 || which==k+1) test_worst_insert(patterns[k]);
    }
    return 0;
}
...@@ -70,7 +70,7 @@ ydbtrace.o tdbtrace.o: tdbtrace.h ...@@ -70,7 +70,7 @@ ydbtrace.o tdbtrace.o: tdbtrace.h
ydbtrace.o: ydb.c ydbtrace.o: ydb.c
$(CC) $(CFLAGS) $(CPPFLAGS) -DTOKUTRACE -c -o $@ $< $(CC) $(CFLAGS) $(CPPFLAGS) -DTOKUTRACE -c -o $@ $<
DBBINS = ydb.o errors.o elocks.o ../newbrt/brt.o ../newbrt/brt-serialize.o ../newbrt/brt-verify.o ../newbrt/cachetable.o ../newbrt/fifo.o ../newbrt/key.o ../newbrt/leafentry.o ../newbrt/memory.o ../newbrt/mempool.o ../newbrt/gpma.o ../newbrt/ybt.o ../newbrt/primes.o ../newbrt/log.o ../newbrt/fingerprint.o ../newbrt/log_code.o ../newbrt/roll.o ../newbrt/toku_assert.o ../newbrt/recover.o DBBINS = ydb.o errors.o elocks.o ../newbrt/brt.o ../newbrt/brt-serialize.o ../newbrt/brt-verify.o ../newbrt/cachetable.o ../newbrt/fifo.o ../newbrt/key.o ../newbrt/leafentry.o ../newbrt/memory.o ../newbrt/mempool.o ../newbrt/omt.o ../newbrt/ybt.o ../newbrt/primes.o ../newbrt/log.o ../newbrt/fingerprint.o ../newbrt/log_code.o ../newbrt/roll.o ../newbrt/toku_assert.o ../newbrt/recover.o
TDBBINS = tdbtrace.o $(patsubst ydb.o,ydbtrace.o,$(DBBINS)) TDBBINS = tdbtrace.o $(patsubst ydb.o,ydbtrace.o,$(DBBINS))
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment