Commit 80e6041d authored by Bradley C. Kuszmaul

Rename

git-svn-id: file:///svn/tokudb@4 c7de825b-a66e-492c-adef-691d508d4ae1
parents
# Regenerate the TAGS index with etags whenever any .c or .h file one directory down changes.
TAGS: */*.c */*.h
etags */*.c */*.h
#ifndef _YOBI_DB_H
#define _YOBI_DB_H
#include "ydb-constants.h"
#if defined(__cplusplus)
extern "C" {
#if 0
}
#endif
#endif
#include <sys/types.h>
#include <stdio.h>
/*
 * Database access method selector (last-but-two argument of DB->open).
 * Only DB_BTREE is enabled.  The values below are commented out in this
 * header and kept only as a record:
 *   DB_HASH=2, DB_RECNO=3, DB_QUEUE=4, DB_UNKNOWN=5 (figure it out on open)
 */
typedef enum {
    DB_BTREE = 1
} DBTYPE;
/* Notice kinds passed to the callback installed with DB_ENV->set_noticecall. */
typedef enum {
    DB_NOTICE_LOGFILE_CHANGED = 0
} db_notices;
/* Verbosity selectors; each one occupies its own bit. */
enum {
    DB_VERB_CHKPOINT = 1 << 0,
    DB_VERB_DEADLOCK = 1 << 1,
    DB_VERB_RECOVERY = 1 << 2
};
/* Public type names used throughout the API; each one aliases a yobi_-prefixed struct tag. */
typedef struct yobi_db DB;
typedef struct yobi_db_btree_stat DB_BTREE_STAT;
typedef struct yobi_db_env DB_ENV;
typedef struct yobi_db_key_range DB_KEY_RANGE;
typedef struct yobi_db_lsn DB_LSN;
typedef struct yobi_db_txn DB_TXN;
typedef struct yobi_db_txn_active DB_TXN_ACTIVE;
typedef struct yobi_db_txn_stat DB_TXN_STAT;
typedef struct yobi_dbc DBC;
typedef struct yobi_dbt DBT;
/*
 * Database handle (DB).  Operations are exposed as function pointers that take
 * the handle as their first argument; every operation returns an int status.
 */
struct yobi_db {
void *app_private; /* not interpreted anywhere in this header; presumably reserved for the application -- confirm */
int (*close) (DB *, u_int32_t);
int (*cursor) (DB *, DB_TXN *, DBC **, u_int32_t);
int (*del) (DB *, DB_TXN *, DBT *, u_int32_t);
int (*get) (DB *, DB_TXN *, DBT *, DBT *, u_int32_t);
int (*key_range) (DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t);
int (*open) (DB *, DB_TXN *,
const char *, const char *, DBTYPE, u_int32_t, int);
int (*put) (DB *, DB_TXN *, DBT *, DBT *, u_int32_t);
int (*remove) (DB *, const char *, const char *, u_int32_t);
int (*rename) (DB *, const char *, const char *, const char *, u_int32_t);
/* Installs a key comparison callback of the form int cmp(DB *, const DBT *, const DBT *). */
int (*set_bt_compare) (DB *,
int (*)(DB *, const DBT *, const DBT *));
int (*set_flags) (DB *, u_int32_t);
int (*stat) (DB *, void *, u_int32_t);
struct ydb_db_internal *i; /* implementation-private state; the struct is not defined in this header */
};
/* DB_DBT_* bit values; each occupies a distinct bit (presumably for DBT.flags -- confirm). */
enum {
    DB_DBT_MALLOC  = 1 << 1,
    DB_DBT_REALLOC = 1 << 4,
    DB_DBT_USERMEM = 1 << 5,
    DB_DBT_DUPOK   = 1 << 6
};
/* DBT: the key/value argument type taken by the DB and DBC operations. */
struct yobi_dbt {
void *app_private; /* not interpreted in this header */
void *data; /* presumably the start of the item's bytes -- confirm */
u_int32_t flags; /* presumably a combination of the DB_DBT_* bits -- confirm */
u_int32_t size; /* presumably the number of valid bytes at data -- confirm */
u_int32_t ulen; /* presumably the capacity of a caller-supplied buffer -- confirm */
};
/* Transaction handle (DB_TXN): a commit operation, an id accessor, and private state. */
struct yobi_db_txn {
int (*commit) (DB_TXN*, u_int32_t);
u_int32_t (*id) (DB_TXN *);
// internal stuff
struct yobi_db_txn_internal *i;
};
/* Cursor handle (DBC): get/close/delete operations plus implementation-private state. */
struct yobi_dbc {
int (*c_get) (DBC *, DBT *, DBT *, u_int32_t);
int (*c_close) (DBC *);
int (*c_del) (DBC *, u_int32_t);
struct yobi_dbc_internal *i; /* not defined in this header */
};
/*
 * Environment handle (DB_ENV).  Like DB, it is a table of function pointers
 * that take the handle as first argument, followed by private state.
 */
struct yobi_db_env {
// Methods used by MYSQL
void (*err) (const DB_ENV *, int, const char *, ...);
int (*open) (DB_ENV *, const char *, u_int32_t, int);
int (*close) (DB_ENV *, u_int32_t);
int (*txn_checkpoint) (DB_ENV *, u_int32_t, u_int32_t, u_int32_t);
int (*log_flush) (DB_ENV *, const DB_LSN *);
/* The three set_*call/set_errpfx setters return nothing, unlike the int-returning setters below. */
void (*set_errcall) (DB_ENV *, void (*)(const char *, char *));
void (*set_errpfx) (DB_ENV *, const char *);
void (*set_noticecall) (DB_ENV *, void (*)(DB_ENV *, db_notices));
int (*set_flags) (DB_ENV *, u_int32_t, int);
int (*set_data_dir) (DB_ENV *, const char *);
int (*set_tmp_dir) (DB_ENV *, const char *);
int (*set_verbose) (DB_ENV *, u_int32_t, int);
int (*set_lg_bsize) (DB_ENV *, u_int32_t);
int (*set_lg_dir) (DB_ENV *, const char *);
int (*set_lg_max) (DB_ENV *, u_int32_t);
int (*set_cachesize) (DB_ENV *, u_int32_t, u_int32_t, int);
int (*set_lk_detect) (DB_ENV *, u_int32_t);
int (*set_lk_max) (DB_ENV *, u_int32_t);
int (*log_archive) (DB_ENV *, char **[], u_int32_t);
int (*txn_stat) (DB_ENV *, DB_TXN_STAT **, u_int32_t);
int (*txn_begin) (DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t);
// Internal state
struct db_env_ydb_internal *i;
};
/*
 * Result type filled in by DB->key_range.
 * NOTE(review): the third member is spelled "grater"; this looks like a typo for
 * "greater".  Renaming it changes the public interface, so confirm against callers first.
 */
struct yobi_db_key_range {
double less,equal,grater;
};
/* B-tree statistics record (DB_BTREE_STAT); holds two unsigned 32-bit counters. */
struct yobi_db_btree_stat {
u_int32_t bt_ndata; /* presumably the number of data items -- confirm */
u_int32_t bt_nkeys; /* presumably the number of keys -- confirm */
};
/* Transaction statistics (DB_TXN_STAT), produced through DB_ENV->txn_stat. */
struct yobi_db_txn_stat {
u_int32_t st_nactive; /* presumably the number of entries in st_txnarray -- confirm */
DB_TXN_ACTIVE *st_txnarray;
};
/* DB_LSN: currently a single-int stand-in; the member name suggests a placeholder -- confirm. */
struct yobi_db_lsn {
int hello;
};
/* One element of DB_TXN_STAT.st_txnarray: an LSN (embedded by value) and a transaction id. */
struct yobi_db_txn_active {
DB_LSN lsn;
u_int32_t txnid;
};
/*
 * Version banner.  When _YDB_WRAP_H is already defined, the banner is published
 * under the name DB_VERSION_STRING_ydb instead and DB_VERSION_STRING is left
 * undefined by this header.
 */
#ifndef _YDB_WRAP_H
#define DB_VERSION_STRING "Yobiduck: Fractal DB (November 19, 2006)"
#else
#define DB_VERSION_STRING_ydb "Yobiduck: Fractal DB (November 19, 2006) (wrapped bdb)"
#endif
/* DB_ARCH_* bit values (presumably flags for DB_ENV->log_archive -- confirm). */
enum {
    DB_ARCH_ABS = 1 << 0,
    DB_ARCH_LOG = 1 << 2
};
/* General-purpose flag bits; each constant occupies a distinct bit. */
enum {
    DB_CREATE     = 1 << 0,
    DB_RDONLY     = 1 << 4,
    DB_RECOVER    = 1 << 5,
    DB_THREAD     = 1 << 6,
    DB_TXN_NOSYNC = 1 << 8,
    DB_PRIVATE    = 1 << 20
};
/* DB_LOCK_* selector values (plain numbers, not bit flags). */
enum {
    DB_LOCK_DEFAULT = 1,
    DB_LOCK_OLDEST  = DB_LOCK_DEFAULT + 6,
    DB_LOCK_RANDOM  = DB_LOCK_OLDEST + 1
};
/* DB_DUP flag bit. */
enum { DB_DUP = 1 << 1 };
/* DB_NOOVERWRITE selector value (a plain number, not a bit mask). */
enum { DB_NOOVERWRITE = 23 };
/* DB_INIT_* subsystem bits; four consecutive bits starting at bit 12. */
enum {
    DB_INIT_LOCK  = 1 << 12,
    DB_INIT_LOG   = 1 << 13,
    DB_INIT_MPOOL = 1 << 14,
    DB_INIT_TXN   = 1 << 15
};
/* Free-standing entry points.  The two *_create functions hand back a new handle through their first argument. */
int db_create (DB **, DB_ENV *, u_int32_t);
int db_env_create (DB_ENV **, u_int32_t);
/* Function-style transaction calls; txn_begin has the same signature as DB_ENV->txn_begin. */
int txn_begin (DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t);
int txn_commit (DB_TXN *, u_int32_t);
int txn_abort (DB_TXN *);
int log_compare (const DB_LSN *, const DB_LSN *);
#if defined(__cplusplus)
}
#endif
#endif
#ifndef _YDB_CONSTANTS_H
#define _YDB_CONSTANTS_H
/* Negative status codes, listed in ascending numeric order. */
enum {
    DB_BADFORMAT     = -31000,  /* Private */
    DB_KEYEMPTY      = -30998,
    DB_KEYEXIST      = -30997,
    DB_LOCK_DEADLOCK = -30996,
    DB_NOTFOUND      = -30991
};
/*
 * Small operation codes plus the DB_RMW high bit.
 * (DB_AFTER = 1 is commented out in this header and therefore not defined.)
 */
enum {
    DB_FIRST     = 10,
    DB_GET_BOTH  = 11,
    DB_LAST      = 18,
    DB_NEXT      = 19,
    DB_NEXT_DUP  = 20,
    DB_PREV      = 27,
    DB_SET       = 30,
    DB_SET_RANGE = 32,
    DB_RMW       = 1 << 30
};
#endif
# Optional flag groups.  All three assignments are commented out here, so the
# corresponding $(...) references below expand to nothing unless set elsewhere.
# GCOV_FLAGS = -fprofile-arcs -ftest-coverage
#PROF_FLAGS = -pg
#OPTFLAGS = -O2
# Compile with warnings enabled and promoted to errors (-Wall -W -Werror) and with debug info (-g).
CFLAGS = -Wall -W $(OPTFLAGS) -g $(GCOV_FLAGS) $(PROF_FLAGS) -Werror
LDFLAGS = $(OPTFLAGS) -g $(GCOV_FLAGS) $(PROF_FLAGS)
# default, bins and check name actions, not files; declare them phony so a stray
# file with one of these names cannot mask the goal.
.PHONY: default bins check
# Default goal: build every program listed in BINS.
default: bins
BINS = pma-test brt-test cachetable-test brt-serialize-test randbrt randdb4 hashtest ybt-test
bins: $(BINS)
# Run the test programs in order; make stops at the first one that exits non-zero.
# NOTE(review): mdict-test is run below but is not listed in BINS, so this target
# does not build it -- confirm whether it belongs in BINS or should be dropped here.
check: bins
	./ybt-test
	./mdict-test
	./pma-test
	./cachetable-test
	./brt-serialize-test
	./brt-test
	./hashtest
# Hand-maintained dependencies: "X.o: headers" lines list the headers an object
# depends on; "prog: objects" lines list the extra objects linked into a program.
# No recipes are given, so make's built-in rules do the compiling and linking.
# pma: PROF_FLAGS=-fprofile-arcs -ftest-coverage
key.o: brttypes.h key.h
pma-test.o: pma-internal.h pma.h yerror.h memory.h ../include/ydb-constants.h
pma-test: pma.o memory.o key.o ybt.o
pma.o: pma.h yerror.h pma-internal.h memory.h key.h ybt.h brttypes.h ../include/ydb-constants.h
ybt.o: ybt.h brttypes.h
ybt-test: ybt-test.o ybt.o memory.o
cachetable.o: cachetable.h
brt-test: brt.o hashtable.o pma.o memory.o brt-serialize.o cachetable.o header-io.o ybt.o key.o
brt-test.o brt.o: brt.h cachetable.h brttypes.h
brt-serialize-test.o: pma.h yerror.h brt.h memory.h hashtable.h brttypes.h brt-internal.h
brt.o: brt.h mdict.h pma.h brttypes.h memory.h brt-internal.h cachetable.h
mdict.o: pma.h
hashtable.o: hashtable.h brttypes.h memory.h key.h yerror.h ../include/ydb-constants.h
memory.o: memory.h
hashtest: hashtable.o memory.o
brt-serialize.o: brt.h cachetable.h memory.h mdict.h pma.h brttypes.h brt-internal.h
header-io.o: brttypes.h brt-internal.h memory.h
mdict-test: hashtable.o pma.o memory.o
brt-serialize-test: brt-serialize-test.o brt-serialize.o memory.o hashtable.o pma.o key.o ybt.o
cachetable-test.o: cachetable.h memory.h
cachetable-test: cachetable.o memory.o cachetable-test.o
# Remove objects, every program listed in BINS, and coverage/profiling by-products.
# Using $(BINS) keeps this in step with the build list; the previous hard-coded
# list left pma-test, brt-serialize-test and ybt-test behind.
.PHONY: clean
clean:
	rm -rf *.o $(BINS) *.bb *.bbg *.da
# Target-specific variable: only randdb4 is linked with -ldb.
randdb4: LOADLIBES=-ldb
# Extra objects linked into randbrt.
randbrt: brt.o hashtable.o cachetable.o memory.o brt-serialize.o
# Regenerate the TAGS index with etags over the .c and .h files of every sibling directory.
TAGS: ../*/*.c ../*/*.h
etags ../*/*.c ../*/*.h
#include "cachetable.h"
#include "hashtable.h"
#include "pma.h"
#include "brt.h"
//#include "pma.h"
typedef long long diskoff; /* Offset in a disk. -1 is the NULL pointer. */
/* Length of the pivot-key arrays in struct brtnode; the per-child arrays there hold TREE_FANOUT+1 slots. */
enum { TREE_FANOUT = 16 }; //, NODESIZE=1<<20 };
enum { KEY_VALUE_OVERHEAD = 8 }; /* Must store the two lengths. */
/* Single-field header record; judging by the name it describes a node as stored in the file -- confirm where it is used. */
struct nodeheader_in_file {
int n_in_buffer;
};
/* Header size in bytes: two 4-byte fields plus TREE_FANOUT 8-byte child slots. */
enum {
    BUFFER_HEADER_SIZE = 4                 /* height */
                       + 4                 /* n_children */
                       + TREE_FANOUT * 8   /* children */
};
/* BRTNODE is a pointer to a node; the same struct serves leaves and nonleaves, selected by height. */
typedef struct brtnode *BRTNODE;
/* Internal nodes. */
struct brtnode {
enum typ_tag tag;
unsigned int nodesize;
diskoff thisnodename;
int height; /* height is always >= 0. 0 for leaf, >0 for nonleaf. */
/* Use u.l when height==0 and u.n when height>0. */
union node {
struct nonleaf {
int n_children; /* if n_children==TREE_FANOUT+1 then the tree needs to be rebalanced. */
bytevec childkeys[TREE_FANOUT]; /* Pivot keys. Child 0's keys are <= childkeys[0]. Child 1's keys are <= childkeys[1].
Note: It is possible that Child 1's keys are == to child 0's keys, so it is
not necessarily true that child 1's keys are > childkeys[0].
However, in the absence of duplicate keys, child 1's keys *are* > childkeys[0]. */
unsigned int childkeylens[TREE_FANOUT];
unsigned int totalchildkeylens;
diskoff children[TREE_FANOUT+1]; /* unused if height==0 */ /* Note: The last element of these arrays is used only temporarily while splitting a node. */
HASHTABLE htables[TREE_FANOUT+1];
unsigned int n_bytes_in_hashtable[TREE_FANOUT+1]; /* how many bytes are in each hashtable (including overheads) */
unsigned int n_bytes_in_hashtables; /* verify_counts() asserts this equals the sum of n_bytes_in_hashtable[] */
} n;
struct leaf {
PMA buffer;
unsigned int n_bytes_in_buffer;
} l;
} u;
};
/*
 * In-memory form of the file header.  serialize_brt_header_to() writes nodesize,
 * freelist, unused_memory, n_named_roots and then either the named roots or
 * unnamed_root; dirty is not written.
 */
struct brt_header {
int dirty;
unsigned int nodesize;
diskoff freelist;
diskoff unused_memory;
diskoff unnamed_root;
int n_named_roots; /* -1 if the only one is unnamed */
char **names; /* n_named_roots NUL-terminated names, parallel to roots[] */
diskoff *roots;
};
/* One open tree: its cache file, optional database name, shared header, and a cursor list (head/tail). */
struct brt {
CACHEFILE cf;
char *database_name;
// The header is shared. It is also ephemeral.
struct brt_header *h;
BRT_CURSOR cursors_head, cursors_tail;
};
/* serialization code */
/* Writes node at byte offset off of fd; size is the capacity of the scratch buffer used to build the image. */
void serialize_brtnode_to(int fd, diskoff off, diskoff size, BRTNODE node);
/* Reads the node stored at off; returns 0 and stores the new node in *brtnode on success. */
int deserialize_brtnode_from (int fd, diskoff off, BRTNODE *brtnode, int nodesize);
unsigned int serialize_brtnode_size(BRTNODE node); /* How much space will it take? */
/* Index of the child whose key range contains key (nonleaf nodes only). */
unsigned int brtnode_which_child (BRTNODE node, bytevec key, ITEMLEN keylen);
int keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len);
/* Asserts that a node's byte counters are mutually consistent. */
void verify_counts(BRTNODE);
/* The header is always written at offset 0 of fd. */
int serialize_brt_header_to (int fd, struct brt_header *h);
int deserialize_brtheader_from (int fd, diskoff off, struct brt_header **brth);
/* Hashtable count for a node: 1 when height is 0, otherwise the node's n_children. */
static inline int brtnode_n_hashtables(BRTNODE node) {
    return (node->height==0) ? 1 : node->u.n.n_children;
}
//int write_brt_header (int fd, struct brt_header *header);
/* Recognizable poison pointer value.  The #if 1 selects the 32-bit pattern; the 64-bit pattern is compiled out. */
#if 1
#define DEADBEEF ((void*)0xDEADBEEF)
#else
#define DEADBEEF ((void*)0xDEADBEEFDEADBEEF)
#endif
#include "brt.h"
#include "memory.h"
#include "brt-internal.h"
#include <fcntl.h>
#include <assert.h>
#include <string.h>
/*
 * Round-trip test: build a height-1 node with two children and three buffered
 * key/value pairs, serialize it to a scratch file, read it back, and check
 * that every field and every pair survived.
 */
void test_serialize(void) {
    struct brtnode sn, *dn = 0; /* Source node, Dest node */
    int fd = open("brt-serialize-test.brt", O_RDWR|O_CREAT, 0777);
    int r;
    assert(fd>=0);

    sn.nodesize = 1024;
    sn.thisnodename = sn.nodesize*20;
    sn.height = 1;
    sn.u.n.n_children = 2;
    sn.u.n.childkeys[0] = strdup("hello");
    sn.u.n.childkeylens[0] = 6;
    sn.u.n.totalchildkeylens = 6;
    sn.u.n.children[0] = sn.nodesize*30;
    sn.u.n.children[1] = sn.nodesize*35;
    r = hashtable_create(&sn.u.n.htables[0]); assert(r==0);
    r = hashtable_create(&sn.u.n.htables[1]); assert(r==0);
    /* Lengths include the terminating NUL: 2 for the keys, 5 for the values. */
    r = hash_insert(sn.u.n.htables[0], "a", 2, "aval", 5); assert(r==0);
    r = hash_insert(sn.u.n.htables[0], "b", 2, "bval", 5); assert(r==0);
    r = hash_insert(sn.u.n.htables[1], "x", 2, "xval", 5); assert(r==0);
    sn.u.n.n_bytes_in_hashtables = 3*(KEY_VALUE_OVERHEAD+2+5);

    serialize_brtnode_to(fd, sn.nodesize*20, sn.nodesize, &sn);

    /* dn is only valid when deserialization reports success, so check before using it. */
    r = deserialize_brtnode_from(fd, sn.nodesize*20, &dn, sn.nodesize);
    assert(r==0);
    assert(dn!=0);

    assert(dn->thisnodename==sn.nodesize*20);
    assert(dn->height == 1);
    assert(dn->u.n.n_children==2);
    assert(strcmp(dn->u.n.childkeys[0], "hello")==0);
    assert(dn->u.n.childkeylens[0]==6);
    assert(dn->u.n.totalchildkeylens==6);
    assert(dn->u.n.children[0]==sn.nodesize*30);
    assert(dn->u.n.children[1]==sn.nodesize*35);
    {
        bytevec data; ITEMLEN datalen;
        r = hash_find(dn->u.n.htables[0], "a", 2, &data, &datalen);
        assert(r==0);
        assert(strcmp(data,"aval")==0);
        assert(datalen==5);

        r = hash_find(dn->u.n.htables[0], "b", 2, &data, &datalen);
        assert(r==0);
        assert(strcmp(data,"bval")==0);
        assert(datalen==5);

        r = hash_find(dn->u.n.htables[1], "x", 2, &data, &datalen);
        assert(r==0);
        assert(strcmp(data,"xval")==0);
        assert(datalen==5);
    }
}
/* Test driver: turn on the allocator's checking flag, then run the round-trip test. */
int main (int argc, char *argv[]) {
    (void)argc;   /* command-line arguments are not used */
    (void)argv;
    memory_check = 1;
    test_serialize();
    return 0;
}
#define _XOPEN_SOURCE 500
#include "brt.h"
#include "memory.h"
//#include "pma.h"
#include "brt-internal.h"
#include <assert.h>
#include <unistd.h>
#include <string.h>
#include <stdio.h>
#include <arpa/inet.h>
#include <errno.h>
/* Position within a fixed-size byte buffer; shared by the wbuf_* writers and the rbuf_* readers. */
struct cursor {
unsigned char *buf; /* start of the buffer */
unsigned int size; /* capacity of buf in bytes */
unsigned int ndone; /* bytes written or read so far; the helpers assert it never exceeds size */
};
/* Append one byte (ch converted to unsigned char) to the buffer; asserts that room remains. */
void wbuf_char (struct cursor *w, int ch) {
    unsigned int pos = w->ndone;
    assert(pos < w->size);
    w->buf[pos] = ch;
    w->ndone = pos + 1;
}
/* Append a 32-bit value as four bytes, most significant byte first. */
void wbuf_int (struct cursor *w, unsigned int i) {
    int shift;
    for (shift = 24; shift >= 0; shift -= 8) {
        wbuf_char(w, (i >> shift) & 0xff);
    }
}
/* Append a length-prefixed byte string: a 4-byte count followed by the bytes themselves. */
void wbuf_bytes (struct cursor *w, bytevec bytes_bv, int nbytes) {
    const unsigned char *src = bytes_bv;
    int remaining;
    wbuf_int(w, nbytes);
    for (remaining = nbytes; remaining > 0; remaining--) {
        wbuf_char(w, *src++);
    }
}
/* Append a 64-bit disk offset as two 32-bit words, upper word first. */
void wbuf_diskoff (struct cursor *w, diskoff off) {
    unsigned int upper = off>>32;
    unsigned int lower = off&0xFFFFFFFF;
    wbuf_int(w, upper);
    wbuf_int(w, lower);
}
/* Consume and return the next byte of the buffer; asserts that one is available. */
unsigned int rbuf_char (struct cursor *r) {
    unsigned int pos = r->ndone;
    assert(pos < r->size);
    r->ndone = pos + 1;
    return r->buf[pos];
}
/*
 * Consume four bytes and return them as a 32-bit value, most significant byte
 * first (the inverse of wbuf_int).
 *
 * The bytes are held in unsigned int so the shifts are done in unsigned
 * arithmetic.  An unsigned char operand would be promoted to (signed) int, and
 * shifting a byte >= 0x80 left by 24 would then overflow int, which is
 * undefined behaviour.
 */
unsigned int rbuf_int (struct cursor *r) {
    unsigned int b0 = rbuf_char(r);
    unsigned int b1 = rbuf_char(r);
    unsigned int b2 = rbuf_char(r);
    unsigned int b3 = rbuf_char(r);
    return (b0<<24) | (b1<<16) | (b2<<8) | b3;
}
/*
 * Read a length-prefixed byte string (the inverse of wbuf_bytes).
 * On return *n_bytes holds the stored length and *bytes points into the middle
 * of the cursor's buffer -- nothing is copied, so the pointer is only valid
 * while the buffer is.
 *
 * The length is checked against the bytes that remain BEFORE the cursor is
 * advanced.  Checking ndone<=size after the addition is not enough: a huge
 * stored length can wrap the unsigned sum and slip past the assertion.
 */
void rbuf_bytes (struct cursor *r, bytevec *bytes, unsigned int *n_bytes)
{
    unsigned int len = rbuf_int(r);
    assert(r->ndone <= r->size);
    assert(len <= r->size - r->ndone);
    *n_bytes = len;
    *bytes = &r->buf[r->ndone];
    r->ndone += len;
}
/* Read a 64-bit disk offset stored as two 32-bit words, upper word first. */
diskoff rbuf_diskoff (struct cursor *r) {
    unsigned long long upper = rbuf_int(r);
    unsigned long long lower = rbuf_int(r);
    return (upper<<32) | lower;
}
/*
 * Compute a node's serialized size the slow way, by walking every stored entry,
 * and assert that the walk agrees with the byte counters cached in the node.
 * serialize_brtnode_size() uses it as a cross-check.
 * NOTE(review): HASHTABLE_ITERATE and PMA_ITERATE are project macros; from their
 * use here they presumably bind key/keylen/data/datalen for each entry and
 * evaluate the final expression once per entry -- confirm against their definitions.
 */
static unsigned int serialize_brtnode_size_slow(BRTNODE node) {
unsigned int size=4+4; /* size+height */
if (node->height>0) {
unsigned int hsize=0;
unsigned int csize=0;
int i;
size+=4; /* n_children */
/* Each pivot key costs a 4-byte length prefix (counted in size) plus its bytes (counted in csize). */
for (i=0; i<node->u.n.n_children-1; i++) {
size+=4;
csize+=node->u.n.childkeylens[i];
}
/* Each child offset is stored in 8 bytes. */
for (i=0; i<node->u.n.n_children; i++) {
size+=8;
}
int n_hashtables = brtnode_n_hashtables(node);
size+=4; /* n_entries */
/* Each buffered pair costs 8 bytes of length prefixes plus the key and value bytes. */
for (i=0; i< n_hashtables; i++) {
HASHTABLE_ITERATE(node->u.n.htables[i],
key __attribute__((__unused__)), keylen,
data __attribute__((__unused__)), datalen,
(hsize+=8+keylen+datalen));
}
assert(hsize==node->u.n.n_bytes_in_hashtables);
assert(csize==node->u.n.totalchildkeylens);
return size+hsize+csize;
} else {
unsigned int hsize=0;
PMA_ITERATE(node->u.l.buffer,
key __attribute__((__unused__)), keylen,
data __attribute__((__unused__)), datalen,
(hsize+=8+keylen+datalen));
assert(hsize==node->u.l.n_bytes_in_buffer);
hsize+=4; /* add n entries in buffer table. */
return size+hsize;
}
}
/*
 * Serialized size of a node in bytes, computed from the counters cached in the
 * node.  For leaves, when memory_check is set, the answer is cross-checked
 * against the entry-by-entry computation.
 */
unsigned int serialize_brtnode_size (BRTNODE node) {
    unsigned int result = 4+4; /* size+height */
    assert(sizeof(off_t)==8);
    if (node->height<=0) {
        /* Leaf: entry count followed by the buffered pairs. */
        result += 4;
        result += node->u.l.n_bytes_in_buffer;
        if (memory_check) {
            unsigned int slowresult = serialize_brtnode_size_slow(node);
            if (result!=slowresult) printf("%s:%d result=%d slowresult=%d\n", __FILE__, __LINE__, result, slowresult);
            assert(result==slowresult);
        }
        return result;
    }
    /* Nonleaf. */
    result += 4;                                /* n_children */
    result += 4*(node->u.n.n_children-1);       /* length prefix of each pivot key */
    result += node->u.n.totalchildkeylens;      /* the pivot key bytes themselves */
    result += 8*(node->u.n.n_children);         /* child offsets */
    result += 4;                                /* n_entries in the hash tables */
    result += node->u.n.n_bytes_in_hashtables;
    return result;
}
/*
 * Serialize node into a scratch buffer of `size` bytes and write it to fd at
 * byte offset `off`.
 *
 * Layout: total size, height, then for a nonleaf: n_children, the n_children-1
 * pivot keys, the child offsets, the entry count and every buffered key/value
 * pair; for a leaf: the entry count and every key/value pair.
 *
 * Failures are reported by assertion.  The image is checked against the
 * precomputed size before it is written, so an inconsistent node aborts
 * without touching the file.
 */
void serialize_brtnode_to(int fd, diskoff off, diskoff size, BRTNODE node) {
    struct cursor w;
    int i;
    unsigned int calculated_size = serialize_brtnode_size(node);
    assert(size>0);
    w.buf=my_malloc(size);
    assert(w.buf!=0);
    w.size=size;
    w.ndone=0;
    wbuf_int(&w, calculated_size);
    wbuf_int(&w, node->height);
    if (node->height>0) {
        wbuf_int(&w, node->u.n.n_children);
        for (i=0; i<node->u.n.n_children-1; i++) {
            wbuf_bytes(&w, node->u.n.childkeys[i], node->u.n.childkeylens[i]);
        }
        for (i=0; i<node->u.n.n_children; i++) {
            wbuf_diskoff(&w, node->u.n.children[i]);
        }
        {
            int n_entries=0;
            int n_hash_tables = brtnode_n_hashtables(node);
            /* The entry count is written once, in front of the pairs of all tables. */
            for (i=0; i< n_hash_tables; i++) {
                n_entries += hashtable_n_entries(node->u.n.htables[i]);
            }
            wbuf_int(&w, n_entries);
            for (i=0; i< n_hash_tables; i++) {
                HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen,
                                  (wbuf_bytes(&w, key, keylen),
                                   wbuf_bytes(&w, data, datalen)));
            }
        }
    } else {
        wbuf_int(&w, pma_n_entries(node->u.l.buffer));
        PMA_ITERATE(node->u.l.buffer, key, keylen, data, datalen,
                    (wbuf_bytes(&w, key, keylen),
                     wbuf_bytes(&w, data, datalen)));
    }
    /* Validate the image before it reaches the disk. */
    assert(w.ndone<=w.size);
    assert(w.ndone<=size);
    assert(calculated_size==w.ndone);
    {
        ssize_t r=pwrite(fd, w.buf, w.ndone, off);
        /* %zd is the conversion for ssize_t. */
        if (r<0) printf("r=%zd errno=%d\n", r, errno);
        assert((size_t)r==w.ndone);
    }
    my_free(w.buf);
}
/*
 * Read the node stored at byte offset `off` of fd and rebuild it in memory.
 *
 * fd       file to read from
 * off      byte offset of the node; also recorded as the node's thisnodename
 * brtnode  receives the newly allocated node on success; untouched on failure
 * nodesize recorded in the node as-is (it is not stored in the node image)
 *
 * Returns 0 on success.  On failure returns an errno value when a read or an
 * allocation fails, DB_BADFORMAT when the image is short or its size fields
 * disagree, or the non-zero status of the hashtable/PMA call that failed.
 * Everything allocated here is released on failure, with one exception noted
 * below.
 *
 * A single status variable is used throughout.  Re-declaring `r` in inner
 * scopes previously hid the error code from the cleanup path.
 *
 * NOTE(review): pivot keys copied with memdup() are not released on the error
 * paths; free them once the ownership rules of memdup() are confirmed.
 */
int deserialize_brtnode_from (int fd, diskoff off, BRTNODE *brtnode, int nodesize) {
    TAGMALLOC(BRTNODE, result);
    struct cursor rc;
    uint32_t datasize;
    int n_tables_created = 0;   /* hashtables that must be freed if we bail out */
    int i;
    int r;

    /* Test the pointer rather than errno: errno may hold a stale value from an earlier call. */
    if (result==0) return errno ? errno : ENOMEM;

    /* The first 4 bytes give the size of the whole image, in network byte order. */
    {
        uint32_t datasize_n;
        ssize_t n = pread(fd, &datasize_n, sizeof(datasize_n), off);
        if (n!=(ssize_t)sizeof(datasize_n)) {
            r = (n==-1) ? errno : DB_BADFORMAT;
            goto died0;
        }
        datasize = ntohl(datasize_n);
        if (datasize==0 || datasize>(1<<30)) { r = DB_BADFORMAT; goto died0; }
    }

    rc.buf=my_malloc(datasize);
    if (rc.buf==0) { r = errno ? errno : ENOMEM; goto died0; }
    rc.size=datasize;
    assert(rc.size>0);
    rc.ndone=0;

    /* Read the whole image, size field included. */
    {
        ssize_t n = pread(fd, rc.buf, datasize, off);
        if (n==-1) { r = errno; goto died1; }
        if ((size_t)n!=datasize) { r = DB_BADFORMAT; goto died1; }
    }
    if (rbuf_int(&rc)!=datasize) { r = DB_BADFORMAT; goto died1; }

    result->nodesize = nodesize; // How to compute the nodesize?
    result->thisnodename = off;
    result->height = rbuf_int(&rc);

    if (result->height>0) {
        /* Start from a fully zeroed nonleaf so unused slots satisfy verify_counts(). */
        result->u.n.totalchildkeylens=0;
        for (i=0; i<TREE_FANOUT; i++) { result->u.n.childkeys[i]=0; result->u.n.childkeylens[i]=0; }
        for (i=0; i<TREE_FANOUT+1; i++) { result->u.n.children[i]=0; result->u.n.htables[i]=0; result->u.n.n_bytes_in_hashtable[i]=0; }
        result->u.n.n_bytes_in_hashtables = 0;

        result->u.n.n_children = rbuf_int(&rc);
        assert(result->u.n.n_children>=0 && result->u.n.n_children<=TREE_FANOUT);

        for (i=0; i<result->u.n.n_children-1; i++) {
            bytevec childkeyptr;
            rbuf_bytes(&rc, &childkeyptr, &result->u.n.childkeylens[i]); /* Returns a pointer into the rbuf. */
            result->u.n.childkeys[i] = memdup(childkeyptr, result->u.n.childkeylens[i]);
            result->u.n.totalchildkeylens+=result->u.n.childkeylens[i];
        }
        for (i=0; i<result->u.n.n_children; i++) {
            result->u.n.children[i] = rbuf_diskoff(&rc);
        }

        for (i=0; i<brtnode_n_hashtables(result); i++) {
            r = hashtable_create(&result->u.n.htables[i]);
            if (r!=0) goto died_tables;
            n_tables_created++;
        }

        /* The pairs are stored as one flat list; route each to its child's table. */
        {
            int n_in_hash = rbuf_int(&rc);
            for (i=0; i<n_in_hash; i++) {
                int childnum, diff;
                bytevec key; ITEMLEN keylen;
                bytevec val; ITEMLEN vallen;
                verify_counts(result);
                rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */
                rbuf_bytes(&rc, &val, &vallen);
                childnum = brtnode_which_child(result, key, keylen);
                r = hash_insert(result->u.n.htables[childnum], key, keylen, val, vallen); /* Copies the data into the hash table. */
                if (r!=0) goto died_tables;
                diff = keylen + vallen + KEY_VALUE_OVERHEAD;
                result->u.n.n_bytes_in_hashtables += diff;
                result->u.n.n_bytes_in_hashtable[childnum] += diff;
            }
        }
    } else {
        int n_in_buf = rbuf_int(&rc);
        result->u.l.n_bytes_in_buffer = 0;
        r = pma_create(&result->u.l.buffer);
        if (r!=0) goto died1;
        for (i=0; i<n_in_buf; i++) {
            bytevec key; ITEMLEN keylen;
            bytevec val; ITEMLEN vallen;
            verify_counts(result);
            rbuf_bytes(&rc, &key, &keylen); /* Returns a pointer into the rbuf. */
            rbuf_bytes(&rc, &val, &vallen);
            r = pma_insert(result->u.l.buffer, key, keylen, val, vallen);
            if (r!=0) goto died_pma;
            result->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD;
        }
    }

    my_free(rc.buf);
    *brtnode = result;
    verify_counts(result);
    return 0;

    /* Error exits: each label releases one resource and falls through to the next. */
 died_pma:
    pma_free(&result->u.l.buffer);
    goto died1;
 died_tables:
    /* Free exactly the tables that were created, not a count taken from another loop. */
    for (i=0; i<n_tables_created; i++) hashtable_free(&result->u.n.htables[i]);
 died1:
    my_free(rc.buf);
 died0:
    my_free(result);
    return r;
}
/*
 * Pick the child of a nonleaf node that a key belongs to: the first child
 * whose pivot key compares >= the given key, or the last child when the key
 * is greater than every pivot.
 */
unsigned int brtnode_which_child (BRTNODE node, bytevec key, ITEMLEN keylen) {
    int last;
    int childnum = 0;
    assert(node->height>0);
    last = node->u.n.n_children-1;
    while (childnum<last
           && keycompare(key, keylen, node->u.n.childkeys[childnum], node->u.n.childkeylens[childnum])>0) {
        childnum++;
    }
    return (childnum<last) ? childnum : last;
}
/*
 * Consistency check, enforced with assertions.
 * Leaf: the buffer must exist.
 * Nonleaf: the per-child byte counts must add up to the node total, and the
 * slots past n_children must be zero.
 */
void verify_counts (BRTNODE node) {
    unsigned int total = 0;
    int slot = 0;
    if (node->height==0) {
        assert(node->u.l.buffer);
        return;
    }
    while (slot<node->u.n.n_children) {
        total += node->u.n.n_bytes_in_hashtable[slot];
        slot++;
    }
    while (slot<TREE_FANOUT+1) {
        assert(node->u.n.n_bytes_in_hashtable[slot]==0);
        slot++;
    }
    assert(total==node->u.n.n_bytes_in_hashtables);
}
/*
 * Write the tree header at offset 0 of fd.
 *
 * Layout: total size (4), nodesize (4), freelist (8), unused_memory (8),
 * n_named_roots (4), then either
 *   - n_named_roots >= 0: for each root, its offset (8) and its name as a
 *     length-prefixed, NUL-terminated string, or
 *   - n_named_roots < 0: the single unnamed root offset (8).
 *
 * The size computation and the writer use the same test (n_named_roots < 0
 * means "unnamed"), which is also what deserialize_brtheader_from() expects.
 * Before, a header with zero named roots was sized as "named" but written as
 * "unnamed" and overran the buffer.
 *
 * Always returns 0; failures are reported by assertion.
 */
int serialize_brt_header_to (int fd, struct brt_header *h) {
    struct cursor w;
    int i;
    int has_named_roots = (h->n_named_roots>=0);
    unsigned int size=0; /* I don't want to mess around calculating it exactly. */
    size += 4+4+8+8+4; /* this size, the tree's nodesize, freelist, unused_memory, n_named_roots. */
    if (has_named_roots) {
        for (i=0; i<h->n_named_roots; i++) {
            size+=12 + 1 + strlen(h->names[i]); /* offset (8) + length prefix (4) + name + NUL */
        }
    } else {
        size+=8;
    }
    w.buf = my_malloc(size);
    w.size = size;
    w.ndone = 0;
    wbuf_int (&w, size);
    wbuf_int (&w, h->nodesize);
    wbuf_diskoff(&w, h->freelist);
    wbuf_diskoff(&w, h->unused_memory);
    wbuf_int (&w, h->n_named_roots);
    if (has_named_roots) {
        for (i=0; i<h->n_named_roots; i++) {
            char *s = h->names[i];
            unsigned int l = 1+strlen(s); /* the terminating NUL is stored too */
            wbuf_diskoff(&w, h->roots[i]);
            wbuf_bytes (&w, s, l);
            assert(l>0 && s[l-1]==0);
        }
    } else {
        wbuf_diskoff(&w, h->unnamed_root);
    }
    assert(w.ndone==size);
    {
        ssize_t r = pwrite(fd, w.buf, w.ndone, 0);
        assert((size_t)r==w.ndone);
    }
    my_free(w.buf);
    return 0;
}
/*
 * Read the tree header from fd (off must be 0) into a newly allocated
 * struct brt_header and store it in *brth.
 *
 * Returns 0 on success, or -1 when the file is empty (the first read returns
 * 0 bytes).  Any other problem is reported by assertion.
 *
 * The stored size is checked before it is used to size the allocation.  Each
 * name is checked for its terminating NUL before strlen() is applied, so a
 * damaged name cannot make strlen() run past the end of the buffer.
 */
int deserialize_brtheader_from (int fd, diskoff off, struct brt_header **brth) {
    struct brt_header *MALLOC(h);
    struct cursor rc;
    int size;
    int sizeagain;
    assert(off==0);
    {
        uint32_t size_n;
        ssize_t r = pread(fd, &size_n, sizeof(size_n), off);
        if (r==0) { my_free(h); return -1; }
        assert(r==sizeof(size_n));
        size = ntohl(size_n);
    }
    assert(size>0);
    rc.buf = my_malloc(size);
    rc.size=size;
    rc.ndone=0;
    {
        /* Re-read from the start: the image includes its own size field. */
        ssize_t r = pread(fd, rc.buf, size, off);
        assert(r==size);
    }
    h->dirty=0;
    sizeagain = rbuf_int(&rc);
    assert(sizeagain==size);
    h->nodesize = rbuf_int(&rc);
    h->freelist = rbuf_diskoff(&rc);
    h->unused_memory = rbuf_diskoff(&rc);
    h->n_named_roots = rbuf_int(&rc);
    if (h->n_named_roots>=0) {
        int i;
        MALLOC_N(h->n_named_roots, h->roots);
        MALLOC_N(h->n_named_roots, h->names);
        for (i=0; i<h->n_named_roots; i++) {
            bytevec nameptr;
            unsigned int len;
            h->roots[i] = rbuf_diskoff(&rc);
            rbuf_bytes(&rc, &nameptr, &len);
            /* rbuf_bytes guarantees len bytes are in the buffer; make sure the last one is the NUL. */
            assert(len>0 && ((const char*)nameptr)[len-1]==0);
            assert(strlen(nameptr)+1==len);
            h->names[i] = memdup(nameptr,len);
        }
        h->unnamed_root = -1;
    } else {
        h->roots = 0;
        h->names = 0;
        h->unnamed_root = rbuf_diskoff(&rc);
    }
    assert(rc.ndone==rc.size);
    my_free(rc.buf);
    *brth = h;
    return 0;
}
#include "brt.h"
#include "memory.h"
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <stdlib.h>
#include <unistd.h>
extern long long n_items_malloced;
/* Smoke test: create a cachetable, create an empty tree in a fresh file, close both, and check for leaks. */
static void test0 (void) {
    char fname[]="testbrt.brt";
    CACHETABLE ct;
    BRT t;
    int r;

    printf("%s:%d test0\n", __FILE__, __LINE__);
    memory_check=1;
    memory_check_all_free();

    r = brt_create_cachetable(&ct, 0); assert(r==0);
    printf("%s:%d test0\n", __FILE__, __LINE__);

    unlink(fname);
    r = open_brt(fname, 0, 1, &t, 1024, ct); assert(r==0);
    printf("%s:%d test0\n", __FILE__, __LINE__);

    printf("%s:%d n_items_malloced=%lld\n", __FILE__, __LINE__, n_items_malloced);
    r = close_brt(t);
    assert(r==0);
    printf("%s:%d n_items_malloced=%lld\n", __FILE__, __LINE__, n_items_malloced);

    r = cachetable_close(ct); assert(r==0);
    memory_check_all_free();
}
/* Insert one pair into a fresh tree and look it up again. */
static void test1 (void) {
    char fname[]="testbrt.brt";
    CACHETABLE ct;
    BRT t;
    bytevec found;
    ITEMLEN foundlen;
    int r;

    memory_check=1;
    memory_check_all_free();
    r = brt_create_cachetable(&ct, 0); assert(r==0);
    unlink(fname);
    r = open_brt(fname, 0, 1, &t, 1024, ct); assert(r==0);

    /* Lengths include the terminating NUL. */
    brt_insert(t, "hello", 6, "there", 6);
    r = brt_lookup(t, "hello", 6, &found, &foundlen);
    assert(r==0);
    assert(strcmp(found, "there")==0);
    assert(foundlen==6);

    r = close_brt(t); assert(r==0);
    r = cachetable_close(ct); assert(r==0);
    memory_check_all_free();
    printf("test1 ok\n");
}
/* Insert 2048 sequentially numbered pairs into a 1024-byte-node tree; memcheck sets the memory_check flag. */
static void test2 (int memcheck) {
    char fname[]="testbrt.brt";
    char key[100],val[100];
    CACHETABLE ct;
    BRT t;
    int r;
    int n;

    memory_check=memcheck;
    printf("%s:%d checking\n", __FILE__, __LINE__);
    memory_check_all_free();
    r = brt_create_cachetable(&ct, 0); assert(r==0);
    unlink(fname);
    r = open_brt(fname, 0, 1, &t, 1024, ct);
    printf("%s:%d did setup\n", __FILE__, __LINE__);
    assert(r==0);

    for (n=0; n<2048; n++) {
        snprintf(key,sizeof(key),"hello%d",n);
        snprintf(val,sizeof(val),"there%d",n);
        brt_insert(t, key, 1+strlen(key), val, 1+strlen(val));
        if (0) {
            /* Disabled: flush after every insert and expect exactly 3 live allocations. */
            int live;
            brt_flush(t);
            live = get_n_items_malloced();
            printf("%s:%d i=%d n_items_malloced=%d\n", __FILE__, __LINE__, n, live);
            if (live!=3) print_malloced_items();
            assert(live==3);
        }
    }
    printf("%s:%d inserted\n", __FILE__, __LINE__);

    r = close_brt(t); assert(r==0);
    r = cachetable_close(ct); assert(r==0);
    memory_check_all_free();
    printf("test2 ok\n");
}
/* Time `count` insertions with sequentially numbered keys into a tree with the given nodesize. */
static void test3 (int nodesize, int count, int memcheck) {
    char fname[]="testbrt.brt";
    char key[100],val[100];
    struct timeval start,stop;
    double elapsed;
    CACHETABLE ct;
    BRT t;
    int r;
    int n;

    memory_check=memcheck;
    memory_check_all_free();
    r = brt_create_cachetable(&ct, 0); assert(r==0);

    /* The timed region covers file creation, the inserts, and the close. */
    gettimeofday(&start, 0);
    unlink(fname);
    r = open_brt(fname, 0, 1, &t, nodesize, ct); assert(r==0);
    for (n=0; n<count; n++) {
        snprintf(key,sizeof(key),"hello%d",n);
        snprintf(val,sizeof(val),"there%d",n);
        brt_insert(t, key, 1+strlen(key), val, 1+strlen(val));
    }
    r = close_brt(t); assert(r==0);
    r = cachetable_close(ct); assert(r==0);
    memory_check_all_free();
    gettimeofday(&stop, 0);

    elapsed = (stop.tv_sec-start.tv_sec)+1e-6*(stop.tv_usec-start.tv_usec);
    printf("serial insertions: blocksize=%d %d insertions in %.3f seconds, %.2f insertions/second\n", nodesize, count, elapsed, count/elapsed);
}
/* Time `count` insertions whose keys are derived from random() into a tree with the given nodesize. */
static void test4 (int nodesize, int count, int memcheck) {
    char fname[]="testbrt.brt";
    char key[100],val[100];
    struct timeval start,stop;
    double elapsed;
    CACHETABLE ct;
    BRT t;
    int r;
    int n;

    /* Unlike test3, the clock starts before the cachetable is created. */
    gettimeofday(&start, 0);
    unlink(fname);
    memory_check=memcheck;
    memory_check_all_free();
    r = brt_create_cachetable(&ct, 0); assert(r==0);
    r = open_brt(fname, 0, 1, &t, nodesize,ct); assert(r==0);
    for (n=0; n<count; n++) {
        int rv = random();
        snprintf(key,sizeof(key),"hello%d",rv);
        snprintf(val,sizeof(val),"there%d",n);
        brt_insert(t, key, 1+strlen(key), val, 1+strlen(val));
    }
    r = close_brt(t); assert(r==0);
    r = cachetable_close(ct); assert(r==0);
    memory_check_all_free();
    gettimeofday(&stop, 0);

    elapsed = (stop.tv_sec-start.tv_sec)+1e-6*(stop.tv_usec-start.tv_usec);
    printf("random insertions: blocksize=%d %d insertions in %.3f seconds, %.2f insertions/second\n", nodesize, count, elapsed, count/elapsed);
}
/*
 * Random insert/lookup test.  Writes limit/2 pairs with random keys in
 * [0,limit), remembering the latest value written for each key in values[]
 * (-1 means "never written").  Then probes limit/2 random keys and checks
 * every key that was written against the remembered value.
 *
 * Progress marks ("w"/"r") are printed every 1000 iterations.  The flush is
 * braced together with the print; without the braces fflush() ran on every
 * iteration.
 */
static void test5 (void) {
    int r;
    BRT t;
    int limit=100000;
    int *values;
    int i;
    CACHETABLE ct;
    char fname[]="testbrt.brt";
    memory_check_all_free();
    MALLOC_N(limit,values);
    for (i=0; i<limit; i++) values[i]=-1;
    unlink(fname);
    r = brt_create_cachetable(&ct, 0); assert(r==0);
    r = open_brt(fname, 0, 1, &t, 1<<12, ct); assert(r==0);
    for (i=0; i<limit/2; i++) {
        char key[100],val[100];
        int rk = random()%limit;
        int rv = random();
        if (i%1000==0) { printf("w"); fflush(stdout); }
        values[rk] = rv;
        snprintf(key, 100, "key%d", rk);
        snprintf(val, 100, "val%d", rv);
        brt_insert(t, key, 1+strlen(key), val, 1+strlen(val));
    }
    printf("\n");
    for (i=0; i<limit/2; i++) {
        int rk = random()%limit;
        if (values[rk]>=0) {
            char key[100], valexpected[100];
            bytevec val;
            ITEMLEN vallen;
            if (i%1000==0) { printf("r"); fflush(stdout); }
            snprintf(key, 100, "key%d", rk);
            snprintf(valexpected, 100, "val%d", values[rk]);
            r = brt_lookup(t, key, 1+strlen(key), &val, &vallen);
            assert(r==0);
            assert(vallen==(1+strlen(valexpected)));
            assert(memcmp(val,valexpected,vallen)==0);
        }
    }
    printf("\n");
    my_free(values);
    r = close_brt(t); assert(r==0);
    r = cachetable_close(ct); assert(r==0);
    memory_check_all_free();
}
/* Create an empty tree in a fresh file and run dump_brt() on it. */
static void test_dump_empty_db (void) {
    char fname[]="testbrt.brt";
    CACHETABLE ct;
    BRT t;
    int r;

    memory_check=1;
    r = brt_create_cachetable(&ct, 0); assert(r==0);
    unlink(fname);
    r = open_brt(fname, 0, 1, &t, 1024, ct); assert(r==0);

    dump_brt(t);

    r = close_brt(t); assert(r==0);
    r = cachetable_close(ct); assert(r==0);
    memory_check_all_free();
}
/* Test running multiple trees in different files */
/*
 * Two trees in two files sharing one cachetable.  Phase 1 writes 10000 pairs
 * to each ("val<i>" in the first, "Val<i>" in the second) and closes
 * everything; phase 2 reopens both files and checks every pair.
 */
static void test_multiple_files_of_size (int size) {
    const char *n0 = "test0.brt";
    const char *n1 = "test1.brt";
    char key[100],val[100];
    CACHETABLE ct;
    BRT t0,t1;
    int r;
    int n;

    printf("test_multiple_files_of_size(%d)\n", size);
    unlink(n0);
    unlink(n1);
    memory_check_all_free();

    /* Phase 1: populate. */
    r = brt_create_cachetable(&ct, 0); assert(r==0);
    r = open_brt(n0, 0, 1, &t0, size, ct); assert(r==0);
    r = open_brt(n1, 0, 1, &t1, size, ct); assert(r==0);
    for (n=0; n<10000; n++) {
        snprintf(key, sizeof(key), "key%d", n);
        snprintf(val, sizeof(val), "val%d", n);
        brt_insert(t0, key, 1+strlen(key), val, 1+strlen(val));
        snprintf(val, sizeof(val), "Val%d", n);
        brt_insert(t1, key, 1+strlen(key), val, 1+strlen(val));
    }
    verify_brt(t0);
    verify_brt(t1);
    r = close_brt(t0); assert(r==0);
    r = close_brt(t1); assert(r==0);
    r = cachetable_close(ct); assert(r==0);
    memory_check_all_free();

    /* Phase 2: now see if the data is all there (the reopen passes 1<<12 rather than size). */
    r = brt_create_cachetable(&ct, 0); assert(r==0);
    r = open_brt(n0, 0, 0, &t0, 1<<12, ct);
    printf("%s:%d r=%d\n", __FILE__, __LINE__,r);
    assert(r==0);
    r = open_brt(n1, 0, 0, &t1, 1<<12, ct); assert(r==0);
    for (n=0; n<10000; n++) {
        bytevec actualval;
        ITEMLEN actuallen;
        snprintf(key, sizeof(key), "key%d", n);

        snprintf(val, sizeof(val), "val%d", n);
        r = brt_lookup(t0, key, 1+strlen(key), &actualval, &actuallen);
        assert(r==0);
        assert(strcmp(val,actualval)==0);
        assert(actuallen==1+strlen(val));

        snprintf(val, sizeof(val), "Val%d", n);
        r = brt_lookup(t1, key, 1+strlen(key), &actualval, &actuallen);
        assert(r==0);
        assert(strcmp(val,actualval)==0);
        assert(actuallen==1+strlen(val));
    }
    r = close_brt(t0); assert(r==0);
    r = close_brt(t1); assert(r==0);
    r = cachetable_close(ct); assert(r==0);
    memory_check_all_free();
}
/* Run the two-file test with 4 KiB nodes and then with 1 MiB nodes. */
static void test_multiple_files (void) {
    static const int sizes[] = { 1<<12, 1<<20 };
    unsigned int k;
    for (k=0; k<sizeof(sizes)/sizeof(sizes[0]); k++) {
        test_multiple_files_of_size(sizes[k]);
    }
}
/*
 * Create a named database ("db1") inside a file, insert one pair, close
 * everything, then reopen the database by name and check the pair is there.
 *
 * The result of brt_insert() is now stored in r before it is asserted on.
 * The assert used to test the stale status of the preceding open_brt().
 */
static void test_named_db (void) {
    const char *n0 = "test0.brt";
    const char *n1 = "test1.brt";
    CACHETABLE ct;
    BRT t0;
    int r;
    printf("test_named_db\n");
    unlink(n0);
    unlink(n1);
    memory_check_all_free();
    r = brt_create_cachetable(&ct, 0); assert(r==0);
    r = open_brt(n0, "db1", 1, &t0, 1<<12, ct); assert(r==0);
    r = brt_insert(t0, "good", 5, "day", 4); assert(r==0);
    r = close_brt(t0); assert(r==0);
    r = cachetable_close(ct); assert(r==0);
    memory_check_all_free();
    memory_check_all_free();
    r = brt_create_cachetable(&ct, 0); assert(r==0);
    r = open_brt(n0, "db1", 0, &t0, 1<<12, ct); assert(r==0);
    {
        bytevec val;
        ITEMLEN vallen;
        r = brt_lookup(t0, "good", 5, &val, &vallen);
        assert(r==0);
        assert(vallen==4);
        assert(strcmp(val,"day")==0);
    }
    r = close_brt(t0); assert(r==0);
    r = cachetable_close(ct); assert(r==0);
    memory_check_all_free();
}
/* Open two named databases (in two files) through one cachetable, store a
 * different pair in each, and after a close/reopen cycle check that each
 * database holds only its own pair. */
static void test_multiple_dbs (void) {
    const char *n0 = "test0.brt";
    const char *n1 = "test1.brt";
    CACHETABLE ct;
    BRT t0,t1;
    int r;
    printf("test_multiple_dbs: ");
    unlink(n0);
    unlink(n1);
    memory_check_all_free();

    r = brt_create_cachetable(&ct, 0);            assert(r==0);
    r = open_brt(n0, "db1", 1, &t0, 1<<12, ct);   assert(r==0);
    r = open_brt(n1, "db2", 1, &t1, 1<<12, ct);   assert(r==0);
    /* Store each insert's status in r; previously these asserts re-tested the last open_brt. */
    r = brt_insert(t0, "good", 5, "grief", 6);    assert(r==0);
    r = brt_insert(t1, "bad", 4, "night", 6);     assert(r==0);
    r = close_brt(t0);                            assert(r==0);
    r = close_brt(t1);                            assert(r==0);
    r = cachetable_close(ct);                     assert(r==0);
    memory_check_all_free();

    r = brt_create_cachetable(&ct, 0);            assert(r==0);
    r = open_brt(n0, "db1", 0, &t0, 1<<12, ct);   assert(r==0);
    r = open_brt(n1, "db2", 0, &t1, 1<<12, ct);   assert(r==0);
    {
        bytevec val;
        ITEMLEN vallen;
        /* "good" lives only in t0. */
        r = brt_lookup(t0, "good", 5, &val, &vallen);
        assert(r==0);
        assert(vallen==6);
        assert(strcmp(val,"grief")==0);
        r = brt_lookup(t1, "good", 5, &val, &vallen);
        assert(r!=0);
        /* "bad" lives only in t1. */
        r = brt_lookup(t0, "bad", 4, &val, &vallen);
        assert(r!=0);
        r = brt_lookup(t1, "bad", 4, &val, &vallen);
        assert(r==0);
        assert(vallen==6);
        assert(strcmp(val,"night")==0);
    }
    r = close_brt(t0);                            assert(r==0);
    r = close_brt(t1);                            assert(r==0);
    r = cachetable_close(ct);                     assert(r==0);
    memory_check_all_free();
    printf("ok\n");
}
/* Test to see a single file can contain many databases.
 * Opens MANYN named databases ("db0", "db1", ...) in one file, inserts one
 * pair into each, and closes them all. */
static void test_multiple_dbs_many (void) {
    enum { MANYN = 16 };
    int i, r;
    const char *name = "test.brt";
    CACHETABLE ct;
    BRT trees[MANYN];
    printf("test_multiple_dbs_many:\n");
    memory_check_all_free();
    unlink(name);
    r = brt_create_cachetable(&ct, MANYN+4); assert(r==0);
    for (i=0; i<MANYN; i++) {
        char dbname[20];
        snprintf(dbname, 20, "db%d", i);
        r = open_brt(name, dbname, 1, &trees[i], 1<<12, ct);
        assert(r==0);
    }
    for (i=0; i<MANYN; i++) {
        char k[20], v[20];
        snprintf(k, 20, "key%d", i);
        snprintf(v, 20, "val%d", i);
        /* Check the insert status instead of silently dropping it. */
        r = brt_insert(trees[i], k, strlen(k)+1, v, strlen(v)+1);
        assert(r==0);
    }
    for (i=0; i<MANYN; i++) {
        r = close_brt(trees[i]); assert(r==0);
    }
    r = cachetable_close(ct); assert(r==0);
    memory_check_all_free();
}
/* Test to see that a single db can be opened many times.
 * Every handle refers to the same unnamed database in the same file; a pair
 * inserted through any handle must be visible through trees[0]. */
static void test_multiple_brts_one_db_one_file (void) {
    enum { MANYN = 2 };
    int i, r;
    const char *name = "test.brt";
    CACHETABLE ct;
    BRT trees[MANYN];
    printf("test_multiple_brts_one_db_one_file:");
    memory_check_all_free();
    unlink(name);
    r = brt_create_cachetable(&ct, 32); assert(r==0);
    for (i=0; i<MANYN; i++) {
        /* Only the first open passes the create flag. */
        r = open_brt(name, 0, (i==0), &trees[i], 1<<12, ct);
        assert(r==0);
    }
    for (i=0; i<MANYN; i++) {
        char k[20], v[20];
        snprintf(k, 20, "key%d", i);
        snprintf(v, 20, "val%d", i);
        /* Check the insert status instead of silently dropping it. */
        r = brt_insert(trees[i], k, strlen(k)+1, v, strlen(v)+1);
        assert(r==0);
    }
    for (i=0; i<MANYN; i++) {
        char k[20],vexpect[20];
        bytevec v;
        ITEMLEN vlen;
        snprintf(k, 20, "key%d", i);
        snprintf(vexpect, 20, "val%d", i);
        r=brt_lookup(trees[0], k, strlen(k)+1, &v, &vlen);
        assert(r==0);
        assert(vlen==1+strlen(vexpect));
        assert(strcmp(v, vexpect)==0);
    }
    for (i=0; i<MANYN; i++) {
        r=close_brt(trees[i]); assert(r==0);
    }
    r = cachetable_close(ct); assert(r==0);
    memory_check_all_free();
    printf(" ok\n");
}
/* Check to see if data can be read that was written.
 * Phases: create and close an empty tree; reopen and insert one pair; reopen
 * and insert NVALS pairs (verifying the tree and re-reading every pair during
 * the first 600 inserts); read everything back; close, reopen, and read
 * everything back once more.
 * Every lookup checks the returned value and length, not just the status. */
static void test_read_what_was_written (void) {
    const char *n="testbrt.brt";
    CACHETABLE ct;
    BRT brt;
    int r;
    const int NVALS=10000;
    printf("test_read_what_was_written(): "); fflush(stdout);
    unlink(n);
    memory_check_all_free();
    r = brt_create_cachetable(&ct, 0);       assert(r==0);
    r = open_brt(n, 0, 1, &brt, 1<<12, ct);  assert(r==0);
    r = close_brt(brt);                      assert(r==0);
    r = cachetable_close(ct);                assert(r==0);
    memory_check_all_free();

    /* Now see if we can read an empty tree in. */
    r = brt_create_cachetable(&ct, 0);       assert(r==0);
    r = open_brt(n, 0, 0, &brt, 1<<12, ct);  assert(r==0);
    /* See if we can put something in it. */
    r = brt_insert(brt, "hello", 6, "there", 6); assert(r==0);
    r = close_brt(brt);                      assert(r==0);
    r = cachetable_close(ct);                assert(r==0);
    memory_check_all_free();

    /* Now see if we can read it in and get the value. */
    r = brt_create_cachetable(&ct, 0);       assert(r==0);
    r = open_brt(n, 0, 0, &brt, 1<<12, ct);  assert(r==0);
    {
        bytevec val;
        ITEMLEN vallen;
        r = brt_lookup(brt, "hello", 6, &val, &vallen);
        assert(r==0);
        assert(vallen==6);
        assert(strcmp(val,"there")==0);
    }
    /* Keep the call outside assert() so that it still runs when NDEBUG is defined. */
    r = verify_brt(brt); assert(r==0);

    /* Now put a bunch (NVALS) of things in. */
    {
        int i;
        for (i=0; i<NVALS; i++) {
            char key[100],val[100];
            snprintf(key, 100, "key%d", i);
            snprintf(val, 100, "val%d", i);
            if (i<600) {
                int verify_result=verify_brt(brt);
                assert(verify_result==0);
            }
            r = brt_insert(brt, key, strlen(key)+1, val, strlen(val)+1);
            assert(r==0);
            /* The full verify-and-reread pass is only done for the first 600 inserts. */
            if (i<600) {
                int verify_result=verify_brt(brt);
                int j;
                if (verify_result) {
                    dump_brt(brt);
                    assert(0);
                }
                for (j=0; j<=i; j++) {
                    char expectedval[100];
                    bytevec actualval;
                    ITEMLEN actuallen;
                    snprintf(key, 100, "key%d", j);
                    snprintf(expectedval, 100, "val%d", j);
                    r=brt_lookup(brt, key, strlen(key)+1, &actualval, &actuallen);
                    if (r!=0) {
                        printf("%s:%d r=%d on lookup(key=%s) after i=%d\n", __FILE__, __LINE__, r, key, i);
                        dump_brt(brt);
                    }
                    assert(r==0);
                    assert(actuallen==1+strlen(expectedval));
                    assert(strcmp(actualval, expectedval)==0);
                }
            }
        }
    }
    printf("Now read them out\n");
    r = verify_brt(brt); assert(r==0);

    /* See if we can read them all out again. */
    {
        int i;
        for (i=0; i<NVALS; i++) {
            char key[100],expectedval[100];
            bytevec val;
            ITEMLEN vallen;
            snprintf(key, 100, "key%d", i);
            snprintf(expectedval, 100, "val%d", i);
            r=brt_lookup(brt, key, strlen(key)+1, &val, &vallen);
            if (r!=0) printf("%s:%d r=%d on key=%s\n", __FILE__, __LINE__, r, key);
            assert(r==0);
            assert(vallen==1+strlen(expectedval));
            assert(strcmp(val, expectedval)==0);
        }
    }
    r = close_brt(brt);                      assert(r==0);
    r = cachetable_close(ct);                assert(r==0);
    memory_check_all_free();

    /* Reopen the file and make sure everything is still there. */
    r = brt_create_cachetable(&ct, 0);       assert(r==0);
    r = open_brt(n, 0, 0, &brt, 1<<12, ct);  assert(r==0);
    {
        bytevec val;
        ITEMLEN vallen;
        r = brt_lookup(brt, "hello", 6, &val, &vallen);
        assert(r==0);
        assert(vallen==6);
        assert(strcmp(val,"there")==0);
    }
    {
        int i;
        for (i=0; i<NVALS; i++) {
            char key[100],expectedval[100];
            bytevec val;
            ITEMLEN vallen;
            snprintf(key, 100, "key%d", i);
            snprintf(expectedval, 100, "val%d", i);
            r=brt_lookup(brt, key, strlen(key)+1, &val, &vallen);
            if (r!=0) printf("%s:%d r=%d on key=%s\n", __FILE__, __LINE__, r, key);
            assert(r==0);
            assert(vallen==1+strlen(expectedval));
            assert(strcmp(val, expectedval)==0);
        }
    }
    r = close_brt(brt);                      assert(r==0);
    r = cachetable_close(ct);                assert(r==0);
    memory_check_all_free();
    printf(" ok\n");
}
extern void pma_show_stats (void);
/* Test c_get(DB_LAST) on an empty tree.
 * Both DB_LAST and DB_FIRST must report DB_NOTFOUND on a freshly created tree. */
void test_cursor_last_empty(void) {
    const char *fname = "testbrt.brt";
    CACHETABLE ct;
    BRT brt;
    BRT_CURSOR cursor;
    DBT keydbt, valdbt;
    int r;

    fputs(__FUNCTION__, stdout);
    unlink(fname);
    memory_check_all_free();

    r = brt_create_cachetable(&ct, 0);
    assert(r==0);
    r = open_brt(fname, 0, 1, &brt, 1<<12, ct);
    assert(r==0);
    r = brt_cursor(brt, &cursor);
    assert(r==0);
    r = ybt_init(&keydbt);
    assert(r==0);
    r = ybt_init(&valdbt);
    assert(r==0);

    r = brt_c_get(cursor, &keydbt, &valdbt, DB_LAST);
    assert(r==DB_NOTFOUND);
    r = brt_c_get(cursor, &keydbt, &valdbt, DB_FIRST);
    assert(r==DB_NOTFOUND);

    /* NOTE(review): the status of close_brt is overwritten without being checked. */
    r = close_brt(brt);
    r = cachetable_close(ct);
    assert(r==0);
    memory_check_all_free();
}
/* Run the whole black-box suite in a fixed order.
 * Several of the early tests are followed by memory_check_all_free(). */
static void brt_blackbox_test (void) {
test_multiple_dbs_many(); memory_check_all_free();
test_cursor_last_empty(); memory_check_all_free();
test_multiple_brts_one_db_one_file(); memory_check_all_free();
test_dump_empty_db(); memory_check_all_free();
test_read_what_was_written();
test_named_db();
memory_check_all_free();
test_multiple_dbs();
memory_check_all_free();
printf("test0 A\n");
test0();
printf("test0 B\n");
test0(); /* Make sure it works twice. */
printf("test1\n");
test1();
printf("test2 checking memory\n");
test2(1);
printf("test2 faster\n");
test2(0);
printf("test5\n");
test5();
printf("test_multiple_files\n");
test_multiple_files();
printf("test3 slow\n");
/* The global memory_check is cleared here and stays cleared for the remaining tests. */
memory_check=0;
test3(2048, 1<<15, 1);
printf("test4 slow\n");
test4(2048, 1<<15, 1);
printf("test3 fast\n");
pma_show_stats();
test3(1<<15, 1024, 1);
test4(1<<15, 1024, 1);
/* NOTE(review): this banner repeats "test3 fast" although the arguments below differ from the previous pair. */
printf("test3 fast\n");
test3(1<<18, 1<<20, 0);
test4(1<<18, 1<<20, 0);
/* Larger configurations, currently disabled. */
// test3(1<<19, 1<<20, 0);
// test4(1<<19, 1<<20, 0);
// test3(1<<20, 1<<20, 0);
// test4(1<<20, 1<<20, 0);
// test3(1<<20, 1<<21, 0);
// test4(1<<20, 1<<21, 0);
// test3(1<<20, 1<<22, 0);
// test4(1<<20, 1<<22, 0);
}
/* Entry point of the test program: run the black-box suite, then report "ok".
 * The command-line arguments are not used. */
int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) {
    brt_blackbox_test();
    puts("ok");
    return 0;
}
/* -*- mode: C; c-basic-offset: 4 -*- */
/* Buffered repository tree.
* Observation: The in-memory representation of a node doesn't have to be the same as the on-disk representation.
* Goal for the in-memory representation: fast
* Goal for on-disk: small
*
* So to get this running fast, I'll make a version that doesn't do range queries:
* use a hash table for in-memory
* simply write the strings on disk.
* Later I'll do a PMA or a skiplist for the in-memory version.
* Also, later I'll convert the format to network order from host order.
* Later, for on disk, I'll compress it (perhaps with gzip, perhaps with the bzip2 algorithm.)
*
* The collection of nodes forms a data structure like a B-tree. The complexities of keeping it balanced apply.
*
* We always write nodes to a new location on disk.
* The nodes themselves contain the information about the tree structure.
* Q: During recovery, how do we find the root node without looking at every block on disk?
* A: The root node is either the designated root near the front of the freelist.
* The freelist is updated infrequently. Before updating the stable copy of the freelist, we make sure that
* the root is up-to-date. We can make the freelist-and-root update be an arbitrarily small fraction of disk bandwidth.
*
*/
#include "brttypes.h"
#include "brt.h"
#include "memory.h"
#include "brt-internal.h"
#include "cachetable.h"
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
extern long long n_items_malloced;
/* Release an in-memory node together with the storage it owns:
 * for a nonleaf, the child keys and any per-child hash tables;
 * for a leaf, the PMA buffer (if it is still present). */
void brtnode_free (BRTNODE node) {
    if (node->height<=0) {
        /* The buffer may have been freed already, in some cases. */
        if (node->u.l.buffer) {
            pma_free(&node->u.l.buffer);
        }
    } else {
        int k, c;
        /* There is one fewer key than there are children. */
        for (k=0; k<node->u.n.n_children-1; k++) {
            my_free((void*)node->u.n.childkeys[k]);
        }
        for (c=0; c<node->u.n.n_children; c++) {
            if (node->u.n.htables[c]==0) continue;
            hashtable_free(&node->u.n.htables[c]);
        }
    }
    my_free(node);
}
/* Flush callback registered with the cachetable for tree nodes.
 * If write_me is set the node is serialized to the cachefile's fd;
 * unless keep_me is set the in-memory node is then freed. */
void brtnode_flush_callback (CACHEFILE cachefile, diskoff nodename, void *brtnode_v, int write_me, int keep_me) {
    BRTNODE node = brtnode_v;
    if (0) {
        /* Debug tracing, compiled but disabled. */
        printf("%s:%d brtnode_flush_callback %p keep_me=%d height=%d", __FILE__, __LINE__, node, keep_me, node->height);
        if (node->height==0) printf(" pma=%p", node->u.l.buffer);
        printf("\n");
    }
    assert(node->thisnodename==nodename);
    if (write_me) {
        serialize_brtnode_to(cachefile_fd(cachefile), node->thisnodename, node->nodesize, node);
    }
    if (keep_me) return;
    brtnode_free(node);
}
/* Fetch callback registered with the cachetable for tree nodes.
 * extraargs carries the node size smuggled through a pointer; the node is
 * deserialized from the cachefile's fd into *brtnode_pv.
 * Returns whatever deserialize_brtnode_from() returns. */
int brtnode_fetch_callback (CACHEFILE cachefile, diskoff nodename, void **brtnode_pv,void*extraargs) {
    return deserialize_brtnode_from(cachefile_fd(cachefile),
                                    nodename,
                                    (BRTNODE*)brtnode_pv,
                                    (long)extraargs);
}
/* Flush callback registered with the cachetable for the file header (block 0).
 * If write_me is set the header is serialized; unless keep_me is set the
 * header, and its named-root tables if it has any, are freed. */
void brtheader_flush_callback (CACHEFILE cachefile, diskoff nodename, void *header_v, int write_me, int keep_me) {
    struct brt_header *hdr = header_v;
    assert(nodename==0);
    assert(!hdr->dirty); // shouldn't be dirty once it is unpinned.
    if (write_me) {
        serialize_brt_header_to(cachefile_fd(cachefile), hdr);
    }
    if (keep_me) return;
    if (hdr->n_named_roots>0) {
        int which;
        for (which=0; which<hdr->n_named_roots; which++) {
            my_free(hdr->names[which]);
        }
        my_free(hdr->names);
        my_free(hdr->roots);
    }
    my_free(hdr);
}
/* Fetch callback registered with the cachetable for the file header.
 * The header always lives at block 0; extraargs is unused.
 * Returns whatever deserialize_brtheader_from() returns. */
int brtheader_fetch_callback (CACHEFILE cachefile, diskoff nodename, void **headerp_v, void*extraargs __attribute__((__unused__))) {
    assert(nodename==0);
    return deserialize_brtheader_from(cachefile_fd(cachefile), nodename, (struct brt_header **)headerp_v);
}
/* Pin block 0 of CF in the cachetable and hand it back through *header.
 * Returns 0 on success; on failure returns the cachetable's error code and
 * leaves *header untouched. */
int read_and_pin_brt_header (CACHEFILE cf, struct brt_header **header) {
    void *pinned;
    int r = cachetable_get_and_pin(cf, 0, &pinned,
                                   brtheader_flush_callback, brtheader_fetch_callback, 0);
    if (r==0) {
        *header = pinned;
    }
    return r;
}
/* Unpin the header (block 0) of BRT, passing along its dirty flag, then clear
 * the flag and drop the tree's pointer to the header.
 * Returns the result of cachetable_unpin(). */
int unpin_brt_header (BRT brt) {
    const int unpin_result = cachetable_unpin(brt->cf, 0, brt->h->dirty);
    brt->h->dirty = 0;
    brt->h = 0;
    return unpin_result;
}
/* A key/value pair, each half described by a pointer and a length.
 * Note that KVPAIR is a POINTER to the struct, not the struct itself. */
typedef struct kvpair {
bytevec key;
unsigned int keylen;
bytevec val;
unsigned int vallen;
} *KVPAIR;
int kvpair_compare (const void *av, const void *bv) {
const KVPAIR a = (const KVPAIR)av;
const KVPAIR b = (const KVPAIR)bv;
int r = keycompare(a->key, a->keylen, b->key, b->keylen);
//printf("keycompare(%s,\n %s)-->%d\n", a->key, b->key, r);
return r;
}
#if 0
/* in a leaf, they are already sorted because they are in a PMA */
/* Disabled code (inside #if 0): collects every entry of the node's first mdict
 * into a freshly calloc'ed array of kvpairs, sorts it with kvpair_compare, and
 * returns the array and its length through *pairs and *n_pairs.
 * NOTE(review): it refers to node->mdicts and node->n_children, which the live
 * code in this file does not use (it uses node->u.l / node->u.n), so it likely
 * no longer compiles if re-enabled. */
static void brtleaf_make_sorted_kvpairs (BRTNODE node, KVPAIR *pairs, int *n_pairs) {
int n_entries = mdict_n_entries(node->mdicts[0]);
KVPAIR result=my_calloc(n_entries, sizeof(*result));
int resultcounter=0;
assert(node->n_children==0 && node->height==0);
MDICT_ITERATE(node->mdicts[0], key, keylen, data, datalen, ({
result[resultcounter].key = key;
result[resultcounter].keylen = keylen;
result[resultcounter].val = data;
result[resultcounter].vallen = datalen;
resultcounter++;
}));
assert(resultcounter==n_entries);
qsort(result, resultcounter, sizeof(*result), kvpair_compare);
*pairs = result;
*n_pairs = resultcounter;
// {
// int i;
// printf("Sorted pairs (sizeof *result=%d):\n", sizeof(*result));
// for (i=0; i<resultcounter; i++) {
// printf(" %s\n", result[i].key);
// }
//
// }
}
#endif
/* Allocate SIZE bytes of disk space by bumping the header's unused_memory
 * mark; the header must already be in memory (brt->h).
 * Returns the offset of the newly allocated block.
 * Forgot to handle the case where there is something in the freelist. */
diskoff malloc_diskblock_header_is_in_memory (BRT brt, int size) {
    const diskoff block_start = brt->h->unused_memory;
    brt->h->unused_memory += size;
    return block_start;
}
/* Allocate SIZE bytes of disk space for BRT and return the block's offset.
 * The live (#else) branch assumes the header is already in memory and simply
 * forwards to malloc_diskblock_header_is_in_memory(); the disabled (#if 0)
 * branch is an older variant that pinned and rewrote the header itself. */
diskoff malloc_diskblock (BRT brt, int size) {
#if 0
int r = read_and_pin_brt_header(brt->fd, &brt->h);
assert(r==0);
{
diskoff result = malloc_diskblock_header_is_in_memory(brt, size);
r = write_brt_header(brt->fd, &brt->h);
assert(r==0);
return result;
}
#else
return malloc_diskblock_header_is_in_memory(brt,size);
#endif
}
/* Fill in a freshly allocated node N of tree T.
 * The node gets the tree's current nodesize, the given disk name, and height.
 * A leaf (height 0) gets a new empty PMA buffer; a nonleaf gets zero children
 * with every key, child, hash-table slot and byte counter cleared. */
static void initialize_brtnode (BRT t, BRTNODE n, diskoff nodename, int height) {
    n->tag = TYP_BRTNODE;
    n->nodesize = t->h->nodesize;
    n->thisnodename = nodename;
    n->height = height;
    assert(height>=0);
    if (height<=0) {
        static int rcount=0; /* Counts leaf initializations; only of interest when debugging. */
        int r = pma_create(&n->u.l.buffer);
        assert(r==0);
        rcount++;
        n->u.l.n_bytes_in_buffer = 0;
        return;
    }
    {
        int slot;
        n->u.n.n_children = 0;
        n->u.n.totalchildkeylens = 0;
        n->u.n.n_bytes_in_hashtables = 0;
        /* Keys are cleared for TREE_FANOUT slots, children for TREE_FANOUT+1 slots. */
        for (slot=0; slot<TREE_FANOUT+1; slot++) {
            if (slot<TREE_FANOUT) {
                n->u.n.childkeys[slot] = 0;
                n->u.n.childkeylens[slot] = 0;
            }
            n->u.n.children[slot] = 0;
            n->u.n.htables[slot] = 0;
            n->u.n.n_bytes_in_hashtable[slot] = 0;
        }
    }
}
/* Allocate a new node of the given height for tree T: reserve a disk block of
 * the tree's nodesize, initialize the in-memory node, store it in *result, and
 * register it with the cachetable.  Failures are caught by assertions. */
static void create_new_brtnode (BRT t, BRTNODE *result, int height) {
    TAGMALLOC(BRTNODE, n);
    int put_result;
    diskoff diskname = malloc_diskblock(t, t->h->nodesize);
    assert(n);
    assert(t->h->nodesize>0);
    initialize_brtnode(t, n, diskname, height);
    *result = n;
    assert(n->nodesize>0);
    /* The node size travels to the fetch callback disguised as a pointer. */
    put_result = cachetable_put(t->cf, n->thisnodename, n,
                                brtnode_flush_callback, brtnode_fetch_callback, (void*)t->h->nodesize);
    assert(put_result==0);
}
/* Empty NODE and remove it from the cachetable without writing it back.
 * A leaf has its PMA buffer freed.  A nonleaf has every per-child hash table
 * freed and every byte counter zeroed, and is then relabelled as an empty leaf
 * (height 0, NULL buffer).
 * The child keys are not freed here. */
void delete_node (BRT t, BRTNODE node) {
    assert(node->height>=0);
    if (node->height==0) {
        if (node->u.l.buffer) {
            pma_free(&node->u.l.buffer);
        }
        node->u.l.n_bytes_in_buffer=0;
    } else {
        int i;
        for (i=0; i<node->u.n.n_children; i++) {
            if (node->u.n.htables[i]) {
                hashtable_free(&node->u.n.htables[i]);
            }
            /* Clear the counter of THIS child (this used to clear slot 0 on every iteration). */
            node->u.n.n_bytes_in_hashtable[i]=0;
        }
        node->u.n.n_bytes_in_hashtables = 0;
        node->u.n.totalchildkeylens=0;
        node->u.n.n_children=0;
        node->height=0;
        node->u.l.buffer=0; /* It's a leaf now (height==0) so set the buffer to NULL. */
    }
    cachetable_remove(t->cf, node->thisnodename, 0); /* Don't write it back to disk. */
}
/* Insert a pair into the PMA buffer of leaf NODE and add the pair's size
 * (key + value + KEY_VALUE_OVERHEAD) to the leaf's byte count.
 * A failing pma_insert() trips an assertion. */
static void insert_to_buffer_in_leaf (BRTNODE node, bytevec key, unsigned int keylen, bytevec val, unsigned int vallen) {
    unsigned int pair_size = KEY_VALUE_OVERHEAD + keylen + vallen;
    int insert_result = pma_insert(node->u.l.buffer, key, keylen, val, vallen);
    assert(insert_result==0);
    node->u.l.n_bytes_in_buffer += pair_size;
}
/* Insert a pair into the hash table that nonleaf NODE keeps for child number
 * CHILDNUM.  On success both the per-child and the node-wide byte counters grow
 * by the pair's size (key + value + KEY_VALUE_OVERHEAD) and 0 is returned.
 * On failure the counters are untouched and hash_insert()'s error is returned. */
static int insert_to_hash_in_nonleaf (BRTNODE node, int childnum, bytevec key, unsigned int keylen, bytevec val, unsigned int vallen) {
    unsigned int pair_size = KEY_VALUE_OVERHEAD + keylen + vallen;
    int r = hash_insert(node->u.n.htables[childnum], key, keylen, val, vallen);
    if (r==0) {
        node->u.n.n_bytes_in_hashtable[childnum] += pair_size;
        node->u.n.n_bytes_in_hashtables += pair_size;
    }
    return r;
}
/* Split leaf NODE of tree T into two newly created leaves.
 * The pairs are visited with PMA_ITERATE: they go into A until A holds at
 * least half of NODE's bytes, and every later pair goes into B.  The key that
 * completes A becomes the split key; a memdup'ed copy of it is returned through
 * *splitkey (with its length in *splitkeylen).
 * NODE is deleted (removed from the cachetable) before returning.
 * The new leaves are returned through *nodea and *nodeb.
 * Always returns 0; problems are caught by assertions. */
int brtleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, bytevec*splitkey, ITEMLEN *splitkeylen) {
int did_split=0;
BRTNODE A,B;
assert(node->height==0);
assert(t->h->nodesize>=node->nodesize); /* otherwise we might be in trouble because the nodesize shrank. */
create_new_brtnode(t, &A, 0);
create_new_brtnode(t, &B, 0);
//printf("%s:%d A PMA= %p\n", __FILE__, __LINE__, A->u.l.buffer);
//printf("%s:%d B PMA= %p\n", __FILE__, __LINE__, B->u.l.buffer);
assert(A->nodesize>0);
assert(B->nodesize>0);
assert(node->nodesize>0);
//printf("%s:%d A is at %lld\n", __FILE__, __LINE__, A->thisnodename);
//printf("%s:%d B is at %lld nodesize=%d\n", __FILE__, __LINE__, B->thisnodename, B->nodesize);
assert(node->height>0 || node->u.l.buffer!=0);
/* Fill A until it holds at least half the bytes, remember the key at that point, and put the rest in B. */
PMA_ITERATE(node->u.l.buffer, key, keylen, val, vallen,
({
if (!did_split) {
insert_to_buffer_in_leaf(A, key, keylen, val, vallen);
if (A->u.l.n_bytes_in_buffer *2 >= node->u.l.n_bytes_in_buffer) {
*splitkey = memdup(key, keylen);
*splitkeylen = keylen;
did_split=1;
}
} else {
insert_to_buffer_in_leaf(B, key, keylen, val, vallen);
}
}));
assert(node->height>0 || node->u.l.buffer!=0);
/* Remove it from the cache table, and free its storage. */
//printf("%s:%d old pma = %p\n", __FILE__, __LINE__, node->u.l.buffer);
delete_node(t, node);
/* An empty leaf would never have set did_split, so this also asserts that NODE was not empty. */
assert(did_split==1);
*nodea = A;
*nodeb = B;
assert(serialize_brtnode_size(A)<A->nodesize);
assert(serialize_brtnode_size(B)<B->nodesize);
return 0;
}
/* Split nonleaf NODE of tree T into two newly created nonleaf nodes of the
 * same height.  The first n_children/2 children (with their hash tables and
 * byte counts) move to A and the rest move to B.  The key that separated the
 * two groups is handed to the caller through *splitkey / *splitkeylen; the
 * pointer itself is transferred (not copied), so it is no longer referenced by
 * any node afterwards.
 * NODE ends up completely empty and is deleted.  The new nodes are returned
 * through *nodea and *nodeb. */
void brt_nonleaf_split (BRT t, BRTNODE node, BRTNODE *nodea, BRTNODE *nodeb, bytevec*splitkey, ITEMLEN *splitkeylen) {
int n_children_in_a = node->u.n.n_children/2;
BRTNODE A,B;
assert(node->height>0);
assert(node->u.n.n_children>=2); // Otherwise, how do we split? We need at least two children to split.
assert(t->h->nodesize>=node->nodesize); /* otherwise we might be in trouble because the nodesize shrank. */
create_new_brtnode(t, &A, node->height);
create_new_brtnode(t, &B, node->height);
A->u.n.n_children=n_children_in_a;
B->u.n.n_children=node->u.n.n_children-n_children_in_a;
//printf("%s:%d A is at %lld\n", __FILE__, __LINE__, A->thisnodename);
{
/* The first n_children_in_a go into node a.
* That means that the first n_children_in_a-1 keys go into node a.
* The splitter key is the next one, i.e. childkeys[n_children_in_a-1]. */
int i;
/* Move the children, hash tables and byte counts of the first group into A, zeroing them in NODE. */
for (i=0; i<n_children_in_a; i++) {
A->u.n.children[i] = node->u.n.children[i];
A->u.n.htables[i] = node->u.n.htables[i];
A->u.n.n_bytes_in_hashtables += (A->u.n.n_bytes_in_hashtable[i] = node->u.n.n_bytes_in_hashtable[i]);
node->u.n.htables[i] = 0;
node->u.n.n_bytes_in_hashtables -= node->u.n.n_bytes_in_hashtable[i];
node->u.n.n_bytes_in_hashtable[i] = 0;
}
/* Same for the second group, shifted down so that B's children start at index 0. */
for (i=n_children_in_a; i<node->u.n.n_children; i++) {
int targchild = i-n_children_in_a;
B->u.n.children[targchild] = node->u.n.children[i];
B->u.n.htables[targchild] = node->u.n.htables[i];
B->u.n.n_bytes_in_hashtables += (B->u.n.n_bytes_in_hashtable[targchild] = node->u.n.n_bytes_in_hashtable[i]);
node->u.n.htables[i] = 0;
node->u.n.n_bytes_in_hashtables -= node->u.n.n_bytes_in_hashtable[i];
node->u.n.n_bytes_in_hashtable[i] = 0;
}
/* Keys that separate A's children move to A. */
for (i=0; i<n_children_in_a-1; i++) {
A->u.n.childkeys[i] = node->u.n.childkeys[i];
A->u.n.childkeylens[i] = node->u.n.childkeylens[i];
A->u.n.totalchildkeylens += node->u.n.childkeylens[i];
node->u.n.totalchildkeylens -= node->u.n.childkeylens[i];
node->u.n.childkeys[i] = 0;
node->u.n.childkeylens[i] = 0;
}
/* The key between the two groups leaves the node and is given to the caller. */
*splitkey = node->u.n.childkeys[n_children_in_a-1];
*splitkeylen = node->u.n.childkeylens[n_children_in_a-1];
node->u.n.totalchildkeylens -= node->u.n.childkeylens[n_children_in_a-1];
node->u.n.childkeys[n_children_in_a-1]=0;
node->u.n.childkeylens[n_children_in_a-1]=0;
/* Keys that separate B's children move to B. */
for (i=n_children_in_a; i<node->u.n.n_children-1; i++) {
B->u.n.childkeys[i-n_children_in_a] = node->u.n.childkeys[i];
B->u.n.childkeylens[i-n_children_in_a] = node->u.n.childkeylens[i];
B->u.n.totalchildkeylens += node->u.n.childkeylens[i];
node->u.n.totalchildkeylens -= node->u.n.childkeylens[i];
node->u.n.childkeys[i] = 0;
node->u.n.childkeylens[i] = 0;
}
assert(node->u.n.totalchildkeylens==0);
}
{
/* Sanity check: nothing may be left behind in the old node. */
int i;
for (i=0; i<TREE_FANOUT+1; i++) {
assert(node->u.n.htables[i]==0);
assert(node->u.n.n_bytes_in_hashtable[i]==0);
}
assert(node->u.n.n_bytes_in_hashtables==0);
}
/* The buffer is all divided up between them, since we just moved the hashtables over. */
*nodea = A;
*nodeb = B;
/* Remove it from the cache table, and free its storage. */
//printf("%s:%d removing %lld\n", __FILE__, __LINE__, node->thisnodename);
delete_node(t, node);
assert(serialize_brtnode_size(A)<A->nodesize);
assert(serialize_brtnode_size(B)<B->nodesize);
}
/* Store in *childnum the index of the child of nonleaf NODE whose hash table
 * holds the most bytes.  Ties go to the lowest index.
 * NODE must have at least one child. */
void find_heaviest_child (BRTNODE node, int *childnum) {
    int heaviest = 0;
    int heaviest_weight = node->u.n.n_bytes_in_hashtable[0];
    int c;
    assert(node->u.n.n_children>0);
    for (c=1; c<node->u.n.n_children; c++) {
        int weight = node->u.n.n_bytes_in_hashtable[c];
        if (weight > heaviest_weight) {
            heaviest_weight = weight;
            heaviest = c;
        }
    }
    *childnum = heaviest;
}
#if 0
/* Disabled code (inside #if 0), written for a node with ONE hash table shared
 * by all children.  First pass: attribute every pair to a child by comparing
 * its key with the child keys, accumulating a weight and a count per child.
 * Second pass: collect the pairs of the heaviest child into a calloc'ed array.
 * Outputs: *childnum_ret (heaviest child), *pairs_ret (its pairs), *n_pairs_ret.
 * NOTE(review): it uses node->hashtable, node->n_children and node->childkeys,
 * which the live code in this file does not use (it uses node->u.n.*). */
void find_heaviest_data (BRTNODE node, int *childnum_ret, KVPAIR *pairs_ret, int *n_pairs_ret) {
int child_weights[node->n_children];
int child_counts[node->n_children];
int i;
for (i=0; i<node->n_children; i++) child_weights[i] = child_counts[i] = 0;
HASHTABLE_ITERATE(node->hashtable, key, keylen, data __attribute__((__unused__)), datalen,
({
int cnum;
for (cnum=0; cnum<node->n_children-1; cnum++) {
if (keycompare(key, keylen, node->childkeys[cnum], node->childkeylens[cnum])<=0)
break;
}
child_weights[cnum] += keylen + datalen + KEY_VALUE_OVERHEAD;
child_counts[cnum]++;
}));
{
int maxchild=0, maxchildweight=child_weights[0];
for (i=1; i<node->n_children; i++) {
if (maxchildweight<child_weights[i]) {
maxchildweight=child_weights[i];
maxchild = i;
}
}
/* Now we know the maximum child. */
{
int maxchildcount = child_counts[maxchild];
KVPAIR pairs = my_calloc(maxchildcount, sizeof(*pairs));
{
int pairs_count=0;
HASHTABLE_ITERATE(node->hashtable, key, keylen, data, datalen, ({
int cnum;
for (cnum=0; cnum<node->n_children-1; cnum++) {
if (keycompare(key, keylen, node->childkeys[cnum], node->childkeylens[cnum])<=0)
break;
}
if (cnum==maxchild) {
pairs[pairs_count].key = key;
pairs[pairs_count].keylen = keylen;
pairs[pairs_count].val = data;
pairs[pairs_count].vallen = datalen;
pairs_count++;
}
}));
}
/* Now we have the pairs. */
*childnum_ret = maxchild;
*pairs_ret = pairs;
*n_pairs_ret = maxchildcount;
}
}
}
#endif
static int brtnode_insert (BRT t, BRTNODE node, bytevec key, ITEMLEN keylen, bytevec val, ITEMLEN vallen,
int *did_split, BRTNODE *nodea, BRTNODE *nodeb, bytevec*splitkey, ITEMLEN *splitkeylen,
int debug);
/* key is not in the hashtable in node.  Either put the key-value pair in the child, or put it in the node.
 * The pair goes to CHILD only when the child's serialized size plus the pair
 * still fits in the child's nodesize; otherwise it is stored in NODE's hash
 * table for child number CHILDNUM_OF_NODE.
 * Returns 0 on success or the error from the insert that was attempted. */
static int push_kvpair_down_only_if_it_wont_push_more_else_put_here (BRT t, BRTNODE node, BRTNODE child,
                                                                      bytevec key, ITEMLEN keylen, bytevec val, ITEMLEN vallen,
                                                                      int childnum_of_node) {
    assert(node->height>0); /* Not a leaf. */
    int fits_in_child=serialize_brtnode_size(child)+keylen+vallen+KEY_VALUE_OVERHEAD <= child->nodesize;
    if (brt_debug_mode) {
        printf("%s:%d pushing %s to %s %d", __FILE__, __LINE__, (char*)key, fits_in_child? "child" : "hash", childnum_of_node);
        if (childnum_of_node+1<node->u.n.n_children) {
            printf(" nextsplitkey=%s\n", (char*)node->u.n.childkeys[childnum_of_node]);
            assert(keycompare(key, keylen, node->u.n.childkeys[childnum_of_node], node->u.n.childkeylens[childnum_of_node])<=0);
        } else {
            printf("\n");
        }
    }
    if (!fits_in_child) {
        return insert_to_hash_in_nonleaf(node, childnum_of_node, key, keylen, val, vallen);
    }
    {
        /* The split outputs are required by brtnode_insert() but must end up unused here. */
        int again_split=-1; BRTNODE againa,againb; bytevec againkey; ITEMLEN againlen;
        int r = brtnode_insert(t, child, key, keylen, val, vallen,
                               &again_split, &againa, &againb, &againkey, &againlen,
                               0);
        if (r==0) {
            assert(again_split==0); /* I only did the insert if I knew it wouldn't push down, and hence wouldn't split. */
        }
        return r;
    }
}
/* Move one pair from NODE's hash table for child CHILDNUM down into CHILD.
 * The pair is inserted into the child first and only then deleted from the
 * hash table, because KEY points into the hash table's own storage.
 * The child's split outcome is reported through the child_* / childsplit*
 * output parameters as filled in by brtnode_insert().
 * Returns 0 on success, otherwise the first error encountered. */
static int push_a_kvpair_down (BRT t, BRTNODE node, BRTNODE child, int childnum,
                               bytevec key, ITEMLEN keylen, bytevec val, ITEMLEN vallen,
                               int *child_did_split, BRTNODE *childa, BRTNODE *childb, bytevec*childsplitkey, ITEMLEN *childsplitkeylen) {
    int r;
    int n_bytes_removed;
    assert(node->height>0);

    r = brtnode_insert(t, child, key, keylen, val, vallen,
                       child_did_split, childa, childb, childsplitkey, childsplitkeylen,
                       0);
    if (r!=0) return r;

    r = hash_delete(node->u.n.htables[childnum], key, keylen); // Must delete after doing the insert, to avoid operating on freed' key
    if (r!=0) return r;

    n_bytes_removed = (keylen + vallen + KEY_VALUE_OVERHEAD);
    node->u.n.n_bytes_in_hashtables -= n_bytes_removed;
    node->u.n.n_bytes_in_hashtable[childnum] -= n_bytes_removed;
    return 0;
}
int split_count=0;
/* NODE is a node with a child.
* childnum was split into two nodes childa, and childb.
* We must slide things around, & move things from the old table to the new tables.
* We also move things to the new children as much as we can without doing any pushdowns or splitting of the child.
* We must delete the old hashtable (but the old child is already deleted.)
* We also unpin the new children.
*
* If NODE ends up with more than TREE_FANOUT children it is split in turn:
* *did_split is set to 1 and the halves and separating key are returned through
* nodea/nodeb/splitkey/splitkeylen.  Otherwise *did_split is set to 0.
* Returns 0 on success, or the error from redistributing a pair.
* NOTE(review): the error return inside HASHTABLE_ITERATE happens before
* hashtable_free(&old_h) and before the two cachetable_unpin() calls, so on
* that path the old table appears to leak and the children stay pinned -- confirm.
*/
static int handle_split_of_child (BRT t, BRTNODE node, int childnum,
BRTNODE childa, BRTNODE childb, bytevec childsplitkey, ITEMLEN childsplitkeylen,
int *did_split, BRTNODE *nodea, BRTNODE *nodeb, bytevec*splitkey, ITEMLEN *splitkeylen) {
assert(node->height>0);
/* Remember the old child's table and byte count; its slot is about to be overwritten. */
HASHTABLE old_h = node->u.n.htables[childnum];
int old_count = node->u.n.n_bytes_in_hashtable[childnum];
int cnum;
int r;
assert(node->u.n.n_children<=TREE_FANOUT);
if (brt_debug_mode) {
int i;
printf("%s:%d Child %d did split on %s\n", __FILE__, __LINE__, childnum, (char*)childsplitkey);
printf("%s:%d oldsplitkeys:", __FILE__, __LINE__);
for(i=0; i<node->u.n.n_children-1; i++) printf(" %s", (char*)node->u.n.childkeys[i]);
printf("\n");
}
// Slide the children over.
for (cnum=node->u.n.n_children; cnum>childnum+1; cnum--) {
node->u.n.children[cnum] = node->u.n.children[cnum-1];
node->u.n.htables[cnum] = node->u.n.htables[cnum-1];
node->u.n.n_bytes_in_hashtable[cnum] = node->u.n.n_bytes_in_hashtable[cnum-1];
}
/* The two halves take slots childnum and childnum+1, each with a fresh empty table. */
node->u.n.children[childnum] = childa->thisnodename;
node->u.n.children[childnum+1] = childb->thisnodename;
hashtable_create(&node->u.n.htables[childnum]);
hashtable_create(&node->u.n.htables[childnum+1]);
node->u.n.n_bytes_in_hashtable[childnum] = 0;
node->u.n.n_bytes_in_hashtable[childnum+1] = 0;
// Slide the keys over
for (cnum=node->u.n.n_children-1; cnum>childnum; cnum--) {
node->u.n.childkeys[cnum] = node->u.n.childkeys[cnum-1];
node->u.n.childkeylens[cnum] = node->u.n.childkeylens[cnum-1];
}
/* The node stores the split key pointer itself (no copy is made here). */
node->u.n.childkeys[childnum]=childsplitkey;
node->u.n.childkeylens[childnum]= childsplitkeylen;
node->u.n.totalchildkeylens += childsplitkeylen;
node->u.n.n_children++;
if (brt_debug_mode) {
int i;
printf("%s:%d splitkeys:", __FILE__, __LINE__);
for(i=0; i<node->u.n.n_children-1; i++) printf(" %s", (char*)node->u.n.childkeys[i]);
printf("\n");
}
node->u.n.n_bytes_in_hashtables -= old_count; /* By default, they are all removed. We might add them back in. */
/* Keep pushing to the children, but not if the children would require a pushdown */
HASHTABLE_ITERATE(old_h, skey, skeylen, sval, svallen, ({
if (keycompare(skey, skeylen, childsplitkey, childsplitkeylen)<=0) {
r=push_kvpair_down_only_if_it_wont_push_more_else_put_here(t, node, childa, skey, skeylen, sval, svallen, childnum);
} else {
r=push_kvpair_down_only_if_it_wont_push_more_else_put_here(t, node, childb, skey, skeylen, sval, svallen, childnum+1);
}
if (r!=0) return r;
}));
hashtable_free(&old_h);
/* Both halves are unpinned with the dirty flag set. */
r=cachetable_unpin(t->cf, childa->thisnodename, 1);
assert(r==0);
r=cachetable_unpin(t->cf, childb->thisnodename, 1);
assert(r==0);
verify_counts(node);
verify_counts(childa);
verify_counts(childb);
if (node->u.n.n_children>TREE_FANOUT) {
//printf("%s:%d about to split having pushed %d out of %d keys\n", __FILE__, __LINE__, i, n_pairs);
brt_nonleaf_split(t, node, nodea, nodeb, splitkey, splitkeylen);
//printf("%s:%d did split\n", __FILE__, __LINE__);
split_count++;
*did_split=1;
assert((*nodea)->height>0);
assert((*nodeb)->height>0);
assert((*nodea)->u.n.n_children>0);
assert((*nodeb)->u.n.n_children>0);
assert((*nodea)->u.n.children[(*nodea)->u.n.n_children-1]!=0);
assert((*nodeb)->u.n.children[(*nodeb)->u.n.n_children-1]!=0);
assert(serialize_brtnode_size(*nodea)<=(*nodea)->nodesize);
assert(serialize_brtnode_size(*nodeb)<=(*nodeb)->nodesize);
} else {
*did_split=0;
assert(serialize_brtnode_size(node)<=node->nodesize);
}
return 0;
}
/* Push pairs from NODE's hash table for child CHILDNUM down into that child.
 * The child is pinned, then pairs are picked from the table one at a time and
 * pushed down until the table is empty or the child splits.  If the child
 * splits, handle_split_of_child() takes over (it may split NODE too, reported
 * through did_split and the node/splitkey outputs) and its result is returned.
 * Otherwise the child is unpinned as dirty, *did_split is set to 0, and 0 is
 * returned.  DEBUG, when nonzero, enables tracing and sets the indent width.
 * NOTE(review): when push_a_kvpair_down() fails, the function returns without
 * reaching the cachetable_unpin() below, so the child appears to stay pinned -- confirm. */
static int push_some_kvpairs_down (BRT t, BRTNODE node, int childnum,
int *did_split, BRTNODE *nodea, BRTNODE *nodeb, bytevec *splitkey, ITEMLEN *splitkeylen,
int debug) {
void *childnode_v;
BRTNODE child;
int r;
assert(node->height>0);
diskoff targetchild = node->u.n.children[childnum];
assert(targetchild>=0 && targetchild<t->h->unused_memory); // This assertion could fail in a concurrent setting since another process might have bumped unused memory.
r = cachetable_get_and_pin(t->cf, targetchild, &childnode_v,
brtnode_flush_callback, brtnode_fetch_callback, (void*)t->h->nodesize);
if (r!=0) return r;
child=childnode_v;
verify_counts(child);
//printf("%s:%d height=%d n_bytes_in_hashtable = {%d, %d, %d, ...}\n", __FILE__, __LINE__, child->height, child->n_bytes_in_hashtable[0], child->n_bytes_in_hashtable[1], child->n_bytes_in_hashtable[2]);
if (child->height>0 && child->u.n.n_children>0) assert(child->u.n.children[child->u.n.n_children-1]!=0);
if (debug) printf("%s:%d %*spush_some_kvpairs_down to %lld\n", __FILE__, __LINE__, debug, "", child->thisnodename);
/* I am exposing the internals of the hash table here, mostly because I am not thinking of a really
* good way to do it otherwise. I want to loop over the elements of the hash table, deleting some as I
* go. The HASHTABLE_ITERATE macro will break if I delete something from the hash table. */
{
bytevec key,val;
ITEMLEN keylen, vallen;
/* The loop ends when hashtable_random_pick() returns nonzero. */
while(0==hashtable_random_pick(node->u.n.htables[childnum], &key, &keylen, &val, &vallen)) {
int child_did_split=0; BRTNODE childa, childb; bytevec childsplitkey; ITEMLEN childsplitkeylen;
if (debug) printf("%s:%d %*spush down %s\n", __FILE__, __LINE__, debug, "", (char*)key);
r = push_a_kvpair_down (t, node, child, childnum,
key, keylen, val, vallen,
&child_did_split, &childa, &childb, &childsplitkey, &childsplitkeylen);
if (r!=0) return r;
if (child_did_split) {
// If the child splits, we don't push down any further.
if (debug) printf("%s:%d %*shandle split splitkey=%s\n", __FILE__, __LINE__, debug, "", (char*)childsplitkey);
r=handle_split_of_child (t, node, childnum,
childa, childb, childsplitkey, childsplitkeylen,
did_split, nodea, nodeb, splitkey, splitkeylen);
return r; /* Don't do any more pushing if the child splits. */
}
}
}
if (debug) printf("%s:%d %*sdone push_some_kvpairs_down, unpinning %lld\n", __FILE__, __LINE__, debug, "", targetchild);
r=cachetable_unpin(t->cf, targetchild, 1);
if (r!=0) return r;
*did_split=0;
return 0;
}
/* Compute the debug level to hand to a callee: a disabled level (0) stays disabled,
 * any other level is increased by one (the level is used as the trace indentation width). */
int debugp1 (int debug) {
    if (debug == 0) {
        return 0;
    }
    return debug + 1;
}
/* If NODE (a non-leaf) no longer fits in its nodesize, push buffered pairs down into its
 * heaviest child.  The child may split as a result, and that may in turn make NODE split.
 *
 * On return *DID_SPLIT is 0 or 1.  When it is 1, NODEA/NODEB/SPLITKEY/SPLITKEYLEN are the
 * values filled in by push_some_kvpairs_down().  DEBUG, when non-zero, enables trace output
 * and is used as the indentation width.
 * Returns 0 on success or the non-zero code from push_some_kvpairs_down().
 */
static int brtnode_maybe_push_down(BRT t, BRTNODE node, int *did_split, BRTNODE *nodea, BRTNODE *nodeb, bytevec *splitkey, ITEMLEN *splitkeylen, int debug)
{
    int heaviest;
    int rc;

    assert(node->height>0);
    if (debug) printf("%s:%d %*sIn maybe_push_down in_buffer=%d childkeylens=%d size=%d\n", __FILE__, __LINE__, debug, "", node->u.n.n_bytes_in_hashtables, node->u.n.totalchildkeylens, serialize_brtnode_size(node));

    /* Nothing to do when the node still fits. */
    if (serialize_brtnode_size(node) <= node->nodesize) {
        *did_split=0;
        assert(serialize_brtnode_size(node)<=node->nodesize);
        return 0;
    }

    if (debug) printf("%s:%d %*stoo full, height=%d\n", __FILE__, __LINE__, debug, "", node->height);

    /* Pick the heaviest child and push buffered pairs to it. */
    if (debug) printf("%s:%d %*sfind_heaviest_data\n", __FILE__, __LINE__, debug, "");
    find_heaviest_child(node, &heaviest);
    if (debug) printf("%s:%d %*spush some down from %lld into %lld\n", __FILE__, __LINE__, debug, "", node->thisnodename, node->u.n.children[heaviest]);
    assert(node->u.n.children[heaviest]!=0);

    rc = push_some_kvpairs_down(t, node, heaviest, did_split, nodea, nodeb, splitkey, splitkeylen, debugp1(debug));
    if (rc!=0) return rc;

    assert(*did_split==0 || *did_split==1);
    if (debug) printf("%s:%d %*sdid push_some_kvpairs_down did_split=%d\n", __FILE__, __LINE__, debug, "", *did_split);

    if (!*did_split) {
        /* No split: the node itself must fit again. */
        assert(serialize_brtnode_size(node)<=node->nodesize);
        return 0;
    }

    /* Split: both halves must fit and must each end in a real child. */
    assert(serialize_brtnode_size(*nodea)<=(*nodea)->nodesize);
    assert(serialize_brtnode_size(*nodeb)<=(*nodeb)->nodesize);
    assert((*nodea)->u.n.n_children>0);
    assert((*nodeb)->u.n.n_children>0);
    assert((*nodea)->u.n.children[(*nodea)->u.n.n_children-1]!=0);
    assert((*nodeb)->u.n.children[(*nodeb)->u.n.n_children-1]!=0);
    return 0;
}
/* Insert (KEY,VAL) into the leaf NODE, replacing any existing pair stored under KEY.
 *
 * If the leaf no longer fits in its nodesize afterwards it is split with brtleaf_split():
 * *DID_SPLIT is set to 1 and NODEA/NODEB/SPLITKEY/SPLITKEYLEN are filled in by that call.
 * Otherwise *DID_SPLIT is set to 0.  DEBUG, when non-zero, enables trace output and is used
 * as the indentation width.
 *
 * Returns 0 on success, the PMA status if the insert itself fails, or the non-zero code
 * from brtleaf_split().  On a non-zero return *DID_SPLIT is not meaningful.
 */
static int brt_leaf_insert (BRT t, BRTNODE node, bytevec key, ITEMLEN keylen, bytevec val, ITEMLEN vallen,
                            int *did_split, BRTNODE *nodea, BRTNODE *nodeb, bytevec*splitkey, ITEMLEN *splitkeylen,
                            int debug) {
    bytevec olddata;
    ITEMLEN olddatalen;
    enum pma_errors pma_status = pma_lookup(node->u.l.buffer, key, keylen, &olddata, &olddatalen);
    if (pma_status==BRT_OK) {
        /* The key is already present: remove the old pair and give back its bytes. */
        pma_status = pma_delete(node->u.l.buffer, key, keylen);
        assert(pma_status==BRT_OK);
        node->u.l.n_bytes_in_buffer -= keylen + olddatalen + KEY_VALUE_OVERHEAD;
    }
    pma_status = pma_insert(node->u.l.buffer, key, keylen, val, vallen);
    /* Do not account for (or report success on) a pair that was never stored. */
    if (pma_status!=BRT_OK) return pma_status;
    node->u.l.n_bytes_in_buffer += keylen + vallen + KEY_VALUE_OVERHEAD;
    // If it doesn't fit, then split the leaf.
    if (serialize_brtnode_size(node) > node->nodesize) {
        int r = brtleaf_split (t, node, nodea, nodeb, splitkey, splitkeylen);
        if (r!=0) return r;
        //printf("%s:%d splitkey=%s\n", __FILE__, __LINE__, (char*)*splitkey);
        split_count++;
        *did_split = 1;
        verify_counts(*nodea); verify_counts(*nodeb);
        if (debug) printf("%s:%d %*snodeb->thisnodename=%lld nodeb->size=%d\n", __FILE__, __LINE__, debug, "", (*nodeb)->thisnodename, (*nodeb)->nodesize);
        assert(serialize_brtnode_size(*nodea)<=(*nodea)->nodesize);
        assert(serialize_brtnode_size(*nodeb)<=(*nodeb)->nodesize);
    } else {
        *did_split = 0;
    }
    return 0;
}
/* Insert (KEY,VAL) into the non-leaf NODE by buffering it in the hash table of the child
 * that brtnode_which_child() selects for KEY, replacing any pair already buffered under KEY.
 *
 * The byte counters n_bytes_in_hashtables and n_bytes_in_hashtable[childnum] are adjusted
 * for the removed and the added pair.  Afterwards brtnode_maybe_push_down() is called, which
 * may push buffered pairs to a child and may report a split through DID_SPLIT, NODEA, NODEB,
 * SPLITKEY and SPLITKEYLEN.
 *
 * DEBUG, when non-zero, enables trace output and is used as the indentation width.
 * Returns 0 on success or the non-zero code from brtnode_maybe_push_down().
 */
static int brt_nonleaf_insert (BRT t, BRTNODE node, bytevec key, ITEMLEN keylen, bytevec val, ITEMLEN vallen,
int *did_split, BRTNODE *nodea, BRTNODE *nodeb, bytevec*splitkey, ITEMLEN *splitkeylen,
int debug) {
bytevec olddata;
ITEMLEN olddatalen;
unsigned int childnum = brtnode_which_child(node, key, keylen);
/* hash_find() returning 0 is treated as "found" here. */
int found = !hash_find(node->u.n.htables[childnum], key, keylen, &olddata, &olddatalen);
/* Disabled fast path: insert straight into the child when it is already in the cachetable.
 * The whole block is compiled but never executed because of the constant condition. */
if (0) { // It is faster to do this, except on yobiduck where things grind to a halt.
void *child_v;
if (node->height>0 &&
0 == cachetable_maybe_get_and_pin(t->cf, node->u.n.children[childnum], &child_v)) {
/* If the child is in memory, then go ahead and put it in the child. */
BRTNODE child = child_v;
if (found) {
int diff = keylen + olddatalen + KEY_VALUE_OVERHEAD;
int r = hash_delete(node->u.n.htables[childnum], key, keylen);
assert(r==0);
node->u.n.n_bytes_in_hashtables -= diff;
node->u.n.n_bytes_in_hashtable[childnum] -= diff;
}
{
int child_did_split;
BRTNODE childa, childb;
bytevec childsplitkey;
ITEMLEN childsplitkeylen;
int r = brtnode_insert(t, child, key, keylen, val, vallen,
&child_did_split, &childa, &childb, &childsplitkey, &childsplitkeylen, 0);
if (r!=0) return r;
if (child_did_split) {
r=handle_split_of_child(t, node, childnum,
childa, childb, childsplitkey, childsplitkeylen,
did_split, nodea, nodeb, splitkey, splitkeylen);
if (r!=0) return r;
} else {
/* NOTE(review): the result of this unpin is ignored. */
cachetable_unpin(t->cf, child->thisnodename, 1);
*did_split = 0;
}
}
return 0;
}
}
if (debug) printf("%s:%d %*sDoing hash_insert\n", __FILE__, __LINE__, debug, "");
verify_counts(node);
/* Replace semantics: drop the pair already buffered under this key and give back its bytes. */
if (found) {
int r = hash_delete(node->u.n.htables[childnum], key, keylen);
int diff = keylen + olddatalen + KEY_VALUE_OVERHEAD;
assert(r==0);
node->u.n.n_bytes_in_hashtables -= diff;
node->u.n.n_bytes_in_hashtable[childnum] -= diff;
//printf("%s:%d deleted %d bytes\n", __FILE__, __LINE__, diff);
}
/* Buffer the new pair and account for its bytes, both in total and per child. */
{
int diff = keylen + vallen + KEY_VALUE_OVERHEAD;
int r=hash_insert(node->u.n.htables[childnum], key, keylen, val, vallen);
assert(r==0);
node->u.n.n_bytes_in_hashtables += diff;
node->u.n.n_bytes_in_hashtable[childnum] += diff;
}
if (debug) printf("%s:%d %*sDoing maybe_push_down\n", __FILE__, __LINE__, debug, "");
int r = brtnode_maybe_push_down(t, node, did_split, nodea, nodeb, splitkey, splitkeylen, debugp1(debug));
if (r!=0) return r;
if (debug) printf("%s:%d %*sDid maybe_push_down\n", __FILE__, __LINE__, debug, "");
/* Sanity checks: whatever is left (the node, or its two halves) must fit in a node. */
if (*did_split) {
assert(serialize_brtnode_size(*nodea)<=(*nodea)->nodesize);
assert(serialize_brtnode_size(*nodeb)<=(*nodeb)->nodesize);
assert((*nodea)->u.n.n_children>0);
assert((*nodeb)->u.n.n_children>0);
assert((*nodea)->u.n.children[(*nodea)->u.n.n_children-1]!=0);
assert((*nodeb)->u.n.children[(*nodeb)->u.n.n_children-1]!=0);
verify_counts(*nodea);
verify_counts(*nodeb);
} else {
assert(serialize_brtnode_size(node)<=node->nodesize);
verify_counts(node);
}
return 0;
}
/* Insert (KEY,VAL) into NODE, dispatching on the node kind: height 0 goes to
 * brt_leaf_insert(), anything else to brt_nonleaf_insert().  All out-parameters and the
 * return value are those of the selected routine. */
static int brtnode_insert (BRT t, BRTNODE node, bytevec key, ITEMLEN keylen, bytevec val, ITEMLEN vallen,
                           int *did_split, BRTNODE *nodea, BRTNODE *nodeb, bytevec*splitkey, ITEMLEN *splitkeylen,
                           int debug) {
    if (node->height!=0) {
        return brt_nonleaf_insert(t, node, key, keylen, val, vallen,
                                  did_split, nodea, nodeb, splitkey, splitkeylen,
                                  debug);
    }
    return brt_leaf_insert(t, node, key, keylen, val, vallen,
                           did_split, nodea, nodeb, splitkey, splitkeylen,
                           debug);
}
/* Number of cache lines used when the caller asks for the default. */
enum {n_nodes_in_cache =64};

/* Create a cachetable in *CT with CACHELINES lines; passing 0 selects n_nodes_in_cache.
 * Returns whatever create_cachetable() returns. */
int brt_create_cachetable (CACHETABLE *ct, int cachelines) {
    int n_lines = (cachelines==0) ? n_nodes_in_cache : cachelines;
    assert(n_lines>0);
    return create_cachetable(ct, n_lines);
}
/* Allocate and initialize an empty height-0 node at disk offset OFFSET, put it into the
 * cachetable of T's file, and unpin it again.
 * Returns 0 on success; on failure of the put or the unpin the node is freed and the
 * failing call's code is returned.
 * NOTE(review): freeing the node after a successful put but a failed unpin assumes the
 * cachetable no longer references it -- confirm. */
static int setup_brt_root_node (BRT t, diskoff offset) {
    int r;
    BRTNODE MALLOC(node);
    assert(node);
    //printf("%s:%d\n", __FILE__, __LINE__);
    initialize_brtnode(t, node, offset, 0);
    if (0) {
        printf("%s:%d for tree %p node %p mdict_create--> %p\n", __FILE__, __LINE__, t, node, node->u.l.buffer);
        printf("%s:%d put root at %lld\n", __FILE__, __LINE__, offset);
    }
    r = cachetable_put(t->cf, offset, node,
                       brtnode_flush_callback, brtnode_fetch_callback, (void*)t->h->nodesize);
    if (r==0) {
        //printf("%s:%d created %lld\n", __FILE__, __LINE__, node->thisnodename);
        verify_counts(node);
        r = cachetable_unpin(t->cf, node->thisnodename, 1);
    }
    /* Both failure cases clean up the same way. */
    if (r!=0) {
        my_free(node);
    }
    return r;
}
/* Trace switch: WHEN_BRTTRACE(x) expands to x while BRT_TRACE is defined and to a no-op
 * expression otherwise.
 * NOTE(review): BRT_TRACE is defined unconditionally on the next line, so the trace
 * statements wrapped in WHEN_BRTTRACE are always compiled in. */
#define BRT_TRACE
#ifdef BRT_TRACE
#define WHEN_BRTTRACE(x) x
#else
#define WHEN_BRTTRACE(x) ((void)0)
#endif
/* Open the tree stored in file FNAME and return a handle in *NEWBRT.
 *
 *   fname      file to open through CACHETABLE (created if IS_CREATE is non-zero).
 *   dbname     name of the tree inside the file, or NULL for a file holding one unnamed tree.
 *   is_create  non-zero to create: either build a fresh header and root node (when reading
 *              the header returns -1), or add a new named root to an existing header.
 *   nodesize   node size used when a fresh header is built; must be > 0.
 *
 * Returns 0 on success.  Error codes set in this function are ENOMEM, EEXIST (create of a
 * name that already exists), ENOENT (open of a name that is not in the header) and -2
 * (open without a name on a file whose header has named roots); codes from callees are
 * passed through.
 *
 * Cleanup uses labels placed inside `if (0) { ... }` blocks: each label undoes one step and
 * then falls out of its dead block into the `goto` for the previous step, giving the chain
 *   died5 -> died4 -> died3 -> died2 -> died1 -> died0a -> died0 -> return r.
 */
int open_brt (const char *fname, const char *dbname, int is_create, BRT *newbrt, int nodesize, CACHETABLE cachetable) {
/* If dbname is NULL then we setup to hold a single tree. Otherwise we setup an array. */
int r;
BRT t;
char *malloced_name=0;
//printf("%s:%d %d alloced\n", __FILE__, __LINE__, get_n_items_malloced()); print_malloced_items();
WHEN_BRTTRACE(fprintf(stderr, "BRTTRACE: open_brt(%s, \"%s\", %d, %p, %d, %p)\n",
fname, dbname, is_create, newbrt, nodesize, cachetable));
if ((MALLOC(t))==0) {
assert(errno==ENOMEM);
r = ENOMEM;
/* died0: free the handle itself (last step of the cleanup chain). */
if (0) { died0: my_free(t); }
return r;
}
if (dbname) {
malloced_name = mystrdup(dbname);
if (malloced_name==0) {
r = ENOMEM;
/* died0a: free the private copy of the name, if one was made. */
if (0) { died0a: if(malloced_name) my_free(malloced_name); }
goto died0;
}
}
t->database_name = malloced_name;
r=cachetable_openf(&t->cf, cachetable, fname, O_RDWR | (is_create ? O_CREAT : 0), 0777);
if (r!=0) {
/* died1: close the cachefile. */
if (0) { died1: cachefile_close(t->cf); }
goto died0a;
}
assert(nodesize>0);
//printf("%s:%d %d alloced\n", __FILE__, __LINE__, get_n_items_malloced()); print_malloced_items();
if (is_create) {
r = read_and_pin_brt_header(t->cf, &t->h);
/* -1 is treated as "no header yet"; any other non-zero code trips the assert(r==0) below. */
if (r==-1) {
/* construct a new header. */
if ((MALLOC(t->h))==0) {
assert(errno==ENOMEM);
r = ENOMEM;
/* died2: free the freshly built header. */
if (0) { died2: my_free(t->h); }
goto died1;
}
t->h->nodesize=nodesize;
t->h->freelist=-1;
/* The first root is placed at offset nodesize below, and unused space starts at 2*nodesize. */
t->h->unused_memory=2*nodesize;
if (dbname) {
t->h->unnamed_root = -1;
t->h->n_named_roots = 1;
if ((MALLOC_N(1, t->h->names))==0) { assert(errno==ENOMEM); r=ENOMEM; if (0) { died3: my_free(t->h->names); } goto died2; }
if ((MALLOC_N(1, t->h->roots))==0) { assert(errno==ENOMEM); r=ENOMEM; if (0) { died4: my_free(t->h->roots); } goto died3; }
if ((t->h->names[0] = mystrdup(dbname))==0) { assert(errno==ENOMEM); r=ENOMEM; if (0) { died5: my_free(t->h->names[0]); } goto died4; }
t->h->roots[0] = nodesize;
} else {
t->h->unnamed_root = nodesize;
t->h->n_named_roots = -1;
t->h->names=0;
t->h->roots=0;
}
if ((r=setup_brt_root_node(t, nodesize))!=0) { if (dbname) goto died5; else goto died2; }
if ((r=cachetable_put(t->cf, 0, t->h, brtheader_flush_callback, brtheader_fetch_callback, 0))) { if (dbname) goto died5; else goto died2; }
} else {
/* A header already exists: add a new named root to it. */
int i;
assert(r==0);
assert(t->h->unnamed_root==-1);
assert(t->h->n_named_roots>=0);
/* NOTE(review): dbname is passed to strcmp() here without a NULL check; a create with
 * dbname==NULL on a file that already has named roots would reach this -- confirm. */
for (i=0; i<t->h->n_named_roots; i++) {
if (strcmp(t->h->names[i], dbname)==0) {
r = EEXIST;
/* NOTE(review): the error paths from here to the end of the function go to died1
 * while the header is still pinned; no unpin_brt_header() is visible on them. */
goto died1; /* deallocate everything. */
}
}
/* NOTE(review): the result of my_realloc() overwrites the only pointer to the old
 * array, so the old array is lost if the reallocation fails. */
if ((t->h->names = my_realloc(t->h->names, (1+t->h->n_named_roots)*sizeof(*t->h->names))) == 0) { assert(errno==ENOMEM); r=ENOMEM; goto died1; }
if ((t->h->roots = my_realloc(t->h->roots, (1+t->h->n_named_roots)*sizeof(*t->h->roots))) == 0) { assert(errno==ENOMEM); r=ENOMEM; goto died1; }
t->h->n_named_roots++;
if ((t->h->names[t->h->n_named_roots-1] = mystrdup(dbname)) == 0) { assert(errno==ENOMEM); r=ENOMEM; goto died1; }
/* NOTE(review): unconditional print to stdout; looks like leftover debugging. */
printf("%s:%d t=%p\n", __FILE__, __LINE__, t);
t->h->roots[t->h->n_named_roots-1] = malloc_diskblock_header_is_in_memory(t, t->h->nodesize);
if ((r=setup_brt_root_node(t, t->h->roots[t->h->n_named_roots-1]))!=0) goto died1;
}
} else {
/* Plain open: the header must exist and must match the requested naming scheme. */
if ((r = read_and_pin_brt_header(t->cf, &t->h))!=0) goto died1;
if (!dbname) {
if (t->h->n_named_roots!=-1) { r = -2; /* invalid args??? */; goto died1; }
} else {
int i;
for (i=0; i<t->h->n_named_roots; i++) {
if (strcmp(t->h->names[i], dbname)==0) {
goto found_it;
}
}
r=ENOENT; /* the database doesn't exist */
goto died1;
}
found_it: ;
}
assert(t->h);
if ((r = unpin_brt_header(t)) !=0) goto died1;
/* unpin_brt_header() is expected to clear t->h. */
assert(t->h==0);
WHEN_BRTTRACE(fprintf(stderr, "BRTTRACE -> %p\n", t));
t->cursors_head = t->cursors_tail = 0;
*newbrt = t;
return 0;
}
/* Close BRT: close every cursor still attached to it, close its cachefile, and free the
 * handle together with its private copy of the database name.
 * Returns 0 on success, or the first non-zero code from brt_cursor_close() or
 * cachefile_close(); in that case the handle is not freed. */
int close_brt (BRT brt) {
    int r;
    BRT_CURSOR c;
    /* brt_cursor_close() unlinks the cursor, so the head advances on every pass. */
    while ((c = brt->cursors_head) != 0) {
        r = brt_cursor_close(c);
        if (r!=0) return r;
    }
    assert(0==cachefile_assert_all_unpinned(brt->cf));
    //printf("%s:%d closing cachetable\n", __FILE__, __LINE__);
    r = cachefile_close(brt->cf);
    if (r!=0) return r;
    if (brt->database_name) {
        my_free(brt->database_name);
    }
    my_free(brt);
    return 0;
}
/* Debug level that brt_insert() copies into its local `debug` flag; 0 disables the trace
 * output.  The trailing comment is an example of a per-key condition once used here. */
int brt_debug_mode = 0;//strcmp(key,"hello387")==0;
/* Return a pointer to the header field that holds the root offset of BRT's tree:
 * the unnamed root when the handle has no database name, otherwise the roots[] entry
 * whose name matches.  Reads brt->h, so the header must be available to the caller.
 * Aborts if the handle's name is not present in the header. */
CACHEKEY* calculate_root_offset_pointer (BRT brt) {
    int idx;
    if (brt->database_name==0) {
        return &brt->h->unnamed_root;
    }
    for (idx=0; idx<brt->h->n_named_roots; idx++) {
        if (strcmp(brt->database_name, brt->h->names[idx])!=0) continue;
        return &brt->h->roots[idx];
    }
    abort();
}
/* Insert (KEY,VAL) into the tree BRT.
 *
 * Pins the header and the root node, inserts through brtnode_insert(), and if the root
 * split, builds a new root one level higher with the two halves as its children and stores
 * its offset in the header (marking the header dirty).
 *
 * Returns 0 on success or the first non-zero code from a callee.  Every failure after the
 * header has been pinned goes through died0 so that the header pin is released.
 * NOTE(review): node pins (and a partially built new root) are not released on those error
 * paths, because their state after a failed callee cannot be determined from this function.
 */
int brt_insert (BRT brt, bytevec key, ITEMLEN keylen, bytevec val, ITEMLEN vallen) {
    void *node_v;
    BRTNODE node;
    CACHEKEY *rootp;
    int r;
    int did_split; BRTNODE nodea=0, nodeb=0; bytevec splitkey; ITEMLEN splitkeylen;
    int debug = brt_debug_mode;//strcmp(key,"hello387")==0;
    //assert(0==cachetable_assert_all_unpinned(brt->cachetable));
    if ((r = read_and_pin_brt_header(brt->cf, &brt->h))) {
        /* died0: release the header pin, then return the error already stored in r. */
        if (0) { died0: unpin_brt_header(brt); }
        return r;
    }
    rootp = calculate_root_offset_pointer(brt);
    if (debug) printf("%s:%d Getting %lld\n", __FILE__, __LINE__, *rootp);
    if ((r=cachetable_get_and_pin(brt->cf, *rootp, &node_v,
                                  brtnode_flush_callback, brtnode_fetch_callback, (void*)brt->h->nodesize))) {
        goto died0;
    }
    node=node_v;
    if (debug) printf("%s:%d node inserting\n", __FILE__, __LINE__);
    r = brtnode_insert(brt, node, key, keylen, val, vallen,
                       &did_split, &nodea, &nodeb, &splitkey, &splitkeylen,
                       debug);
    if (r!=0) goto died0;
    if (debug) printf("%s:%d did_insert\n", __FILE__, __LINE__);
    if (did_split) {
        //printf("%s:%d did_split=%d nodeb=%p nodeb->thisnodename=%lld nodeb->nodesize=%d\n", __FILE__, __LINE__, did_split, nodeb, nodeb->thisnodename, nodeb->nodesize);
        //printf("Did split, splitkey=%s\n", splitkey);
        if (nodeb->height>0) assert(nodeb->u.n.children[nodeb->u.n.n_children-1]!=0);
        assert(nodeb->nodesize>0);
    }
    if (did_split) {
        /* The root split: grow the tree by one level. */
        BRTNODE MALLOC(newroot);
        diskoff newroot_diskoff=malloc_diskblock(brt, brt->h->nodesize);
        assert(newroot);
        *rootp=newroot_diskoff;
        brt->h->dirty=1;
        initialize_brtnode (brt, newroot, newroot_diskoff, nodea->height+1);
        newroot->u.n.n_children=2;
        //printf("%s:%d Splitkey=%p %s\n", __FILE__, __LINE__, splitkey, splitkey);
        newroot->u.n.childkeys[0] = splitkey;
        newroot->u.n.childkeylens[0] = splitkeylen;
        newroot->u.n.totalchildkeylens=splitkeylen;
        newroot->u.n.children[0]=nodea->thisnodename;
        newroot->u.n.children[1]=nodeb->thisnodename;
        r=hashtable_create(&newroot->u.n.htables[0]); if (r!=0) goto died0;
        r=hashtable_create(&newroot->u.n.htables[1]); if (r!=0) goto died0;
        verify_counts(newroot);
        r=cachetable_unpin(brt->cf, nodea->thisnodename, 1); if (r!=0) goto died0;
        r=cachetable_unpin(brt->cf, nodeb->thisnodename, 1); if (r!=0) goto died0;
        //printf("%s:%d put %lld\n", __FILE__, __LINE__, brt->root);
        /* The new root must actually be in the cachetable before it is unpinned below. */
        r=cachetable_put(brt->cf, newroot_diskoff, newroot,
                         brtnode_flush_callback, brtnode_fetch_callback, (void*)brt->h->nodesize);
        if (r!=0) goto died0;
    } else {
        if (node->height>0)
            assert(node->u.n.n_children<=TREE_FANOUT);
    }
    /* *rootp is either the original root (no split) or the new root that was just put. */
    r=cachetable_unpin(brt->cf, *rootp, 1);
    if (r!=0) goto died0;
    if ((r = unpin_brt_header(brt))!=0) return r;
    //assert(0==cachetable_assert_all_unpinned(brt->cachetable));
    return 0;
}
// This is pretty ugly.
/* Scratch buffer that receives a copy of a value found in a non-leaf node's buffer.
 * It is static, so a result returned through it is overwritten by the next such lookup. */
static unsigned char lookup_result[1000000];

/* Look KEY up in the subtree rooted at disk offset OFF.
 *
 * Leaf (height 0): the PMA is searched; on success *VAL/*VALLEN are the values returned by
 * pma_lookup() and the result is that of unpinning the node; on failure the PMA status is
 * returned.
 * NOTE(review): in the leaf case *VAL is whatever pma_lookup() returned and the node is
 * unpinned before returning -- confirm the pointer stays valid for the caller.
 * Non-leaf: the buffer of the child chosen by brtnode_which_child() is searched first; a
 * hit is copied into lookup_result.  Otherwise the search recurses into that child.
 *
 * The node at OFF is pinned for the duration of the call and unpinned on every path that
 * pinned it.  Returns 0 on success or a non-zero code from a callee.
 */
int brt_lookup_node (BRT brt, diskoff off, bytevec key, ITEMLEN keylen, bytevec *val, ITEMLEN *vallen) {
    void *node_v;
    bytevec answer;
    ITEMLEN answerlen;
    BRTNODE node;
    int childnum;
    int r = cachetable_get_and_pin(brt->cf, off, &node_v,
                                   brtnode_flush_callback, brtnode_fetch_callback, (void*)brt->h->nodesize);
    if (r!=0) {
        /* The pin failed, so there is nothing to unpin on this path. */
        printf("%s:%d r=%d\n", __FILE__, __LINE__, r);
        return r;
    }
    node=node_v;
    if (node->height==0) {
        r = pma_lookup(node->u.l.buffer, key, keylen, &answer, &answerlen);
        //printf("%s:%d looked up something, got answerlen=%d\n", __FILE__, __LINE__, answerlen);
        if (r!=0) {
            printf("%s:%d r=%d\n", __FILE__, __LINE__, r);
            /* Best-effort release; the lookup status is what gets reported. */
            cachetable_unpin(brt->cf, off, 0);
            return r;
        }
        *val = answer;
        *vallen = answerlen;
        return cachetable_unpin(brt->cf, off, 0);
    }
    childnum = brtnode_which_child(node, key, keylen);
    // A pair still buffered in this node for that child takes precedence over the child itself.
    if (hash_find (node->u.n.htables[childnum], key, keylen, &answer, vallen)==0) {
        //printf("Found %d bytes\n", *vallen);
        assert(*vallen<=(int)(sizeof(lookup_result)));
        memcpy(lookup_result, answer, *vallen);
        //printf("Returning %s\n", lookup_result);
        *val = lookup_result;
        r = cachetable_unpin(brt->cf, off, 0);
        assert(r==0);
        return 0;
    }
    {
        int result = brt_lookup_node(brt, node->u.n.children[childnum], key, keylen, val, vallen);
        r = cachetable_unpin(brt->cf, off, 0);
        if (r!=0) return r;
        return result;
    }
}
/* Look KEY up in the tree BRT, starting at its root.
 * On success returns 0 with *VAL/*VALLEN set by brt_lookup_node().  On failure returns the
 * non-zero code from reading the header, from the node lookup, or from unpinning the
 * header; the first two cases also print diagnostic lines to stdout.
 * Asserts that nothing in the cachefile is pinned on entry and on exit. */
int brt_lookup (BRT brt, bytevec key, unsigned int keylen, bytevec*val, unsigned int *vallen) {
    CACHEKEY *rootp;
    int r;
    assert(0==cachefile_assert_all_unpinned(brt->cf));
    r = read_and_pin_brt_header(brt->cf, &brt->h);
    if (r==0) {
        rootp = calculate_root_offset_pointer(brt);
        r = brt_lookup_node(brt, *rootp, key, keylen, val, vallen);
        if (r==0) {
            //printf("%s:%d r=%d", __FILE__, __LINE__, r); if (r==0) printf(" vallen=%d", *vallen); printf("\n");
            r = unpin_brt_header(brt);
            if (r!=0) return r;
            assert(0==cachefile_assert_all_unpinned(brt->cf));
            return 0;
        }
        /* The lookup failed: report it and release the header. */
        printf("%s:%d\n", __FILE__, __LINE__);
        unpin_brt_header(brt);
    } else {
        /* The header could not be read, so there is nothing to release. */
        printf("%s:%d\n", __FILE__, __LINE__);
    }
    printf("%s:%d returning %d\n", __FILE__, __LINE__, r);
    assert(0==cachefile_assert_all_unpinned(brt->cf));
    return r;
}
int verify_brtnode (BRT brt, diskoff off, bytevec lorange, ITEMLEN lolen, bytevec hirange, ITEMLEN hilen, int recurse);

/* Print the subtree rooted at disk offset OFF to stdout, indented by DEPTH columns.
 *
 * LORANGE/HIRANGE (with lengths LOLEN/HILEN) are the key bounds inherited from the parent;
 * either may be NULL, which is printed as "(null)".  Keys and values are printed with %s
 * and are asserted (for buffered pairs) to be NUL-terminated strings.
 * For a non-leaf, the buffer of every child is printed, followed by each child subtree.
 *
 * Returns the result of verify_brtnode() (non-recursive) for this node only; results of the
 * recursive dumps are not folded in.  Pin and unpin failures abort through assert().
 */
int dump_brtnode (BRT brt, diskoff off, int depth, bytevec lorange, ITEMLEN lolen, bytevec hirange, ITEMLEN hilen) {
    int result=0;
    BRTNODE node;
    void *node_v;
    /* Passing NULL to %s is undefined; spell out the text explicitly instead. */
    const char *lostr = lorange ? (const char*)lorange : "(null)";
    const char *histr = hirange ? (const char*)hirange : "(null)";
    int r = cachetable_get_and_pin(brt->cf, off, &node_v,
                                   brtnode_flush_callback, brtnode_fetch_callback, (void*)brt->h->nodesize);
    assert(r==0);
    node=node_v;
    result=verify_brtnode(brt, off, lorange, lolen, hirange, hilen, 0);
    printf("%*sNode=%p\n", depth, "", (void*)node);
    if (node->height>0) {
        printf("%*sNode %lld nodesize=%d height=%d n_children=%d n_bytes_in_hashtables=%d keyrange=%s %s\n",
               depth, "", off, node->nodesize, node->height, node->u.n.n_children, node->u.n.n_bytes_in_hashtables, lostr, histr);
        //printf("%s %s\n", lorange ? lorange : "NULL", hirange ? hirange : "NULL");
        {
            int i;
            /* Every child has its own buffer, including the last one. */
            for (i=0; i< node->u.n.n_children; i++) {
                printf("%*schild %d buffered (%d entries):\n", depth+1, "", i, hashtable_n_entries(node->u.n.htables[i]));
                HASHTABLE_ITERATE(node->u.n.htables[i], key, keylen, data, datalen,
                                  ({
                                      printf("%*s %s %s\n", depth+2, "", (char*)key, (char*)data);
                                      assert(strlen((char*)key)+1==keylen);
                                      assert(strlen((char*)data)+1==datalen);
                                  }));
            }
            for (i=0; i<node->u.n.n_children; i++) {
                printf("%*schild %d\n", depth, "", i);
                if (i>0) {
                    printf("%*spivot %d=%s\n", depth+1, "", i-1, (char*)node->u.n.childkeys[i-1]);
                }
                /* Child i is bounded by pivot i-1 below and pivot i above; the outermost
                 * children inherit this node's own bounds. */
                dump_brtnode(brt, node->u.n.children[i], depth+4,
                             (i==0) ? lorange : node->u.n.childkeys[i-1],
                             (i==0) ? lolen : node->u.n.childkeylens[i-1],
                             (i==node->u.n.n_children-1) ? hirange : node->u.n.childkeys[i],
                             (i==node->u.n.n_children-1) ? hilen : node->u.n.childkeylens[i]
                             );
            }
        }
    } else {
        printf("%*sNode %lld nodesize=%d height=%d n_bytes_in_buffer=%d keyrange=%s %s\n",
               depth, "", off, node->nodesize, node->height, node->u.l.n_bytes_in_buffer, lostr, histr);
        PMA_ITERATE(node->u.l.buffer, key, keylen, val, vallen,
                    ( keylen=keylen, vallen=vallen, printf(" %s:%s", (char*)key, (char*)val)));
        printf("\n");
    }
    r = cachetable_unpin(brt->cf, off, 0);
    assert(r==0);
    return result;
}
/* Print the global split counter and the whole tree BRT to stdout.
 * Returns the non-zero code from reading the header or from dump_brtnode() (after
 * releasing the header), otherwise the result of unpinning the header. */
int dump_brt (BRT brt) {
    CACHEKEY *rootp;
    int r = read_and_pin_brt_header(brt->cf, &brt->h);
    if (r!=0) return r;
    rootp = calculate_root_offset_pointer(brt);
    printf("split_count=%d\n", split_count);
    r = dump_brtnode(brt, *rootp, 0, 0, 0, 0, 0);
    if (r!=0) {
        /* Release the header but report the dump failure. */
        unpin_brt_header(brt);
        return r;
    }
    return unpin_brt_header(brt);
}
/* Print the block number (OFF divided by the header's nodesize) of the node at OFF and,
 * for a non-leaf, of every node below it, in depth-first order.
 * OFF must be a multiple of the nodesize.  Returns 0 on success or the first non-zero code
 * from pinning, from a recursive call, or from the final unpin. */
int show_brtnode_blocknumbers (BRT brt, diskoff off) {
    BRTNODE node;
    void *node_v;
    int childno, r;
    assert(off%brt->h->nodesize==0);
    r = cachetable_get_and_pin(brt->cf, off, &node_v,
                               brtnode_flush_callback, brtnode_fetch_callback, (void*)brt->h->nodesize);
    if (r!=0) return r;
    node=node_v;
    printf(" %lld", off/brt->h->nodesize);
    if (node->height>0) {
        for (childno=0; childno<node->u.n.n_children; childno++) {
            r = show_brtnode_blocknumbers(brt, node->u.n.children[childno]);
            if (r!=0) {
                /* Release this node but report the child's failure. */
                cachetable_unpin(brt->cf, off, 0);
                return r;
            }
        }
    }
    return cachetable_unpin(brt->cf, off, 0);
}
/* Print one line to stdout listing the block numbers of every node in BRT.
 * Returns the non-zero code from reading the header or from the node walk (after
 * releasing the header), otherwise the result of unpinning the header. */
int show_brt_blocknumbers (BRT brt) {
    CACHEKEY *rootp;
    int r = read_and_pin_brt_header(brt->cf, &brt->h);
    if (r!=0) return r;
    rootp = calculate_root_offset_pointer(brt);
    printf("BRT %p has blocks:", brt);
    r = show_brtnode_blocknumbers (brt, *rootp);
    if (r!=0) {
        /* The terminating newline is only printed when the walk succeeded. */
        unpin_brt_header(brt);
        return r;
    }
    printf("\n");
    return unpin_brt_header(brt);
}
/* Check the key-range invariants of the node at disk offset OFF.
 *
 * LORANGE/HIRANGE (lengths LOLEN/HILEN) bound the keys this node may hold; NULL means
 * unbounded on that side.  For a non-leaf node:
 *   - every pair buffered for child i must be above the child's lower bound (asserted) and
 *     not above its upper bound (reported on stdout, result becomes 1);
 *   - every pivot must lie within (LORANGE, HIRANGE] (asserted);
 *   - when RECURSE is non-zero each child is verified with its narrowed range.
 * Leaf nodes are only pinned and unpinned.
 *
 * Returns 0 if everything checked out, a non-zero pin/unpin error code, or the OR of 1 and
 * the children's results when a violation was reported.
 * Uses a GCC nested function as the hashtable_iterate() callback.
 */
int verify_brtnode (BRT brt, diskoff off, bytevec lorange, ITEMLEN lolen, bytevec hirange, ITEMLEN hilen, int recurse) {
    int result=0;
    BRTNODE node;
    void *node_v;
    int r;
    if ((r = cachetable_get_and_pin(brt->cf, off, &node_v,
                                    brtnode_flush_callback, brtnode_fetch_callback, (void*)brt->h->nodesize)))
        return r;
    node=node_v;
    if (node->height>0) {
        int i;
        /* Check the buffer of every child, including the last one (whose upper bound is
         * this node's own HIRANGE). */
        for (i=0; i< node->u.n.n_children; i++) {
            bytevec thislorange,thishirange;
            ITEMLEN thislolen, thishilen;
            if (node->u.n.n_children==0 || i==0) {
                thislorange=lorange;
                thislolen =lolen;
            } else {
                thislorange=node->u.n.childkeys[i-1];
                thislolen =node->u.n.childkeylens[i-1];
            }
            if (node->u.n.n_children==0 || i+1>=node->u.n.n_children) {
                thishirange=hirange;
                thishilen =hilen;
            } else {
                thishirange=node->u.n.childkeys[i];
                thishilen =node->u.n.childkeylens[i];
            }
            {
                /* Callback: checks one buffered pair against the bounds computed above. */
                void verify_pair (bytevec key, unsigned int keylen,
                                  bytevec data __attribute__((__unused__)), unsigned int datalen __attribute__((__unused__)),
                                  void *ignore __attribute__((__unused__))) {
                    if (thislorange) assert(keycompare(thislorange,thislolen,key,keylen)<0);
                    if (thishirange && keycompare(key,keylen,thishirange,thishilen)>0) {
                        printf("%s:%d in buffer %d key %s is bigger than %s\n", __FILE__, __LINE__, i, (char*)key, (char*)thishirange);
                        result=1;
                    }
                }
                hashtable_iterate(node->u.n.htables[i], verify_pair, 0);
            }
        }
        for (i=0; i<node->u.n.n_children; i++) {
            if (i>0) {
                /* Pivot i-1 must lie inside this node's own range. */
                if (lorange) assert(keycompare(lorange,lolen, node->u.n.childkeys[i-1], node->u.n.childkeylens[i-1])<0);
                if (hirange) assert(keycompare(node->u.n.childkeys[i-1], node->u.n.childkeylens[i-1], hirange, hilen)<=0);
            }
            if (recurse) {
                result|=verify_brtnode(brt, node->u.n.children[i],
                                       (i==0) ? lorange : node->u.n.childkeys[i-1],
                                       (i==0) ? lolen : node->u.n.childkeylens[i-1],
                                       (i==node->u.n.n_children-1) ? hirange : node->u.n.childkeys[i],
                                       (i==node->u.n.n_children-1) ? hilen : node->u.n.childkeylens[i],
                                       recurse);
            }
        }
    }
    if ((r = cachetable_unpin(brt->cf, off, 0))) return r;
    return result;
}
/* Verify the whole tree BRT by running verify_brtnode() recursively from the root with
 * unbounded key ranges.
 * Returns the non-zero code from reading the header or from the verification (after
 * releasing the header), otherwise the result of unpinning the header. */
int verify_brt (BRT brt) {
    CACHEKEY *rootp;
    int r = read_and_pin_brt_header(brt->cf, &brt->h);
    if (r!=0) return r;
    rootp = calculate_root_offset_pointer(brt);
    r = verify_brtnode(brt, *rootp, 0, 0, 0, 0, 1);
    if (r!=0) {
        /* Release the header but report the verification result. */
        unpin_brt_header(brt);
        return r;
    }
    return unpin_brt_header(brt);
}
/* Disabled code: brt_fsync() and brt_flush() are compiled out.  The bodies refer to
 * brt->cachetable and brt->fd, which the enabled code in this file does not use (it goes
 * through brt->cf instead).
 * NOTE(review): brt.h still declares both functions -- confirm nothing calls them. */
#if 0
void brt_fsync (BRT brt) {
int r = cachetable_fsync(brt->cachetable);
assert(r==0);
r = fsync(brt->fd);
assert(r==0);
}
void brt_flush (BRT brt) {
int r = cachetable_flush(brt->cachetable, brt);
assert(r==0);
}
#endif
/* Not implemented: always aborts.
 * Intended algorithm: for each key in the cnum'th mdict, insert it to the childnode.
 * It may cause a split. */
int brtnode_flush_child (BRT brt, BRTNODE node, int cnum) {
    /* Mark the parameters as deliberately unused. */
    (void)brt;
    (void)node;
    (void)cnum;
    abort();
}
/* Maximum depth of a cursor's root-to-leaf path. */
#define CURSOR_PATHLEN_LIMIT 256
/* A cursor into a tree.  Cursors of one tree form a doubly linked list whose ends are
 * brt->cursors_head and brt->cursors_tail. */
struct brt_cursor {
BRT brt; /* The tree this cursor belongs to. */
/* Number of valid entries in path[].
 * NOTE(review): an earlier comment said "-1 if the cursor points nowhere", but brt_cursor()
 * and unpin_cursor() both set this to 0 for an unpositioned cursor. */
int path_len;
BRTNODE path[CURSOR_PATHLEN_LIMIT]; /* Include the leaf (last). These are all pinned. */
int pathcnum[CURSOR_PATHLEN_LIMIT]; /* which child did we descend to from here? */
PMA_CURSOR pmacurs; /* The cursor into the leaf. NULL if the cursor doesn't exist. */
BRT_CURSOR prev,next; /* Neighbours in the tree's cursor list; 0 at either end. */
};
/* Unpins every node on a cursor's path; defined further down in this file. */
static int unpin_cursor (BRT_CURSOR cursor);
/* Create a new, unpositioned cursor on BRT, link it at the head of the tree's cursor list,
 * and return it in *CURSOR.  Always returns 0; allocation failure trips an assert. */
int brt_cursor (BRT brt, BRT_CURSOR*cursor) {
    BRT_CURSOR MALLOC(curs);
    BRT_CURSOR old_head;
    assert(curs);
    old_head = brt->cursors_head;

    curs->brt = brt;
    curs->path_len = 0;
    curs->pmacurs = 0;

    /* Push onto the front of the doubly linked list. */
    curs->prev = 0;
    curs->next = old_head;
    if (old_head == 0) {
        brt->cursors_tail = curs;
    } else {
        old_head->prev = curs;
    }
    brt->cursors_head = curs;

    *cursor = curs;
    return 0;
}
/* Close CURS: unpin the nodes on its path, unlink it from its tree's cursor list, free its
 * leaf cursor if it has one, and free the cursor itself.
 * Returns the first non-zero code from unpinning or from pma_cursor_free(), else 0.  The
 * cursor is unlinked and freed even when an error is reported. */
int brt_cursor_close (BRT_CURSOR curs) {
    BRT brt = curs->brt;
    int r = unpin_cursor(curs);
    BRT_CURSOR before = curs->prev;
    BRT_CURSOR after = curs->next;

    /* Detach from the predecessor side. */
    if (before != 0) {
        before->next = after;
    } else {
        assert(brt->cursors_head==curs);
        brt->cursors_head = after;
    }
    /* Detach from the successor side. */
    if (after != 0) {
        after->prev = before;
    } else {
        assert(brt->cursors_tail==curs);
        brt->cursors_tail = before;
    }

    if (curs->pmacurs) {
        int r2 = pma_cursor_free(&curs->pmacurs);
        if (r==0) r = r2;
    }
    my_free(curs);
    return r;
}
/* Position CURSOR on the last entry of the subtree rooted at disk offset OFF.
 *
 * The node at OFF is pinned and appended to cursor->path.  For a non-leaf the children are
 * tried from the last one backwards for as long as the recursive call returns DB_NOTFOUND.
 * For a leaf a PMA cursor is created and moved to the last entry.
 * On success (0) the node stays pinned and stays on the path.  On failure the node is
 * removed from the path and unpinned (and the PMA cursor freed if it had been created),
 * and the non-zero code is returned.
 *
 * Cleanup labels sit inside `if (0) { ... }` blocks and are entered only by goto:
 * died10 frees the PMA cursor and continues into the path pop and died0; died0 unpins the
 * node and continues into `return r`.
 */
int brtcurs_set_position_last (BRT_CURSOR cursor, diskoff off) {
BRT brt=cursor->brt;
void *node_v;
int r = cachetable_get_and_pin(brt->cf, off, &node_v,
brtnode_flush_callback, brtnode_fetch_callback, (void*)brt->h->nodesize);
if (r!=0) {
if (0) { died0: cachetable_unpin(brt->cf, off, 0); }
return r;
}
BRTNODE node = node_v;
assert(cursor->path_len<CURSOR_PATHLEN_LIMIT);
cursor->path[cursor->path_len++] = node;
if (node->height>0) {
int childnum = node->u.n.n_children-1;
try_prev_child:
/* Record which child is being descended into for this path entry. */
cursor->pathcnum[cursor->path_len-1] = childnum;
r=brtcurs_set_position_last (cursor, node->u.n.children[childnum]);
if (r==DB_NOTFOUND) {
if (childnum>0) {
childnum--;
goto try_prev_child;
}
}
if (r!=0) {
/* we ran out of children without finding anything, or had some other trouble. */
cursor->path_len--;
goto died0;
}
return 0;
} else {
r=pma_cursor(node->u.l.buffer, &cursor->pmacurs);
if (r!=0) {
if (0) { died10: pma_cursor_free(&cursor->pmacurs); }
cursor->path_len--;
goto died0;
}
r=pma_cursor_set_position_last(cursor->pmacurs);
if (r!=0) goto died10; /* we'll deallocate this cursor, and unpin this node, and go back up. */
return 0;
}
}
/* Position CURSOR on the first entry of the subtree rooted at disk offset OFF.
 *
 * The node at OFF is pinned and appended to cursor->path.  For a non-leaf the children are
 * tried from the first one forwards for as long as the recursive call returns DB_NOTFOUND.
 * For a leaf a PMA cursor is created and moved to the first entry.
 * On success (0) the node stays pinned and stays on the path.  On failure the node is
 * removed from the path and unpinned (and the PMA cursor freed if it had been created),
 * and the non-zero code is returned.
 *
 * Cleanup labels sit inside `if (0) { ... }` blocks and are entered only by goto:
 * died10 frees the PMA cursor and continues into the path pop and died0; died0 unpins the
 * node and continues into `return r`.
 */
int brtcurs_set_position_first (BRT_CURSOR cursor, diskoff off) {
BRT brt=cursor->brt;
void *node_v;
int r = cachetable_get_and_pin(brt->cf, off, &node_v,
brtnode_flush_callback, brtnode_fetch_callback, (void*)brt->h->nodesize);
if (r!=0) {
if (0) { died0: cachetable_unpin(brt->cf, off, 0); }
return r;
}
BRTNODE node = node_v;
assert(cursor->path_len<CURSOR_PATHLEN_LIMIT);
cursor->path[cursor->path_len++] = node;
if (node->height>0) {
int childnum = 0;
try_next_child:
/* Record which child is being descended into for this path entry. */
cursor->pathcnum[cursor->path_len-1] = childnum;
r=brtcurs_set_position_first (cursor, node->u.n.children[childnum]);
if (r==DB_NOTFOUND) {
if (childnum+1<node->u.n.n_children) {
childnum++;
goto try_next_child;
}
}
if (r!=0) {
/* we ran out of children without finding anything, or had some other trouble. */
cursor->path_len--;
goto died0;
}
return 0;
} else {
r=pma_cursor(node->u.l.buffer, &cursor->pmacurs);
if (r!=0) {
if (0) { died10: pma_cursor_free(&cursor->pmacurs); }
cursor->path_len--;
goto died0;
}
r=pma_cursor_set_position_first(cursor->pmacurs);
if (r!=0) goto died10; /* we'll deallocate this cursor, and unpin this node, and go back up. */
return 0;
}
}
/* Unpin every node on CURSOR's path and reset the path to empty.
 * All nodes are unpinned even if one unpin fails; the first non-zero code is returned,
 * or 0 if every unpin succeeded. */
static int unpin_cursor (BRT_CURSOR cursor) {
    BRT brt=cursor->brt;
    int first_err = 0;
    int depth = 0;
    while (depth < cursor->path_len) {
        int rc = cachetable_unpin(brt->cf, cursor->path[depth]->thisnodename, 0);
        if (first_err==0) first_err = rc;
        depth++;
    }
    cursor->path_len = 0;
    return first_err;
}
/* Position CURSOR according to FLAGS and fetch the pair it lands on into KBT/VBT.
 *
 * Supported positions are DB_FIRST and DB_LAST; any other value aborts.  The DB_RMW bit is
 * accepted and stripped before the position is examined.
 * Returns 0 on success, otherwise the first non-zero code from reading the header,
 * unpinning the old path, positioning, fetching the current pair, or unpinning the header.
 * The header pin is released on every path after it has been taken.
 */
int brt_c_get (BRT_CURSOR cursor, DBT *kbt, DBT *vbt, int flags) {
    int r;
    CACHEKEY *rootp;
    /* NOTE(review): dumps the whole tree to stdout on every call; looks like leftover
     * debugging, kept so the observable output is unchanged. */
    dump_brt(cursor->brt);
    assert(0==cachefile_assert_all_unpinned(cursor->brt->cf));
    if ((r = read_and_pin_brt_header(cursor->brt->cf, &cursor->brt->h))) {
        /* died0: release the header pin, then return the error already stored in r. */
        if (0) { died0: unpin_brt_header(cursor->brt); }
        return r;
    }
    rootp = calculate_root_offset_pointer(cursor->brt);
    /* DB_RMW is only a modifier here; it does not select a position. */
    flags &= ~DB_RMW;
    switch (flags) {
    case DB_LAST:
        r=unpin_cursor(cursor); if (r!=0) goto died0;
        r=brtcurs_set_position_last(cursor, *rootp); if (r!=0) goto died0;
        r=pma_cget_current(cursor->pmacurs, kbt, vbt);
        break;
    case DB_FIRST:
        r=unpin_cursor(cursor); if (r!=0) goto died0;
        r=brtcurs_set_position_first(cursor, *rootp); if (r!=0) goto died0;
        r=pma_cget_current(cursor->pmacurs, kbt, vbt);
        break;
    default:
        fprintf(stderr, "%s:%d c_get(...,%d) not ready\n", __FILE__, __LINE__, flags);
        abort();
    }
    /* A failed fetch must be reported rather than overwritten by the unpin result. */
    if (r!=0) goto died0;
    if ((r = unpin_brt_header(cursor->brt))!=0) return r;
    return 0;
}
/* Public interface of the BRT tree implementation. */
#ifndef BRT_H
#define BRT_H
// This must be first to make the 64-bit file mode work right in Linux
#define _FILE_OFFSET_BITS 64
#include "brttypes.h"
#include "ybt.h"
#include "../include/ydb-constants.h"
#include "cachetable.h"
/* Opaque handle to an open tree. */
typedef struct brt *BRT;
/* Open the tree named dbname (NULL for a single unnamed tree) in file fname, creating it
 * when is_create is non-zero; the handle is returned through the BRT* argument.
 * Returns 0 on success or a non-zero error code. */
int open_brt (const char *fname, const char *dbname, int is_create, BRT *, int nodesize, CACHETABLE);
//int brt_create (BRT **, int nodesize, int n_nodes_in_cache); /* the nodesize and n_nodes in cache really should be separately configured. */
//int brt_open (BRT *, char *fname, char *dbname);
/* Insert the pair (key,val), replacing any existing value for key.  Returns 0 on success. */
int brt_insert (BRT brt, bytevec key, ITEMLEN keylen, bytevec val, ITEMLEN vallen);
/* Look key up; on success (0) *val and *vallen describe the value found. */
int brt_lookup (BRT brt, bytevec key, ITEMLEN keylen, bytevec*val, ITEMLEN *vallen);
/* Close the tree, including any cursors still open on it, and free the handle. */
int close_brt (BRT);
/* Print the whole tree to stdout (debugging aid). */
int dump_brt (BRT brt);
/* NOTE(review): the only visible definitions of brt_fsync and brt_flush are compiled out
 * with #if 0 -- confirm before calling them. */
void brt_fsync (BRT); /* fsync, but don't clear the caches. */
void brt_flush (BRT); /* fsync and clear the caches. */
int brt_create_cachetable (CACHETABLE *t, int n_cachlines /* Pass 0 if you want the default. */);
/* Debug level read by brt_insert; 0 means no debug output. */
extern int brt_debug_mode;
/* Check the key-range invariants of the whole tree.  Returns 0 if none is violated. */
int verify_brt (BRT brt);
/* Print the block numbers of all nodes of the tree to stdout. */
int show_brt_blocknumbers(BRT);
/* Opaque handle to a cursor on a tree. */
typedef struct brt_cursor *BRT_CURSOR;
/* Create a cursor on the tree and return it through the second argument. */
int brt_cursor (BRT, BRT_CURSOR*);
/* Position the cursor according to brtc_flags (DB_FIRST or DB_LAST, optionally combined
 * with DB_RMW) and fetch the pair there into kbt/vbt. */
int brt_c_get (BRT_CURSOR cursor, DBT *kbt, DBT *vbt, int brtc_flags);
/* Close and free the cursor. */
int brt_cursor_close (BRT_CURSOR curs);
#endif
/* Basic types shared by the BRT code. */
#ifndef BRTTYPES_H
#define BRTTYPES_H
/* Feature-test macros.
 * NOTE(review): these normally need to be defined before the first system header is
 * included -- confirm the include order of the files that use this header. */
#define _XOPEN_SOURCE 500
#define _FILE_OFFSET_BITS 64
/* Length in bytes of a key or a value. */
typedef unsigned int ITEMLEN;
/* Read-only pointer to the bytes of a key or a value. */
typedef const void *bytevec;
//typedef const void *bytevec;
#endif
#include "memory.h"
#include "cachetable.h"
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
/* Value type stored in the cachetable by this test. */
struct item {
CACHEKEY key; /* The cache key the item was created for; flush() checks that it matches. */
char *something; /* Set to "something" by make_item() and checked again in flush(). */
};
/* Number of flushes the test still expects; flush() decrements it for every match. */
int expect_n_flushes=0;
/* Keys of the flushes still expected; only the first expect_n_flushes entries are valid. */
CACHEKEY flushes[100];

/* Replace the set of expected flushes with the single key KEY. */
static void expect1(CACHEKEY key) {
    flushes[0]=key;
    expect_n_flushes=1;
}
/* Add KEY to the set of expected flushes.
 * Asserts that the fixed-size flushes[] table still has room, so a test that registers too
 * many keys fails loudly instead of writing past the end of the array. */
static void expectN(CACHEKEY key) {
    assert(expect_n_flushes>=0);
    assert(expect_n_flushes<(int)(sizeof(flushes)/sizeof(flushes[0])));
    flushes[expect_n_flushes++]=key;
}
/* The cachefile that the flush and fetch callbacks expect to be called with. */
CACHEFILE expect_f;

/* Flush callback for the test: checks that the item is intact and that its flush was
 * expected, removes the key from the expected set, and frees the item.
 * Aborts when the key is not in the expected set. */
static void flush (CACHEFILE f, CACHEKEY key, void*value, int write_me __attribute__((__unused__)), int keep_mee __attribute__((__unused__))) {
    struct item *it = value;
    int slot;
    printf("Flushing %lld (it=>key=%lld)\n", key, it->key);
    assert(expect_f==f);
    assert(strcmp(it->something,"something")==0);
    assert(it->key==key);
    /* Verify that we expected the flush. */
    for (slot=0; slot<expect_n_flushes; slot++) {
        if (key!=flushes[slot]) continue;
        /* Remove the entry by moving the last expected key into its place. */
        flushes[slot] = flushes[expect_n_flushes-1];
        expect_n_flushes--;
        my_free(value);
        return;
    }
    printf("%lld was flushed, but I didn't expect it\n", key);
    abort();
}
/* Allocate a test item for KEY.  The item's `something` field points at the string
 * literal "something", which flush() later checks.  The caller owns the returned item;
 * in this test it is handed to the cachetable and released in flush().
 * Allocation failure trips an assert instead of dereferencing a null pointer. */
struct item *make_item (CACHEKEY key) {
    struct item *MALLOC(it);
    assert(it);
    it->key=key;
    it->something="something";
    return it;
}
/* Key of the most recent fetch; tests reset it to -1 before an operation and inspect it
 * afterwards. */
CACHEKEY did_fetch=-1;

/* Fetch callback for the test: checks the cachefile and the extra argument (which the test
 * always passes as 23), builds a fresh item for KEY, and records KEY in did_fetch.
 * Always returns 0. */
int fetch (CACHEFILE f, CACHEKEY key, void**value, void*extraargs) {
    struct item *fresh;
    printf("Fetch %lld\n", key);
    assert (expect_f==f);
    assert((long)extraargs==23);
    fresh = make_item(key);
    *value = fresh;
    did_fetch=key;
    return 0;
}
/* Exercise LRU eviction in a 5-entry cachetable.
 * The trailing comments show the LRU list, most recently used first;
 * P means pinned and U means unpinned.  Every eviction must be announced
 * with expect1()/expectN() beforehand, otherwise flush() aborts. */
void test0 (void) {
    void* t3=(void*)23;   /* extraargs cookie checked by fetch() */
    CACHETABLE t;
    CACHEFILE f;
    int r;
    char fname[] = "test.dat";
    r=create_cachetable(&t, 5);
    assert(r==0);
    unlink(fname);
    r = cachetable_openf(&f, t, fname, O_RDWR|O_CREAT, 0777);
    assert(r==0);
    expect_f = f;

    expect_n_flushes=0;
    r=cachetable_put(f, 1, make_item(1), flush, fetch, t3);   /* 1P */ /* this is the lru list.  1 is pinned. */
    assert(r==0);
    assert(expect_n_flushes==0);

    expect_n_flushes=0;
    r=cachetable_put(f, 2, make_item(2), flush, fetch, t3);
    assert(r==0);
    r=cachetable_unpin(f, 2, 1);           /* 2U 1P */
    assert(r==0);                          /* the original never checked this result */
    assert(expect_n_flushes==0);

    expect_n_flushes=0;
    r=cachetable_put(f, 3, make_item(3), flush, fetch, t3);
    assert(r==0);
    assert(expect_n_flushes==0);            /* 3P 2U 1P */   /* 3 is most recently used (pinned), 2 is next (unpinned), 1 is least recent (pinned) */

    expect_n_flushes=0;
    r=cachetable_put(f, 4, make_item(4), flush, fetch, t3);
    assert(r==0);
    assert(expect_n_flushes==0);            /* 4P 3P 2U 1P */

    expect_n_flushes=0;
    r=cachetable_put(f, 5, make_item(5), flush, fetch, t3);
    assert(r==0);
    r=cachetable_unpin(f, 5, 1);
    assert(r==0);
    r=cachetable_unpin(f, 3, 1);
    assert(r==0);
    assert(expect_n_flushes==0);            /* 5U 4P 3U 2U 1P */

    /* The table is now full, so each further insertion evicts the oldest unpinned item. */
    expect1(2); /* 2 is the oldest unpinned item. */
    r=cachetable_put(f, 6, make_item(6), flush, fetch, t3);   /* 6P 5U 4P 3U 1P */
    assert(r==0);
    assert(expect_n_flushes==0);

    expect1(3);
    r=cachetable_put(f, 7, make_item(7), flush, fetch, t3);
    assert(r==0);
    assert(expect_n_flushes==0);
    r=cachetable_unpin(f, 7, 1);           /* 7U 6P 5U 4P 1P */
    assert(r==0);

    {
        /* 5 is still cached: no fetch and no flush expected. */
        void *item_v=0;
        expect_n_flushes=0;
        r=cachetable_get_and_pin(f, 5, &item_v, flush, fetch, t3);  /* 5P 7U 6P 4P 1P */
        assert(r==0);
        assert(((struct item *)item_v)->key==5);
        assert(strcmp(((struct item *)item_v)->something,"something")==0);
        assert(expect_n_flushes==0);
    }

    {
        /* 2 was evicted earlier, so it must be fetched again; 4 is evicted to make room. */
        void *item_v=0;
        r=cachetable_unpin(f, 4, 1);
        assert(r==0);
        expect1(4);
        did_fetch=-1;
        r=cachetable_get_and_pin(f, 2, &item_v, flush, fetch, t3);  /* 2p 5P 7U 6P 1P */
        assert(r==0);
        assert(did_fetch==2); /* Expect that 2 is fetched in. */
        assert(((struct item *)item_v)->key==2);
        assert(strcmp(((struct item *)item_v)->something,"something")==0);
        assert(expect_n_flushes==0);
    }

    r=cachetable_unpin(f, 2, 1);
    assert(r==0);
    r=cachetable_unpin(f ,5, 1);
    assert(r==0);
    r=cachetable_unpin(f, 6, 1);
    assert(r==0);
    r=cachetable_unpin(f, 1, 1);
    assert(r==0);
    r=cachetable_assert_all_unpinned(t);
    assert(r==0);

    /* Closing the file must flush everything that is still cached. */
    printf("Closing\n");
    expect1(2);
    expectN(5);
    expectN(7);
    expectN(6);
    expectN(1);
    r=cachefile_close(f);
    assert(r==0);
    r=cachetable_close(t);
    assert(r==0);
    assert(expect_n_flushes==0);
    expect_f = 0;
    memory_check_all_free();
}
/* Flush callback for test_nested_pin: the value is a pointer to an int that must still be 0. */
static void flush_n (CACHEFILE f __attribute__((__unused__)), CACHEKEY key __attribute__((__unused__)), void *value, int write_me __attribute__((__unused__)), int keep_me __attribute__((__unused__))) {
    assert(*(int*)value==0);
}
/* Fetch callback for test_nested_pin: checks the extraargs cookie (42) and produces a null value. */
static int fetch_n (CACHEFILE f __attribute__((__unused__)), CACHEKEY key __attribute__((__unused__)), void**value, void*extraargs) {
    long cookie = (long)extraargs;
    assert(cookie==42);
    *value = 0;
    return 0;
}
/* Check that pins nest: an item pinned twice must be unpinned twice before it
 * can be evicted.  Uses a cachetable with room for a single entry. */
void test_nested_pin (void) {
void *f2=(void*)42;
CACHETABLE t;
CACHEFILE f;
int i0, i1;
int r;
void *vv;
char fname[] = "test.dat";
r = create_cachetable(&t, 1);
assert(r==0);
unlink(fname);
r = cachetable_openf(&f, t, fname, O_RDWR|O_CREAT, 0777);
assert(r==0);
expect_f = f;
i0=0; i1=0;
/* put pins key 1 once ... */
r = cachetable_put(f, 1, &i0, flush_n, fetch_n, f2);
assert(r==0);
/* ... and get_and_pin pins it a second time. */
r = cachetable_get_and_pin(f, 1, &vv, flush_n, fetch_n, f2);
assert(r==0);
assert(vv==&i0);
assert(i0==0);
/* One unpin leaves key 1 pinned once, so the full table cannot make room for key 2. */
r = cachetable_unpin(f, 1, 0);
assert(r==0);
r = cachetable_put(f, 2, &i1, flush_n, fetch_n, f2);
assert(r!=0); // previously pinned, we shouldn't be able to put.
r = cachetable_unpin(f, 1, 0);
assert(r==0);
r = cachetable_put(f, 2, &i1, flush_n, fetch_n, f2);
assert(r==0); // now it is unpinned, we can put it.
/* NOTE(review): key 2 stays pinned and neither f nor t is closed here. */
}
/* Flush callback that deliberately does nothing; the values in
 * test_multi_filehandles are plain integers cast to pointers. */
void null_flush (CACHEFILE cf __attribute__((__unused__)), CACHEKEY k __attribute__((__unused__)), void *v __attribute__((__unused__)), int write_me __attribute__((__unused__)), int keep_me __attribute__((__unused__)))
{
    /* nothing to release */
}
/* Fetch callback that fabricates the value key+123 (as a pointer-sized integer).
 * Asserts that the extraargs cookie is 123. */
int add123_fetch (CACHEFILE cf __attribute__((__unused__)), CACHEKEY key, void **value, void*extraargs) {
    unsigned long fabricated = (unsigned long)key+123L;
    assert((long)extraargs==123);
    *value = (void*)fabricated;
    return 0;
}
/* Fetch callback that fabricates the value key+222 (as a pointer-sized integer).
 * Asserts that the extraargs cookie is 222. */
int add222_fetch (CACHEFILE cf __attribute__((__unused__)), CACHEKEY key, void **value, void*extraargs) {
    unsigned long fabricated = (unsigned long)key+222L;
    assert((long)extraargs==222);
    *value = (void*)fabricated;
    return 0;
}
/* Check that two names for the same file share one CACHEFILE, and that
 * different files keep separate entries for the same key. */
void test_multi_filehandles (void) {
CACHETABLE t;
CACHEFILE f1,f2,f3;
char fname1[]="test.dat";
char fname2[]="test2.dat";
char fname3[]="test3.dat";
int r;
void *v;
unlink(fname1);
unlink(fname2);
r = create_cachetable(&t, 4); assert(r==0);
r = cachetable_openf(&f1, t, fname1, O_RDWR|O_CREAT, 0777); assert(r==0);
/* fname2 is a hard link to fname1, so both names have the same device and inode. */
r = link(fname1, fname2); assert(r==0);
r = cachetable_openf(&f2, t, fname2, O_RDWR|O_CREAT, 0777); assert(r==0);
r = cachetable_openf(&f3, t, fname3, O_RDWR|O_CREAT, 0777); assert(r==0);
assert(f1==f2);
assert(f1!=f3);
/* A value put through f1 is visible through f2. */
r = cachetable_put(f1, 1, (void*)124, null_flush, add123_fetch, (void*)123); assert(r==0);
r = cachetable_get_and_pin(f2, 1, &v, null_flush, add123_fetch, (void*)123); assert(r==0);
assert((unsigned long)v==124);
/* Key 2 is not cached yet, so it is fetched: 2+123. */
r = cachetable_get_and_pin(f2, 2, &v, null_flush, add123_fetch, (void*)123); assert(r==0);
assert((unsigned long)v==125);
/* The same key in a different file is a different entry: 2+222. */
r = cachetable_get_and_pin(f3, 2, &v, null_flush, add222_fetch, (void*)222); assert(r==0);
assert((unsigned long)v==224);
r = cachetable_maybe_get_and_pin(f1, 2, &v); assert(r==0);
assert((unsigned long)v==125);
/* NOTE(review): the pinned entries, the cachefiles and the table are not released here. */
}
int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) {
test0();
test_nested_pin();
test_multi_filehandles ();
printf("ok\n");
return 0;
}
#include "cachetable.h"
#include "memory.h"
#include "yerror.h"
#include <assert.h>
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
#include <sys/stat.h>
#include <string.h>
//#define TRACE_CACHETABLE
/* WHEN_TRACE_CT(x) evaluates x only when TRACE_CACHETABLE is defined;
 * otherwise it expands to a no-op expression and x is not compiled at all. */
#ifdef TRACE_CACHETABLE
#define WHEN_TRACE_CT(x) x
#else
#define WHEN_TRACE_CT(x) ((void)0)
#endif
/* One cached key/value pair.  Each pair is linked into both the LRU list and a hash chain. */
typedef struct ctpair *PAIR;
struct ctpair {
long long pinned;   /* Pin count: incremented by put/get_and_pin, decremented by unpin. */
char dirty;         /* Nonzero if the value must be written when flushed. */
CACHEKEY key;
void *value;
PAIR next,prev; // In LRU list.
PAIR hash_chain;    /* Next pair in the same hash bucket. */
CACHEFILE cachefile;   /* The file this pair belongs to; lookups match on (key, cachefile). */
void (*flush_callback)(CACHEFILE,CACHEKEY,void*, int write_me, int keep_me);
int (*fetch_callback)(CACHEFILE,CACHEKEY,void**,void*extrargs);
void*extraargs;     /* Stored by cachetable_put alongside the callbacks. */
};
/* The cache itself: a chained hash table of pairs plus an LRU list over all of them. */
struct cachetable {
enum typ_tag tag;   /* NOTE(review): not written anywhere in this file -- confirm who sets it. */
int n_in_table;     /* Number of pairs currently stored. */
int table_size;     /* Number of hash buckets; maybe_flush_some() also uses it as the capacity limit. */
PAIR *table;        /* Array of table_size bucket heads. */
PAIR head,tail; // of LRU list. head is the most recently used. tail is least recently used.
CACHEFILE cachefiles;   /* Singly linked list of the open cachefiles. */
};
/* Identity of an open file, used to detect that two opens refer to the same file. */
struct fileid {
dev_t st_dev; /* device and inode are enough to uniquely identify a file in unix. */
ino_t st_ino;
};
/* An open file known to a cachetable. */
struct cachefile {
CACHEFILE next;     /* Next entry in the cachetable's list of cachefiles. */
int refcount; /* CACHEFILEs are shared. Use a refcount to decide when to really close it. */
int fd; /* Bug: If a file is opened read-only, then it is stuck in read-only. If it is opened read-write, then subsequent writers can write to it too. */
CACHETABLE cachetable;   /* The table this file belongs to. */
struct fileid fileid;
};
/* Create an empty cachetable with n_entries hash buckets (which is also the
 * maximum number of pairs kept before eviction starts).
 * On success stores the new table in *result and returns 0.
 * Returns -1 if the table header cannot be allocated.
 * NOTE(review): t->tag is left unset, as in the original -- confirm whether it should be initialized. */
int create_cachetable (CACHETABLE *result, int n_entries) {
    CACHETABLE MALLOC(t);
    int i;
    if (t==0) return -1;
    t->n_in_table = 0;
    t->table_size = n_entries;
    /* The bucket array holds PAIR pointers, so size each slot as a pointer.
     * (The original used sizeof(struct ctpair), over-allocating every slot.) */
    t->table = my_calloc(t->table_size, sizeof(*t->table));
    assert(t->table);
    t->head = t->tail = 0;
    for (i=0; i<t->table_size; i++) {
        t->table[i]=0;
    }
    t->cachefiles = 0;
    *result = t;
    return 0;
}
/* Open fname (with the given open(2) flags and mode) and attach it to cachetable t.
 * If the same file (same device and inode) is already attached, the existing
 * CACHEFILE is shared: its refcount is bumped and the new descriptor is closed.
 * On success stores the CACHEFILE in *cf and returns 0; otherwise returns an errno value. */
int cachetable_openf (CACHEFILE *cf, CACHETABLE t, const char *fname, int flags, mode_t mode) {
    struct stat statbuf;
    CACHEFILE extant;
    int fd = open(fname, flags, mode);
    if (fd<0) return errno;
    if (fstat(fd, &statbuf)!=0) {
        /* Report the failure instead of asserting, and do not leak the descriptor. */
        int saved_errno = errno;
        close(fd);
        return saved_errno;
    }
    for (extant = t->cachefiles; extant; extant=extant->next) {
        /* Compare the fields rather than memcmp()ing the structs: padding bytes
         * inside struct fileid are not guaranteed to survive struct assignment. */
        if (extant->fileid.st_dev==statbuf.st_dev && extant->fileid.st_ino==statbuf.st_ino) {
            close(fd);
            extant->refcount++;
            *cf = extant;
            return 0;
        }
    }
    {
        CACHEFILE MALLOC(newcf);
        if (newcf==0) {
            close(fd);
            return ENOMEM;
        }
        memset(&newcf->fileid, 0, sizeof(newcf->fileid));
        newcf->fileid.st_dev = statbuf.st_dev;
        newcf->fileid.st_ino = statbuf.st_ino;
        newcf->next = t->cachefiles;
        newcf->refcount = 1;
        newcf->fd = fd;
        newcf->cachetable = t;
        t->cachefiles = newcf;
        *cf = newcf;
        return 0;
    }
}
/* Unlink cf from the singly linked list starting at `list` and return the
 * (possibly new) head.  The list is returned unchanged if cf is not in it. */
CACHEFILE remove_cf_from_list (CACHEFILE cf, CACHEFILE list) {
    CACHEFILE *link = &list;
    while (*link) {
        if (*link==cf) {
            *link = cf->next;
            break;
        }
        link = &(*link)->next;
    }
    return list;
}
int cachefile_flush (CACHEFILE cf);
/* Drop one reference to cf.  When the last reference goes away, flush every
 * pair belonging to the file, close the descriptor, unlink cf from its
 * cachetable and free it.
 * Returns 0 while references remain; otherwise the flush error, or the result of close(). */
int cachefile_close (CACHEFILE cf) {
    int result;
    assert(cf->refcount>0);
    cf->refcount--;
    if (cf->refcount!=0) return 0;
    result = cachefile_flush(cf);
    if (result) return result;
    result = close(cf->fd);
    cf->cachetable->cachefiles = remove_cf_from_list(cf, cf->cachetable->cachefiles);
    my_free(cf);
    return result;
}
/* Scan the whole table and print every pair that is still pinned.
 * Returns 1 if at least one pair is pinned, 0 otherwise. */
int cachetable_assert_all_unpinned (CACHETABLE t) {
    int found_pinned = 0;
    int bucket;
    for (bucket=0; bucket<t->table_size; bucket++) {
        PAIR cur = t->table[bucket];
        while (cur) {
            assert(cur->pinned>=0);
            if (cur->pinned!=0) {
                printf("%s:%d pinned: %lld (%p)\n", __FILE__, __LINE__, cur->key, cur->value);
                found_pinned = 1;
            }
            cur = cur->hash_chain;
        }
    }
    return found_pinned;
}
/* Like cachetable_assert_all_unpinned(), but only reports pairs that belong to cf.
 * Returns 1 if at least one of cf's pairs is pinned, 0 otherwise. */
int cachefile_assert_all_unpinned (CACHEFILE cf) {
    CACHETABLE t = cf->cachetable;
    int found_pinned = 0;
    int bucket;
    for (bucket=0; bucket<t->table_size; bucket++) {
        PAIR cur = t->table[bucket];
        while (cur) {
            assert(cur->pinned>=0);
            if (cur->pinned!=0 && cur->cachefile==cf) {
                printf("%s:%d pinned: %lld (%p)\n", __FILE__, __LINE__, cur->key, cur->value);
                found_pinned = 1;
            }
            cur = cur->hash_chain;
        }
    }
    return found_pinned;
}
/* Hash keylen bytes starting at key (from Sedgewick; there are probably better
 * hash functions).  Arithmetic is unsigned and wraps around. */
static unsigned int hash_key (const char *key, int keylen) {
    const unsigned int step = 378551;
    unsigned int multiplier = 63689;
    unsigned int result = 0;
    const char *cursor = key;
    int remaining = keylen;
    while (remaining > 0) {
        result = result * multiplier + *cursor;
        multiplier *= step;
        cursor++;
        remaining--;
    }
    return result;
}
/* Map a cache key to a bucket index in t by hashing the raw bytes of the key. */
static unsigned int hashit (CACHETABLE t, CACHEKEY key) {
    unsigned int raw = hash_key((char*)&key, sizeof(key));
    return raw%t->table_size;
}
/* Unlink p from the LRU list of t, fixing up head/tail when p sits at either end. */
static void lru_remove (CACHETABLE t, PAIR p) {
    PAIR toward_head = p->prev;
    PAIR toward_tail = p->next;
    if (toward_tail==0) {
        assert(t->tail==p);
        t->tail = toward_head;
    } else {
        toward_tail->prev = toward_head;
    }
    if (toward_head==0) {
        assert(t->head==p);
        t->head = toward_tail;
    } else {
        toward_head->next = toward_tail;
    }
    p->prev = p->next = 0;
}
/* Insert p at the head (most recently used end) of the LRU list of t.
 * Requires that p is not currently linked into the list. */
static void lru_add_to_list (CACHETABLE t, PAIR p) {
    PAIR old_head = t->head;
    assert(p->prev==0);
    p->prev = 0;
    p->next = old_head;
    if (old_head==0) {
        /* Empty list: p becomes the tail as well. */
        assert(!t->tail);
        t->tail = p;
    } else {
        old_head->prev = p;
    }
    t->head = p;
}
/* Mark p as most recently used by moving it to the head of the LRU list. */
static void lru_touch (CACHETABLE t, PAIR p) {
lru_remove(t,p);
lru_add_to_list(t,p);
}
/* Unlink remove_me from the hash chain starting at `list` and return the
 * (possibly new) chain head.  remove_me must be on the chain. */
static PAIR remove_from_hash_chain (PAIR remove_me, PAIR list) {
    PAIR *link = &list;
    while (*link!=remove_me) {
        link = &(*link)->hash_chain;
    }
    *link = remove_me->hash_chain;
    return list;
}
/* Evict remove_me: unlink it from the LRU list, hand the value to its flush
 * callback (asking for a write only if the pair is dirty and write_me is set,
 * and with keep_me=0), unlink it from its hash chain and free the pair. */
static void flush_and_remove (CACHETABLE t, PAIR remove_me, int write_me) {
    unsigned int h = hashit(t, remove_me->key);
    lru_remove(t, remove_me);
    //printf("flush_callback(%lld,%p)\n", remove_me->key, remove_me->value);
    /* The trace used to name a nonexistent field (otherargs), which broke the build with TRACE_CACHETABLE. */
    WHEN_TRACE_CT(printf("%s:%d CT flush_callback(%lld, %p, %p, dirty=%d, 0)\n", __FILE__, __LINE__, remove_me->key, remove_me->value, remove_me->extraargs, remove_me->dirty && write_me));
    remove_me->flush_callback(remove_me->cachefile, remove_me->key, remove_me->value, remove_me->dirty && write_me, 0);
    t->n_in_table--;
    // Remove it from the hash chain.
    t->table[h] = remove_from_hash_chain (remove_me, t->table[h]);
    my_free(remove_me);
}
/* Write flush_me back if it is dirty, but keep it in the cache: the flush
 * callback is invoked with write_me=1 and keep_me=1, then the dirty flag is cleared. */
static void flush_and_keep (PAIR flush_me) {
    if (flush_me->dirty) {
        /* The trace used to name a nonexistent field (otherargs), which broke the build with TRACE_CACHETABLE. */
        WHEN_TRACE_CT(printf("%s:%d CT flush_callback(%lld, %p, %p, dirty=1, 0)\n", __FILE__, __LINE__, flush_me->key, flush_me->value, flush_me->extraargs));
        flush_me->flush_callback(flush_me->cachefile, flush_me->key, flush_me->value, 1, 1);
        flush_me->dirty=0;
    }
}
/* Make room for one more pair.  While the table is full, evict the least
 * recently used unpinned pair.
 * Returns 0 once there is room, or 1 if the table is full and every pair is pinned. */
static int maybe_flush_some (CACHETABLE t) {
    while (t->n_in_table>=t->table_size) {
        /* Walk from the LRU tail toward the head looking for an unpinned victim. */
        PAIR victim = t->tail;
        while (victim && victim->pinned) {
            victim = victim->prev;
        }
        if (victim==0) {
            /* All were pinned. */
            printf("All are pinned\n");
            return 1;
        }
        flush_and_remove(t, victim, 1);
    }
    return 0;
}
/* Insert (key, value) for cachefile.  The new pair starts out pinned once and dirty.
 * Returns 0 on success, -1 if the key is already cached for this file, and -2
 * if the table is full and nothing can be evicted. */
int cachetable_put (CACHEFILE cachefile, CACHEKEY key, void*value,
                    void (*flush_callback)(CACHEFILE,CACHEKEY,void*, int /*write_me*/, int /*keep_me*/),
                    int (*fetch_callback)(CACHEFILE,CACHEKEY,void**,void*/*extraargs*/),
                    void*extraargs
                    ) {
    CACHETABLE t = cachefile->cachetable;
    int h = hashit(t, key);
    PAIR p = t->table[h];
    WHEN_TRACE_CT(printf("%s:%d CT cachetable_put(%lld)=%p\n", __FILE__, __LINE__, key, value));
    while (p) {
        if (p->cachefile==cachefile && p->key==key) {
            // Semantically, these two asserts are not strictly right.  After all, when are two functions eq?
            // In practice, the functions better be the same.
            assert(p->flush_callback==flush_callback);
            assert(p->fetch_callback==fetch_callback);
            return -1; /* Already present. */
        }
        p = p->hash_chain;
    }
    if (maybe_flush_some(t)) return -2;
    MALLOC(p);
    p->key = key;
    p->value = value;
    p->pinned = 1;
    p->dirty = 1;
    p->cachefile = cachefile;
    p->flush_callback = flush_callback;
    p->fetch_callback = fetch_callback;
    p->extraargs = extraargs;
    p->next = p->prev = 0;
    lru_add_to_list(t, p);
    /* Push onto the front of the bucket's chain. */
    p->hash_chain = t->table[h];
    t->table[h] = p;
    t->n_in_table++;
    return 0;
}
/* Look up key for cachefile and pin it.
 * If the pair is cached, its pin count is incremented and it becomes the most
 * recently used.  Otherwise room is made, fetch_callback is asked to produce
 * the value, and the value is inserted (pinned) with cachetable_put().
 * Returns 0 and stores the value in *value on success; -2 if the table is full
 * of pinned pairs; otherwise the nonzero result of fetch_callback or cachetable_put. */
int cachetable_get_and_pin (CACHEFILE cachefile, CACHEKEY key, void**value,
                            void(*flush_callback)(CACHEFILE,CACHEKEY,void*,int write_me, int keep_me),
                            int(*fetch_callback)(CACHEFILE, CACHEKEY key, void**value,void*extraargs), /* If we are asked to fetch something, get it by calling this back. */
                            void*extraargs
                            ) {
    CACHETABLE t = cachefile->cachetable;
    int h = hashit(t,key);
    PAIR p;
    for (p=t->table[h]; p; p=p->hash_chain) {
        if (p->key==key && p->cachefile==cachefile) {
            *value = p->value;
            p->pinned++;
            lru_touch(t,p);
            WHEN_TRACE_CT(printf("%s:%d cachtable_get_and_pin(%lld)--> %p\n", __FILE__, __LINE__, key, *value));
            return 0;
        }
    }
    if (maybe_flush_some(t)) return -2;
    {
        void *my_value;
        int r;
        WHEN_TRACE_CT(printf("%s:%d CT: fetch_callback(%lld...)\n", __FILE__, __LINE__, key));
        if ((r=fetch_callback(cachefile, key, &my_value,extraargs))) return r;
        /* Do not report success if the insertion failed (the original ignored this result).
         * NOTE(review): on this path the fetched value is not handed back to anyone. */
        if ((r=cachetable_put(cachefile, key, my_value, flush_callback, fetch_callback,extraargs))) return r;
        *value = my_value;
    }
    WHEN_TRACE_CT(printf("%s:%d did fetch: cachtable_get_and_pin(%lld)--> %p\n", __FILE__, __LINE__, key, *value));
    return 0;
}
/* Pin key for cachefile only if it is already cached; never fetches.
 * Returns 0 and stores the value in *value if found (the pair is pinned and
 * becomes most recently used), or -1 if the key is not cached. */
int cachetable_maybe_get_and_pin (CACHEFILE cachefile, CACHEKEY key, void**value) {
    CACHETABLE t = cachefile->cachetable;
    int h = hashit(t,key);
    PAIR p;
    for (p=t->table[h]; p; p=p->hash_chain) {
        if (p->key==key && p->cachefile==cachefile) {
            *value = p->value;
            p->pinned++;
            lru_touch(t,p);
            /* Trace only when TRACE_CACHETABLE is on, like cachetable_get_and_pin();
             * the original printed this unconditionally on every hit. */
            WHEN_TRACE_CT(printf("%s:%d cachtable_maybe_get_and_pin(%lld)--> %p\n", __FILE__, __LINE__, key, *value));
            return 0;
        }
    }
    return -1;
}
/* Release one pin on key for cachefile, and mark the pair dirty if `dirty` is nonzero.
 * Always returns 0, including when the key is not cached. */
int cachetable_unpin (CACHEFILE cachefile, CACHEKEY key, int dirty) {
    CACHETABLE t = cachefile->cachetable;
    int h = hashit(t,key);
    PAIR cur = t->table[h];
    WHEN_TRACE_CT(printf("%s:%d unpin(%lld)\n", __FILE__, __LINE__, key));
    while (cur && !(cur->key==key && cur->cachefile==cachefile)) {
        cur = cur->hash_chain;
    }
    if (cur) {
        assert(cur->pinned>0);
        cur->pinned--;
        cur->dirty |= dirty;
    }
    return 0;
}
/* Flush and evict every pair in the table.  Always returns 0. */
int cachetable_flush (CACHETABLE t) {
    int bucket = 0;
    while (bucket<t->table_size) {
        /* flush_and_remove() unlinks the bucket head, so re-read the head each time. */
        if (t->table[bucket]) {
            flush_and_remove(t, t->table[bucket], 1);
        } else {
            bucket++;
        }
    }
    return 0;
}
/* Flush and evict every pair that belongs to cf, leaving the pairs of other
 * files in the table.  Always returns 0. */
int cachefile_flush (CACHEFILE cf) {
    CACHETABLE t = cf->cachetable;
    int i;
    for (i=0; i<t->table_size; i++) {
        PAIR p = t->table[i];
        while (p) {
            if (p->cachefile==cf) {
                /* flush_and_remove() frees p and relinks the chain, so restart from the bucket head. */
                flush_and_remove(t, p, 1);
                p = t->table[i];
            } else {
                /* Follow the hash chain.  The original followed p->next, which is the
                 * LRU link and leads into unrelated buckets or skips pairs. */
                p = p->hash_chain;
            }
        }
    }
    return 0;
}
/* Flush everything and destroy the cachetable.
 * Returns the flush error if flushing fails, -1 if a bucket is still occupied
 * afterwards, and 0 once the table has been freed. */
int cachetable_close (CACHETABLE t) {
    int bucket;
    int r = cachetable_flush(t);
    if (r) return r;
    for (bucket=0; bucket<t->table_size; bucket++) {
        if (t->table[bucket]!=0) return -1;
    }
    my_free(t->table);
    my_free(t);
    return 0;
}
/* Evict key for cachefile, writing it back only if write_me is nonzero and the
 * pair is dirty.  Removing a key that is not cached is fine.  Always returns 0. */
int cachetable_remove (CACHEFILE cachefile, CACHEKEY key, int write_me) {
    CACHETABLE t = cachefile->cachetable;
    int h = hashit(t,key);
    PAIR cur = t->table[h];
    while (cur && !(cur->key==key && cur->cachefile==cachefile)) {
        cur = cur->hash_chain;
    }
    if (cur) {
        flush_and_remove(t, cur, write_me);
    }
    return 0;
}
/* Write back every dirty pair on the hash chain starting at p, keeping the
 * pairs cached.  The end of the chain is handled first.  Returns 0. */
static int cachetable_fsync_pairs (CACHETABLE t, PAIR p) {
    int r;
    if (p==0) return 0;
    r = cachetable_fsync_pairs(t, p->hash_chain);
    if (r!=0) return r;
    flush_and_keep(p);
    return 0;
}
/* Write back every dirty pair in the table without evicting anything.
 * Returns 0, or the first nonzero result from a bucket. */
int cachetable_fsync (CACHETABLE t) {
    int r = 0;
    int bucket;
    for (bucket=0; r==0 && bucket<t->table_size; bucket++) {
        r = cachetable_fsync_pairs(t, t->table[bucket]);
    }
    return r;
}
#if 0
int cachefile_pwrite (CACHEFILE cf, const void *buf, size_t count, off_t offset) {
ssize_t r = pwrite(cf->fd, buf, count, offset);
if (r==-1) return errno;
assert((size_t)r==count);
return 0;
}
int cachefile_pread (CACHEFILE cf, void *buf, size_t count, off_t offset) {
ssize_t r = pread(cf->fd, buf, count, offset);
if (r==-1) return errno;
if (r==0) return -1; /* No error for EOF ??? */
assert((size_t)r==count);
return 0;
}
#endif
/* Return the file descriptor underlying cf. */
int cachefile_fd (CACHEFILE cf) {
return cf->fd;
}
#ifndef CACHETABLE_H
#define CACHETABLE_H
#include <fcntl.h>
/* Implement the cache table. */
typedef long long CACHEKEY;
typedef struct cachetable *CACHETABLE;
typedef struct cachefile *CACHEFILE;
/* Maintain a cache mapping from cachekeys to values (void*)
 * Some of the keys can be pinned. Don't pin too many or for too long.
 * If the cachetable is too full, it will call the flush_callback() function with the
 * cachefile, the key, the value, and the write_me/keep_me flags,
 * and then remove the key-value pair from the cache.
 * The callback won't be any of the currently pinned keys.
 * Also when flushing an object, the cachetable drops all references to it,
 * so you may need to free() it.
 * Note: The cachetable should use a common pool of memory, flushing things across cachetables.
 * (The first implementation doesn't)
 * If you pin something twice, you must unpin it twice.
 */
/* Create a cachetable with n_entries slots and store it in *result. */
int create_cachetable (CACHETABLE */*result*/, int /*n_entries*/);
/* Open fname and attach it to the cachetable; opening the same file twice yields the same CACHEFILE. */
int cachetable_openf (CACHEFILE *,CACHETABLE, const char */*fname*/, int flags, mode_t mode);
/* Error if already present. On success, pin the value. */
int cachetable_put (CACHEFILE, CACHEKEY, void*/*value*/,
void(*flush_callback)(CACHEFILE, CACHEKEY key, void*value, int write_me, int keep_me),
int(*fetch_callback)(CACHEFILE, CACHEKEY key, void**value,void*extraargs), /* If we are asked to fetch something, get it by calling this back. */
void*extraargs
);
/* Pin the value for the key, calling fetch_callback to bring it in if it is not cached. */
int cachetable_get_and_pin (CACHEFILE, CACHEKEY, void**/*value*/,
void(*flush_callback)(CACHEFILE,CACHEKEY,void*,int write_me, int keep_me),
int(*fetch_callback)(CACHEFILE, CACHEKEY key, void**value,void*extraargs), /* If we are asked to fetch something, get it by calling this back. */
void*extraargs
);
/* If the item is already in memory, then pin it, store it in the void**, and return 0.
 * If the item is not in memory, then return nonzero. */
int cachetable_maybe_get_and_pin (CACHEFILE, CACHEKEY, void**);
int cachetable_unpin (CACHEFILE, CACHEKEY, int dirty); /* Note whether it is dirty when we unpin it. */
int cachetable_remove (CACHEFILE, CACHEKEY, int /*write_me*/); /* Removing something that is not present is OK. */
/* Both return nonzero (after printing the offenders) if something is still pinned. */
int cachetable_assert_all_unpinned (CACHETABLE);
int cachefile_assert_all_unpinned (CACHEFILE);
//int cachetable_fsync_all (CACHETABLE); /* Flush everything to disk, but keep it in cache. */
int cachetable_close (CACHETABLE); /* Flushes everything to disk, and destroys the cachetable. */
/* Drops one reference; the last close flushes the file's pairs and closes the descriptor. */
int cachefile_close (CACHEFILE);
//int cachefile_flush (CACHEFILE); /* Flush everything related to the VOID* to disk and free all memory. Don't destroy the cachetable. */
// Return 0 on success (different from pread and pwrite)
//int cachefile_pwrite (CACHEFILE, const void *buf, size_t count, off_t offset);
//int cachefile_pread (CACHEFILE, void *buf, size_t count, off_t offset);
/* Returns the underlying file descriptor. */
int cachefile_fd (CACHEFILE);
#endif
/* Hash table with chaining. */
#include "hashtable.h"
#include "memory.h"
#include "../include/ydb-constants.h"
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
#include "key.h"
#include "yerror.h"
/* Create an empty hashtable with 128 buckets and store it in *h.
 * Returns 0 on success, or -1 if memory cannot be allocated
 * (in which case *h is left untouched). */
int hashtable_create (HASHTABLE *h) {
    HASHTABLE MALLOC(tab);
    int i;
    if (tab==0) return -1;
    tab->n_keys=0;
    tab->arraysize=128;
    assert(sizeof(*tab->array)==sizeof(void*));
    tab->array = my_calloc(tab->arraysize, sizeof(*tab->array));
    if (tab->array==0) {
        /* Treat this failure like the header allocation failure, and do not leak the header. */
        my_free(tab);
        return -1;
    }
    for (i=0; i<tab->arraysize; i++) tab->array[i]=0;
    *h=tab;
    return 0;
}
/* Hash keylen bytes starting at key (from Sedgewick; there are probably better
 * hash functions).  Arithmetic is unsigned and wraps around. */
static unsigned int hash_key (const char *key, ITEMLEN keylen) {
    const unsigned int step = 378551;
    unsigned int multiplier = 63689;
    unsigned int result = 0;
    const char *cursor = key;
    ITEMLEN remaining = keylen;
    while (remaining > 0) {
        result = result * multiplier + *cursor;
        multiplier *= step;
        cursor++;
        remaining--;
    }
    return result;
}
/* Locate key in tab.
 * *hashelt receives the matching element, or 0 if the key is absent.
 * *prev_ptr receives the address of the link that points at the element (so
 * the caller can unlink it); when the key is absent it is the address of the
 * terminating null link of the bucket's chain. */
static void hash_find_internal (HASHTABLE tab, const char *key, ITEMLEN keylen, HASHELT *hashelt, HASHELT **prev_ptr) {
    HASHELT *link = &tab->array[hash_key (key, keylen) % tab->arraysize];
    while (*link) {
        HASHELT cur = *link;
        if (cur->keylen==keylen && memcmp(key, cur->key, keylen)==0) break;
        link = &cur->next;
    }
    *prev_ptr = link;
    *hashelt = *link;
}
/* Look up key.  On a hit, *data and *datalen describe the stored value (which
 * still belongs to the table) and 0 is returned; on a miss -1 is returned and
 * the outputs are left untouched. */
int hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec *data, ITEMLEN *datalen) {
    HASHELT found, *link;
    hash_find_internal(tab, key, keylen, &found, &link);
    if (found==0) return -1;
    *data = found->val;
    *datalen = found->vallen;
    return 0;
}
/* Insert a copy of (key, val) into tab.
 * Returns BRT_ALREADY_THERE, leaving the table unchanged, if the key is
 * already present; otherwise returns BRT_OK.  The bucket array is doubled and
 * every element rehashed once the number of keys exceeds the number of buckets. */
int hash_insert (HASHTABLE tab, const char *key, ITEMLEN keylen, const char *val, ITEMLEN vallen)
{
    unsigned int bucket = hash_key (key,keylen)%tab->arraysize;
    HASHELT existing, *link;
    HASHELT fresh;
    hash_find_internal(tab, key, keylen, &existing, &link);
    if (existing!=0) {
        return BRT_ALREADY_THERE;
    }
    /* The key is new: copy it and the value, and push the element on the front of its bucket. */
    MALLOC(fresh);
    fresh->key = memdup(key, keylen);
    fresh->keylen = keylen;
    fresh->val = memdup(val, vallen);
    fresh->vallen = vallen;
    fresh->next = tab->array[bucket];
    tab->array[bucket] = fresh;
    tab->n_keys++;
    if (tab->n_keys <= tab->arraysize) {
        return BRT_OK;
    }
    {
        /* Too crowded: move every element into a bucket array twice as large. */
        int doubled = tab->arraysize*2;
        HASHELT *bigger = my_calloc(doubled, sizeof(*tab->array));
        int i;
        assert(bigger!=0);
        for (i=0; i<doubled; i++) bigger[i]=0;
        for (i=0; i<tab->arraysize; i++) {
            HASHELT moving;
            while ((moving=tab->array[i])!=0) {
                unsigned int dest = hash_key(moving->key, moving->keylen)%doubled;
                tab->array[i] = moving->next;
                moving->next = bigger[dest];
                bigger[dest] = moving;
            }
        }
        my_free(tab->array);
        tab->array = bigger;
        tab->arraysize = doubled;
    }
    return BRT_OK;
}
/* Remove key from tab, freeing the stored copies of the key and the value.
 * Returns BRT_OK if the key was removed, DB_NOTFOUND if it was not present. */
int hash_delete (HASHTABLE tab, const char *key, ITEMLEN keylen) {
    HASHELT victim, *link;
    hash_find_internal(tab, key, keylen, &victim, &link);
    if (victim==0) return DB_NOTFOUND;
    /* link is the pointer that currently refers to victim; splice victim out. */
    assert(*link==victim);
    *link = victim->next;
    my_free(victim->key);
    my_free(victim->val);
    my_free(victim);
    tab->n_keys--;
    return BRT_OK;
}
/* Pick some element of h: the head of the first non-empty bucket.
 * On success the outputs point into the table and 0 is returned;
 * returns -1 if the table is empty. */
int hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen) {
    int bucket = 0;
    while (bucket<h->arraysize && h->array[bucket]==0) {
        bucket++;
    }
    if (bucket>=h->arraysize) return -1;
    {
        HASHELT chosen = h->array[bucket];
        *key = chosen->key;
        *keylen = chosen->keylen;
        *data = chosen->val;
        *datalen = chosen->vallen;
    }
    return 0;
}
#if 0
int hashtable_find_last(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen) {
bytevec best_k=0, best_d;
ITEMLEN best_kl, best_dl;
HASHTABLE_ITERATE(h, this_k, this_kl, this_d, this_dl,
({
if (best_k==0 || keycompare(best_k, best_kl, this_k, this_kl)<0) {
best_k = this_k;
best_kl = this_kl;
best_d = this_d;
best_dl = this_dl;
}
}));
if (best_k) {
*key = best_k;
*keylen = best_kl;
*data = best_d;
*datalen = best_dl;
return 0;
} else {
return -1;
}
}
#endif
/* Call f(key, keylen, data, datalen, args) once for every element of tab,
 * bucket by bucket and in chain order within each bucket. */
void hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen, void*args), void* args) {
    int bucket;
    for (bucket=0; bucket<tab->arraysize; bucket++) {
        HASHELT elt;
        for (elt=tab->array[bucket]; elt; elt=elt->next) {
            const char *key = elt->key;
            ITEMLEN keylen = elt->keylen;
            const char *val = elt->val;
            ITEMLEN vallen = elt->vallen;
            f(key,keylen,val,vallen,args);
        }
    }
}
/* Return the number of keys currently stored in tab. */
int hashtable_n_entries(HASHTABLE tab) {
return tab->n_keys;
}
/* Free every element of the chain starting at elt, together with each
 * element's copy of its key and value.
 * Written as a loop so that stack usage does not grow with the chain length. */
static void hasheltlist_free (HASHELT elt) {
    while (elt!=0) {
        HASHELT following = elt->next;   /* read before elt is freed */
        my_free(elt->key);
        my_free(elt->val);
        my_free(elt);
        elt = following;
    }
}
/* Destroy the table: hashtable_clear() frees every element (including the
 * stored keys and values), then the bucket array and the table header are
 * freed and *tab is set to 0. */
void hashtable_free(HASHTABLE *tab) {
//printf("%s:%d free hashtable %p\n", __FILE__, __LINE__, tab);
hashtable_clear(*tab);
//printf("%s:%d free %p\n", __FILE__, __LINE__, tab);
my_free((*tab)->array);
my_free(*tab);
*tab=0;
}
/* Remove and free every element of tab, leaving an empty table with the same number of buckets. */
void hashtable_clear(HASHTABLE tab) {
    HASHELT *slot = tab->array;
    int remaining = tab->arraysize;
    while (remaining > 0) {
        hasheltlist_free(*slot);
        *slot = 0;
        slot++;
        remaining--;
    }
    tab->n_keys = 0;
}
#ifndef HASHTABLE_H
#define HASHTABLE_H
#include "brttypes.h"
/* Hash table with chaining. */
/* The keys and values are byte sequences. */
/* The keys and values are malloc'd by the hashtable. */
typedef struct hashtable *HASHTABLE;
int hashtable_create (HASHTABLE*);
/* Return 0 if the key is found in the hashtable, -1 otherwise. */
/* Warning: The data returned points to the internals of the hashtable. It is set to "const" to try to prevent you from messing it up. */
int hash_find (HASHTABLE tab, bytevec key, ITEMLEN keylen, bytevec*data, ITEMLEN *datalen);
/* Insert a copy of the key and data. If the key is already present, the table is left unchanged and BRT_ALREADY_THERE is returned. */
int hash_insert (HASHTABLE tab, const char *key, ITEMLEN keylen, const char *data, ITEMLEN datalen);
/* It is OK to delete something that isn't there. */
int hash_delete (HASHTABLE tab, const char *key, ITEMLEN keylen);
void hashtable_free(HASHTABLE *tab);
int hashtable_n_entries(HASHTABLE);
void hashtable_clear(HASHTABLE);
int hashtable_random_pick(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen);
//int hashtable_find_last(HASHTABLE h, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen);
/* One element of a bucket's chain.  The key and val buffers are the table's own copies. */
typedef struct hashelt *HASHELT;
struct hashelt {
char *key; ITEMLEN keylen; /* key is NULL for empty elements */
char *val; ITEMLEN vallen;
HASHELT next;   /* Next element in the same bucket, or 0. */
};
/* A chained hash table. */
struct hashtable {
int n_keys;       /* Number of elements currently stored. */
int arraysize;    /* Number of buckets in array. */
HASHELT *array;   /* Bucket heads; each is the start of a chain or 0. */
};
/* You cannot add or delete elements from the hashtable while iterating. */
void hashtable_iterate (HASHTABLE tab, void(*f)(bytevec key,ITEMLEN keylen,bytevec data,ITEMLEN datalen,void*), void*);
// Run body once for every element of table, with keyvar/keylenvar/datavar/datalenvar
// declared as locals describing the element.
// If you don't want to use something, do something like use "key __attribute__((__unused__))" for keyvar.
// The table argument is parenthesized so that any pointer expression (e.g. "tabs+1") works;
// it is evaluated more than once, so it should not have side effects.
#define HASHTABLE_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,body) ({ \
    int hi_counter; \
    for (hi_counter=0; hi_counter<(table)->arraysize; hi_counter++) { \
        HASHELT hi_he; \
        for (hi_he=(table)->array[hi_counter]; hi_he; hi_he=hi_he->next) { \
            const char *keyvar = hi_he->key; \
            ITEMLEN keylenvar = hi_he->keylen; \
            const char *datavar = hi_he->val; \
            ITEMLEN datalenvar = hi_he->vallen; \
            body; \
        }}})
#endif
#include "key.h"
#include "hashtable.h"
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
/* Check one (key, value) element produced by the test: the key must be
 * "k<num>" and the value "d<num>" (lengths include the trailing NUL), and
 * <num> must be one of data[0..N-1] that has not been seen yet.  Marks it
 * seen in saw[]; aborts if <num> is not in data[]. */
void verify_hash_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl,
                           int N, int *data, char *saw) {
    char *keystr = (char*)kv_v;
    char *valstr = (char*)dv_v;
    int wanted, idx;
    assert(keystr[0]=='k');
    assert(valstr[0]=='d');
    assert(strcmp(keystr+1, valstr+1)==0);
    assert(strlen(keystr)+1==kl);
    assert(strlen(valstr)+1==dl);
    wanted = atoi(keystr+1);
    idx = 0;
    while (idx<N && data[idx]!=wanted) {
        idx++;
    }
    if (idx>=N) {
        fprintf(stderr, "%s isn't there\n", keystr); abort();
    }
    assert(!saw[idx]);
    saw[idx]=1;
}
/* Per-element check used by verify_htable(): the key must be "k<num>" and the
 * value "d<num>" (lengths include the trailing NUL), and <num> must be an
 * entry of data[0..N-1] not seen before.  Marks the entry in saw[]; aborts if
 * <num> is unknown. */
void verify_htable_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl,
                             int N, int *data, char *saw) {
    char *kstr = (char*)kv_v;
    char *dstr = (char*)dv_v;
    int number;
    int pos;
    int where = -1;
    assert(kstr[0]=='k');
    assert(dstr[0]=='d');
    assert(strcmp(kstr+1, dstr+1)==0);
    assert(strlen(kstr)+1==kl);
    assert(strlen(dstr)+1==dl);
    number = atoi(kstr+1);
    for (pos=0; pos<N; pos++) {
        if (data[pos]==number) {
            where = pos;
            break;
        }
    }
    if (where<0) {
        fprintf(stderr, "%s isn't there\n", kstr); abort();
    }
    assert(!saw[where]);
    saw[where]=1;
}
/* Check that htable holds exactly the N numbers in data[]: every element must
 * match an entry of data[] (at most once), and every entry must be matched.
 * saw[] is scratch space of at least N chars. */
void verify_htable (HASHTABLE htable, int N, int *data, char *saw) {
    int idx = 0;
    while (idx<N) {
        saw[idx++]=0;
    }
    HASHTABLE_ITERATE(htable, kv, kl, dv, dl,
                      verify_htable_instance (kv, kl, dv, dl,
                                              N, data, saw));
    idx = 0;
    while (idx<N) {
        assert(saw[idx]);
        idx++;
    }
}
/* Random insertion test: insert distinct "k<num>"/"d<num>" pairs and verify
 * the complete table contents after every operation.  Deletion and lookup are
 * placeholders that do nothing yet. */
void test0 (void) {
    int r, i, j;
    HASHTABLE htable;
    int n_ops=1000;
    int *data=malloc(sizeof(*data)*n_ops);   /* the numbers inserted so far */
    char*saw =malloc(sizeof(*saw)*n_ops);    /* scratch space for verify_htable() */
    int data_n = 0;
    assert(data!=0);
    assert(saw!=0);
    r = hashtable_create(&htable); assert(r==0);
    assert(hashtable_n_entries(htable)==0);
    for (i=0; i<n_ops; i++) {
        if (random()%4==1) {
            // Delete something random
        } else if (random()%2 == 0) {
            // Insert something
        try_another_random:
            {
                int ra = random()%(1<<30);
                char kv[100], dv[100];
                for (j=0; j<data_n; j++) {
                    if (ra==data[j]) goto try_another_random;
                }
                snprintf(kv, 99, "k%d", ra);
                snprintf(dv, 99, "d%d", ra);
                hash_insert(htable, kv, strlen(kv)+1, dv, strlen(dv)+1);
                data[data_n++]=ra;
                /* ra is new, so the insertion must have added exactly one entry. */
                assert(hashtable_n_entries(htable)==data_n);
            }
        } else {
            // Look up something
        }
        verify_htable(htable, data_n, data, saw);
    }
    /* Release everything the test allocated. */
    hashtable_free(&htable);
    free(data);
    free(saw);
}
/* Run the hashtable test; returns 0 if it did not abort. */
int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) {
test0();
return 0;
}
#include "brttypes.h"
#include "brt-internal.h"
#include "memory.h"
#include <sys/types.h>
#include <unistd.h>
#include <assert.h>
#include <string.h>
/* Read a 4-byte big-endian integer from fd into *result.
 * Returns 0 on success, or 1 if four bytes could not be read (*result is then untouched). */
int read_sint (int fd, int *result) {
    unsigned char b[4];
    unsigned int assembled;
    if (read(fd, b, 4)!=4) return 1;
    /* Assemble in unsigned arithmetic: shifting a promoted (signed int) byte left
     * by 24 overflows, which is undefined, whenever the top bit of b[0] is set. */
    assembled = ((unsigned int)b[0]<<24) | ((unsigned int)b[1]<<16) | ((unsigned int)b[2]<<8) | (unsigned int)b[3];
    *result = (int)assembled;
    return 0;
}
/* Read a 4-byte big-endian unsigned integer from fd into *result.
 * Returns 0 on success, or 1 if four bytes could not be read (*result is then untouched).
 * The original stored the status code (always 0 on success) instead of the
 * value that was read, so every caller received 0.  The bytes are decoded here
 * directly as unsigned, with no detour through a signed int. */
int read_uint (int fd, unsigned int *result) {
    unsigned char b[4];
    if (read(fd, b, 4)!=4) return 1;
    *result = ((unsigned int)b[0]<<24) | ((unsigned int)b[1]<<16) | ((unsigned int)b[2]<<8) | (unsigned int)b[3];
    return 0;
}
/* Write v to fd as four bytes, most significant byte first.
 * Returns 0 if all four bytes were written, 1 otherwise. */
int write_int (int fd, unsigned int v) {
    unsigned char encoded[4];
    int idx;
    int written;
    for (idx=0; idx<4; idx++) {
        encoded[idx] = (v>>(8*(3-idx)))&0xff;
    }
    written = write(fd, encoded, 4);
    return (written==4) ? 0 : 1;
}
/* Read a 64-bit disk offset stored as two big-endian 32-bit words, high word first.
 * Returns 0 on success, or the nonzero result of the failing read_uint(). */
int read_diskoff (int fd, diskoff *result) {
    unsigned int high, low;
    int r = read_uint(fd, &high);
    if (r!=0) return r;
    r = read_uint(fd, &low);
    if (r!=0) return r;
    *result = ((unsigned long long)high)<<32 | ((unsigned long long)low);
    return 0;
}
/* Write v as two big-endian 32-bit words, high word first.
 * Returns 0 on success, or the nonzero result of the failing write_int(). */
int write_diskoff (int fd, diskoff v) {
    unsigned int high = (unsigned int)(v>>32);
    unsigned int low = (unsigned int)(v&0xffffffff);
    int r = write_int(fd, high);
    if (r!=0) return r;
    return write_int(fd, low);
}
/* Read exactly l bytes from fd into s with a single read().
 * Returns 0 if all l bytes arrived, -1 otherwise. */
int read_bytes (int fd, int l, char *s) {
    int got = read(fd, s, l);
    return (got==l) ? 0 : -1;
}
/* Write the l bytes at s to fd with a single write().
 * Returns 0 if all l bytes were written, -1 otherwise. */
int write_bytes (int fd, int l, char *s) {
    int put = write(fd, s, l);
    return (put==l) ? 0 : -1;
}
/* Read the BRT header from the start of fd into *header.
 * Layout as read here: nodesize (4 bytes), freelist (8), unused_memory (8),
 * n_named_roots (4); then either n_named_roots records of
 * (root offset, name length, name bytes), or a single unnamed root offset.
 * Returns 0 on success and -1 if the first field cannot be read; every later
 * read failure trips an assert. */
int read_brt_header (int fd, struct brt_header *header) {
{
off_t r = lseek(fd, 0, SEEK_SET);
assert(r==0);
}
/* Ignore magic for now. We'll need some magic at the beginning of the file. */
{
int r;
r = read_uint(fd, &header->nodesize);
if (r!=0) return -1;
r = read_diskoff(fd, &header->freelist); assert(r==0); /* These asserts should do something smarter. */
r = read_diskoff(fd, &header->unused_memory); assert(r==0);
r = read_sint(fd, &header->n_named_roots); assert(r==0);
if (header->n_named_roots>0) {
int i;
/* Named roots: there is no unnamed root, and names[]/roots[] are parallel arrays. */
header->unnamed_root = -1;
MALLOC_N(header->n_named_roots, header->names);
MALLOC_N(header->n_named_roots, header->roots);
for (i=0; i<header->n_named_roots; i++) {
unsigned int l;
char *s;
r = read_diskoff(fd, &header->roots[i]); assert(r==0);
r = read_uint(fd, &l); assert(r==0); /* count includes the trailing null. */
/* NOTE(review): l comes straight from the file and is used as an allocation size before it is checked. */
MALLOC_N(l, s);
r = read_bytes(fd, l, s); assert(r==0);
assert(l>0 && s[l-1]==0);
header->names[i] = s;
}
} else {
r = read_diskoff(fd, &header->unnamed_root); assert(r==0);
header->names = 0;
header->roots = 0;
}
}
return 0;
}
/* Read just the unused_memory field of the header, which sits at byte offset 12
 * (after the 4-byte nodesize and the 8-byte freelist).
 * Returns the result of read_diskoff(). */
int read_brt_h_unused_memory (int fd, diskoff *unused_memory) {
    off_t position = lseek(fd, 12, SEEK_SET);
    assert(position==12);
    return read_diskoff(fd, unused_memory);
}
/* Overwrite just the unused_memory field of the header, which sits at byte offset 12
 * (after the 4-byte nodesize and the 8-byte freelist).
 * Returns the result of write_diskoff(). */
int write_brt_h_unused_memory (int fd, diskoff unused_memory) {
    off_t position = lseek(fd, 12, SEEK_SET);
    assert(position==12);
    return write_diskoff(fd, unused_memory);
}
#include "brt-internal.h"
#include <assert.h>
#include <string.h>
/* Compare two byte strings lexicographically; when one is a prefix of the
 * other, the shorter one sorts first.
 * Returns <0, 0 or >0.  For keys of equal length the raw memcmp() result is
 * returned; for keys of different length the result is exactly -1 or 1. */
int keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len) {
    ITEMLEN common = (key1len<key2len) ? key1len : key2len;
    if (key1len==key2len) {
        return memcmp(key1,key2,common);
    }
    if (key1len<key2len) {
        /* key1 is shorter: a tie on the common prefix means key1 comes first. */
        return (memcmp(key1,key2,common)<=0) ? -1 : 1;
    }
    /* key2 is shorter: a tie on the common prefix means key2 comes first. */
    return (memcmp(key2,key1,common)<=0) ? 1 : -1;
}
/* Self-test for keycompare().  Any failure trips an assert. */
void test_keycompare (void) {
/* Identical single-byte keys compare equal. */
assert(keycompare("a",1, "a",1)==0);
/* When one key is a prefix of the other, the longer key sorts after the shorter one. */
assert(keycompare("aa",2, "a",1)>0);
assert(keycompare("a",1, "aa",2)<0);
/* A difference in the first byte decides the order regardless of the lengths. */
assert(keycompare("b",1, "aa",2)>0);
assert(keycompare("aa",2, "b",1)<0);
/* Equal-length keys: equal, and differing in the fourth byte in both directions. */
assert(keycompare("aaaba",5, "aaaba",5)==0);
assert(keycompare("aaaba",5, "aaaaa",5)>0);
assert(keycompare("aaaaa",5, "aaaba",5)<0);
/* Only the first 3 bytes are compared here, so the later difference is not seen. */
assert(keycompare("aaaaa",3, "aaaba",3)==0);
}
#include "brttypes.h"
/* Compare two keys given as (pointer, length) pairs.  The result is negative, zero or
 * positive as key1 sorts before, equal to, or after key2. */
int keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len);
/* Run the built-in assertions that exercise keycompare(). */
void test_keycompare (void) ;
#include "mdict.h"
#include <stdlib.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
/* Check one (key, value) pair produced while iterating over the dictionary.
 * The key must be the string "k<num>" and the value "d<num>" with the same <num>, and
 * both lengths must count the trailing NUL.  <num> must appear in DATA[0..N-1] at an
 * index not yet marked in SAW; that index is then marked.
 * Aborts (after printing the key to stderr) if <num> is not in DATA. */
void verify_mdict_instance (bytevec kv_v, ITEMLEN kl, bytevec dv_v, ITEMLEN dl,
                            int N, int *data, char *saw) {
    char *key_str = (char*)kv_v;
    char *val_str = (char*)dv_v;
    int wanted;
    int idx = 0;
    assert(key_str[0]=='k');
    assert(val_str[0]=='d');
    assert(strcmp(key_str+1, val_str+1)==0);
    assert(strlen(key_str)+1==kl);
    assert(strlen(val_str)+1==dl);
    wanted = atoi(key_str+1);
    /* Linear search for the first entry equal to the number encoded in the key. */
    while (idx<N && data[idx]!=wanted) {
        idx++;
    }
    if (idx>=N) {
        fprintf(stderr, "%s isn't there\n", key_str); abort();
    }
    assert(!saw[idx]);
    saw[idx]=1;
}
/* Check that MDICT contains exactly the N numbers in DATA.
 * SAW is scratch space of at least N chars: it is cleared, every pair found while
 * iterating marks its entry via verify_mdict_instance(), and finally every entry must
 * have been marked. */
void verify_mdict (MDICT mdict, int N, int *data, char *saw) {
int j;
for (j=0; j<N; j++) {
saw[j]=0;
}
/* kv/kl/dv/dl are the names the macro binds to each pair's key, key length, data and data length. */
MDICT_ITERATE(mdict, kv, kl, dv, dl,
verify_mdict_instance (kv, kl, dv, dl,
N, data, saw));
/* Anything still unmarked was expected but never produced by the iteration. */
for (j=0; j<N; j++) {
assert(saw[j]);
}
}
/* Randomized smoke test of the MDICT interface.
 * First checks that mdict_find_last() on an empty dictionary fails and leaves its output
 * arguments untouched.  Then performs n_ops random steps; only the insert step is
 * implemented so far (delete and lookup are placeholders).  After every step the whole
 * dictionary is verified against the list of numbers inserted so far.
 * NOTE(review): the dictionary itself is not released here; the signature of the
 * matching free routine is not visible from this file. */
void test0 (void) {
    int r, i, j;
    MDICT mdict;
    int n_ops=1000;
    int *data=malloc(sizeof(*data)*n_ops);
    char*saw =malloc(sizeof(*saw)*n_ops);
    int data_n = 0;
    assert(data!=0);
    assert(saw!=0); /* saw is written by verify_mdict(), so it must be checked as well */
    r = mdict_create(&mdict); assert(r==0);
    assert(mdict_n_entries(mdict)==0);
    {
        /* Poison values: a failed find_last must not modify any of its outputs. */
        bytevec kv=(void*)0xdeadbeef;
        bytevec dv=(void*)0xbeefdead;
        ITEMLEN kl=42, dl=43;
        r = mdict_find_last(mdict,&kv,&kl,&dv,&dl);
        assert(r!=0);
        assert((unsigned long)kv==0xdeadbeef);
        assert((unsigned long)dv==0xbeefdead);
        assert(kl==42);
        assert(dl==43);
    }
    for (i=0; i<n_ops; i++) {
        if (random()%4==1) {
            // Delete something random
        } else if (random()%2 == 0) {
            // Insert something
        try_another_random:
            {
                int ra = random()%(1<<30);
                char kv[100], dv[100];
                /* Keys must be unique, so retry if this number was already inserted. */
                for (j=0; j<data_n; j++) {
                    if (ra==data[j]) goto try_another_random;
                }
                snprintf(kv, sizeof(kv), "k%d", ra);
                snprintf(dv, sizeof(dv), "d%d", ra);
                mdict_insert(mdict, kv, strlen(kv)+1, dv, strlen(dv)+1);
                data[data_n++]=ra;
            }
        } else {
            // Look up something
        }
        verify_mdict(mdict, data_n, data, saw);
    }
    /* Release the bookkeeping arrays allocated above. */
    free(saw);
    free(data);
}
/* Entry point of the mdict test program: runs test0() and exits with status 0.
 * The command-line arguments are not used. */
int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) {
test0();
return 0;
}
#include "mdict.h"
#include "memory.h"
#define USEPMA
#ifdef USEPMA
#include "pma.h"
/* An in-memory dictionary implemented as a thin wrapper around a PMA. */
struct mdict {
PMA pma; /* the PMA that actually stores the pairs */
};
/* Allocate a new, empty dictionary and store it in *MDICT.
 * Returns 0 on success.  Returns -1 if the wrapper cannot be allocated, or the nonzero
 * result of pma_create() if the underlying PMA cannot be created; in both failure cases
 * *MDICT is left untouched and nothing is leaked. */
int mdict_create (MDICT* mdict) {
    MDICT result;
    int r;
    MALLOC(result);
    if (result==0) return -1;
    r = pma_create(&result->pma);
    if (r!=0) {
        my_free(result); /* don't leak the wrapper when the PMA could not be created */
        return r;
    }
    *mdict = result;
    return 0;
}
/* Destroy dictionary M: release the underlying PMA and then the wrapper itself.
 * pma_free() takes the address of the PMA handle (its other callers pass &pma and then
 * check that the handle was cleared), so pass the address of the member.
 * NOTE(review): pma_free() can report failure (e.g. while cursors are still open); this
 * function returns void, so that result cannot be propagated to the caller. */
void mdict_free (MDICT m) {
    pma_free(&m->pma);
    my_free(m);
}
/* Number of pairs currently stored in M, as reported by the underlying PMA. */
int mdict_n_entries (MDICT m) {
    PMA backing = m->pma;
    return pma_n_entries(backing);
}
/* Insert the pair (KEY, DATA) into M by forwarding to pma_insert().
 * Returns an error if the key is already present.
 * The values returned should not be modified.
 * May damage the cursor. */
int mdict_insert (MDICT m, bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen) {
    PMA backing = m->pma;
    int status = pma_insert(backing, key, keylen, data, datalen);
    return status;
}
/* This returns an error if the key is NOT present. */
int mdict_replace (MDICT, bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen);
/* Remove KEY from M by forwarding to pma_delete().
 * This returns an error if the key is NOT present. */
int mdict_delete (MDICT m, bytevec key, ITEMLEN keylen) {
    PMA backing = m->pma;
    int status = pma_delete(backing, key, keylen);
    return status;
}
/* Look KEY up in M by forwarding to pma_lookup(); the result is delivered through
 * *DATA and *DATALEN.
 * This exposes internals of the MDICT by returning a pointer to the guts:
 * don't modify the returned data, and don't free it. */
int mdict_lookup (MDICT m, bytevec key, ITEMLEN keylen, bytevec*data, ITEMLEN *datalen) {
    PMA backing = m->pma;
    int status = pma_lookup(backing, key, keylen, data, datalen);
    return status;
}
/* Pick some pair stored in M by forwarding to pma_random_pick(); the pair is delivered
 * through *KEY, *KEYLEN, *DATA and *DATALEN.  Returns whatever pma_random_pick() returns. */
int mdict_random_pick(MDICT m, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen) {
    PMA backing = m->pma;
    int status = pma_random_pick(backing, key, keylen, data, datalen);
    return status;
}
/* Call F on the pairs of M by forwarding to pma_iterate().
 * F receives key, key length, data, data length, and the caller-supplied pointer V. */
void mdict_iterate (MDICT m, void(*f)(bytevec,ITEMLEN,bytevec,ITEMLEN, void*), void*v) {
    PMA backing = m->pma;
    pma_iterate(backing, f, v);
}
#else
foo
#endif
/* mdict.h: the in-memory dictionary interface.
 * Exactly one of three implementations is selected at compile time:
 *   USEPMA   -- every mdict name is a macro alias for the corresponding pma name;
 *   USEHASH  -- every mdict name is a macro alias for the corresponding hashtable name;
 *   neither  -- MDICT is an opaque pointer type with real function prototypes.
 * As written, USEHASH is the one that is defined. */
#ifndef MDICT_H
#define MDICT_H
#include "brttypes.h"
//#define USEPMA
#define USEHASH
/* Key ordering function, declared here for users of the dictionary. */
int keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len);
#ifdef USEPMA
/* PMA-backed dictionary: pure renaming. */
#include "pma.h"
#define MDICT PMA
#define MDICT_OK PMA_OK
#define MDICT_NOTFOUND PMA_NOTFOUND
#define mdict_free pma_free
#define mdict_n_entries pma_n_entries
#define MDICT_ITERATE PMA_ITERATE
#define mdict_insert pma_insert
#define mdict_create pma_create
#define mdict_delete pma_delete
#define mdict_lookup pma_lookup
#define mdict_random_pick pma_random_pick
#define mdict_iterate pma_iterate
#elif defined(USEHASH)
/* Hashtable-backed dictionary: pure renaming, plus the numeric status codes.
 * Note that mdict_find_last is only provided by this variant. */
#include "hashtable.h"
#define MDICT HASHTABLE
#define MDICT_OK 0
#define MDICT_NOTFOUND -1
#define MDICT_ALREADY_THERE -2
#define mdict_free hashtable_free
#define mdict_n_entries hashtable_n_entries
#define MDICT_ITERATE HASHTABLE_ITERATE
#define mdict_insert hash_insert
#define mdict_create hashtable_create
#define mdict_delete hash_delete
#define mdict_lookup hash_find
#define mdict_random_pick hashtable_random_pick
#define mdict_iterate hashtable_iterate
#define mdict_find_last hashtable_find_last
#else
/* In-memory dictionary. */
/* Status codes returned by the functions below. */
enum mdict_errors { MDICT_OK=0, MDICT_NOTFOUND = -1, MDICT_ALREADY_THERE = -2 };
/* Opaque handle; the structure is defined by the implementation. */
typedef struct mdict *MDICT;
/* Create an empty dictionary and store its handle through the argument. */
int mdict_create (MDICT*);
/* Destroy a dictionary. */
void mdict_free (MDICT);
/* Number of pairs currently stored. */
int mdict_n_entries (MDICT);
/* Returns an error if the key is already present. */
/* The values returned should not be modified. */
/* May damage the cursor. */
int mdict_insert (MDICT, bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen);
/* This returns an error if the key is NOT present. */
int mdict_replace (MDICT, bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen);
/* This returns an error if the key is NOT present. */
int mdict_delete (MDICT, bytevec key, ITEMLEN keylen);
/* Exposes internals of the MDICT by returning a pointer to the guts.
 * Don't modify the returned data. Don't free it. */
int mdict_lookup (MDICT, bytevec key, ITEMLEN keylen, bytevec*data, ITEMLEN *datalen);
/* Deliver some stored pair through the four output pointers. */
int mdict_random_pick(MDICT, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen);
/* Call the function on stored pairs; the last argument is passed through to it unchanged. */
void mdict_iterate (MDICT, void(*)(bytevec,ITEMLEN,bytevec,ITEMLEN, void*), void*);
/* Run BODY for the pairs of TABLE, with the four *var names bound to the key, key length,
 * data and data length of the current pair.
 * Implemented with a GNU C nested function inside a statement expression, so BODY can
 * refer to the caller's local variables. */
#define MDICT_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,body) ({ \
void __do_iterate(bytevec keyvar, ITEMLEN keylenvar, bytevec datavar, ITEMLEN datalenvar, void *__ignore __attribute__((__unused__))) { \
body; \
} \
mdict_iterate(table,__do_iterate, 0); \
})
#endif
#endif
#include "memory.h"
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include <stdio.h>
/* Nonzero enables the per-allocation bookkeeping done under WHEN_MEM_DEBUG. */
int memory_check=1;
/* Execute the statements X only when memory_check is nonzero (GNU statement expression). */
#define WHEN_MEM_DEBUG(x) ({if (memory_check) ({x});})
/* Number of allocations currently outstanding; maintained even when memory_check is 0. */
long long n_items_malloced=0;
/* Memory checking */
/* Capacity of the items[]/sizes[] tracking tables. */
enum { items_limit = 1000 };
/* Set to 1 once an allocation could not be recorded because the tables were full;
 * nothing in this file resets it. */
int overflowed=0;
/* items[i] is a live pointer and sizes[i] the size that was requested for it. */
static void *items[items_limit];
static long sizes[items_limit];
/* Record that P (of SIZE bytes) was just allocated.
 * When memory_check is on, P and SIZE are stored in the next free table slot, or
 * `overflowed` is set if the tables are already full.
 * The outstanding-allocation counter is incremented regardless of memory_check. */
void note_did_malloc (void *p, long size) {
WHEN_MEM_DEBUG(
if (n_items_malloced<items_limit) { items[n_items_malloced]=p; sizes[n_items_malloced]=size; }
else overflowed=1;
//printf("%s:%d %p=malloc(%ld)\n", __FILE__, __LINE__, r, size);
);
n_items_malloced++;
}
/* Record that P is about to be released.
 * When memory_check is on and the tables never overflowed, P must be present in items[];
 * its slot is refilled with the last live entry so the table stays dense.  If P is not
 * found, a message is printed and the program aborts.
 * The outstanding-allocation counter is decremented regardless of memory_check. */
void note_did_free(void *p) {
WHEN_MEM_DEBUG(
if (!overflowed) {
int i;
//printf("not overflowed\n");
for (i=0; i<n_items_malloced; i++) {
if (items[i]==p) {
/* Move the last live entry into the vacated slot. */
items[i]=items[n_items_malloced-1];
sizes[i]=sizes[n_items_malloced-1];
// printf("items[%d] replaced, now %p\n", i, items[i]);
goto ok;
}
}
printf("%s:%d freed something (%p) not alloced\n", __FILE__, __LINE__, p);
abort();
ok:;
}
//printf("%s:%d free(%p)\n", __FILE__, __LINE__, p);
);
n_items_malloced--;
}
//#define BUFFERED_MALLOC
#ifdef BUFFERED_MALLOC
/* Size in bytes of the guard region placed on each side of a buffered allocation. */
enum { BUFFERING = 4096 };
/* Fill the BUFFERING-byte guard region at P with the 0xdeadbeef pattern, then store SIZE
 * in its middle word (index BUFFERING/8) so check_buffer() can recover it later. */
void mark_buffer (char *p, int size) {
    unsigned int *words = (unsigned int*)p;
    unsigned int *end = words + BUFFERING/4;
    unsigned int *w;
    for (w = words; w != end; w++) {
        *w = 0xdeadbeef;
    }
    words[BUFFERING/8] = size;
}
/* Verify the guard region at P: every word except the middle one (index BUFFERING/8)
 * must still hold 0xdeadbeef; a mismatch trips an assert.
 * Returns the value stored in the middle word, i.e. the size recorded by mark_buffer(). */
int check_buffer (char *p) {
    unsigned int *words = (unsigned int*)p;
    int idx;
    for (idx=0; idx<BUFFERING/4; idx++) {
        if (idx==BUFFERING/8) continue; /* this word holds the size, not the pattern */
        assert(words[idx] == 0xdeadbeef);
    }
    return words[BUFFERING/8];
}
/* Verify the guard regions before and after every tracked allocation.
 * Does nothing once the tracking table has overflowed, since it is then incomplete. */
void check_all_buffers (void) {
    int k;
    if (overflowed) return;
    for (k=0; k<n_items_malloced; k++) {
        char *user = (char*)items[k];
        /* The leading guard records the user size, which locates the trailing guard. */
        int size = check_buffer(user-BUFFERING);
        check_buffer(user+size);
    }
}
/* Buffered malloc: allocate SIZE bytes with a BUFFERING-byte guard region on each side.
 * Both guards are marked (each records SIZE), all tracked buffers are re-verified, and
 * the pointer just past the leading guard is returned.
 * NOTE(review): the result of malloc() is not checked before it is written to. */
void *actual_malloc(long size) {
char *r = malloc(size+BUFFERING*2);
mark_buffer(r, size);
mark_buffer(r+size+BUFFERING, size);
check_all_buffers();
return r+BUFFERING;
}
/* Buffered free: verify the guard regions around PV and re-verify all tracked buffers.
 * The underlying free() call is commented out, so the memory is never actually released. */
void actual_free(void *pv) {
char *p = pv;
int size=check_buffer(p-BUFFERING);
check_buffer(p+size);
check_all_buffers();
//free(p-BUFFERING);
}
/* Buffered realloc: verify all tracked buffers, resize the underlying block (guards
 * included) and re-mark both guards for the new SIZE.
 * Returns the pointer just past the leading guard.
 * NOTE(review): PV is assumed to come from actual_malloc (a null PV is not handled) and
 * the result of realloc() is not checked. */
void *actual_realloc(void *pv, long size) {
check_all_buffers();
{
char *p = pv;
char *r = realloc(p-BUFFERING, size+BUFFERING*2);
mark_buffer(r, size);
mark_buffer(r+size+BUFFERING, size);
return r+BUFFERING;
}
}
/* Buffered calloc: allocate NMEMB*SIZE bytes through actual_malloc() and zero them.
 * calloc() must hand back zero-filled memory; actual_malloc() leaves the user area
 * uninitialised, so it is cleared explicitly here. */
void *actual_calloc (long nmemb, long size) {
    long n_bytes = nmemb*size;
    void *r = actual_malloc(n_bytes);
    memset(r, 0, n_bytes);
    return r;
}
/* Public entry point (buffered build only): verify the guard regions of every tracked allocation. */
void do_memory_check (void) {
check_all_buffers();
}
#else
/* Without BUFFERED_MALLOC the actual_* names map straight onto the C library allocator. */
#define actual_malloc malloc
#define actual_free free
#define actual_realloc realloc
#define actual_calloc calloc
#endif
/* Tracked calloc: allocate NMEMB elements of SIZE bytes each.
 * errno is cleared before the allocation is attempted, and the result (whatever it is)
 * is recorded in the allocation bookkeeping with a size of NMEMB*SIZE.
 * Returns the pointer produced by the underlying calloc. */
void *my_calloc(long nmemb, long size) {
    void *block;
    errno=0;
    block = actual_calloc(nmemb, size);
    note_did_malloc(block, nmemb*size);
    return block;
}
/* Tracked malloc: allocate SIZE bytes.
 * errno is cleared before the allocation is attempted, and the result (whatever it is)
 * is recorded in the allocation bookkeeping.
 * Returns the pointer produced by the underlying malloc. */
void *my_malloc(long size) {
    void *block;
    errno=0;
    block = actual_malloc(size);
    note_did_malloc(block, size);
    return block;
}
/* Allocate SIZE bytes with my_malloc() and store TYPTAG in the leading int.
 * SIZE must be larger than an int so the tag fits; this is asserted before allocating.
 * Returns the new block, or a null pointer if the allocation failed (callers such as
 * pma_create test for that, so the tag must not be written through a null pointer). */
void *tagmalloc(unsigned long size, int typtag) {
    void *r;
    assert(size>sizeof(int));
    r = my_malloc(size);
    if (r==0) return 0;
    ((int*)r)[0] = typtag;
    return r;
}
/* Tracked realloc: resize P to SIZE bytes.
 * The old pointer is removed from the bookkeeping first, errno is cleared, the block is
 * resized, and the resulting pointer is recorded with the new size.
 * NOTE(review): P is un-recorded before the resize is attempted, so if the underlying
 * realloc fails the original block is no longer tracked. */
void *my_realloc(void *p, long size) {
void *newp;
note_did_free(p);
errno=0;
newp = actual_realloc(p, size);
//printf("%s:%d realloc(%p,%ld)-->%p\n", __FILE__, __LINE__, p, size, newp);
note_did_malloc(newp, size);
return newp;
}
/* Tracked free: remove P from the bookkeeping, then release it.
 * With memory_check on (and no table overflow) the bookkeeping aborts the program if P
 * was never recorded. */
void my_free(void* p) {
//printf("%s:%d free(%p)\n", __FILE__, __LINE__, p);
note_did_free(p);
actual_free(p);
}
/* Return a freshly allocated copy of the LEN bytes at V.
 * The copy is obtained from my_malloc(), so it must be released with my_free().
 * Returns a null pointer if the allocation fails (nothing is copied in that case). */
void *memdup (const void *v, unsigned int len) {
    void *r=my_malloc(len);
    if (r==0) return 0;
    memcpy(r,v,len);
    return r;
}
/* Duplicate the NUL-terminated string S, terminator included, using memdup(). */
char *mystrdup (const char *s) {
    unsigned int n_bytes = strlen(s)+1;
    return memdup(s, n_bytes);
}
/* Assert that no tracked allocation is outstanding.
 * If some are, first print how many, and (when memory_check is on) the first table entry
 * as an example, then fail the assertion. */
void memory_check_all_free (void) {
    int something_left = (n_items_malloced>0);
    if (something_left) {
        printf("n_items_malloced=%lld\n", n_items_malloced);
        if (memory_check) {
            printf(" one item is %p size=%ld\n", items[0], sizes[0]);
        }
    }
    assert(n_items_malloced==0);
}
/* Current number of outstanding tracked allocations, narrowed to int. */
int get_n_items_malloced (void) {
    int count = n_items_malloced;
    return count;
}
void print_malloced_items (void) {
int i;
for (i=0; i<n_items_malloced; i++) {
printf(" %p size=%ld\n", items[i], sizes[i]);
}
}
//#include <stdlib.h>
/* Tracked allocation interface.  Every block obtained here must be released with my_free(). */
/* errno is set to 0 or a value to indicate problems. */
void *my_calloc(long nmemb, long size);
void *my_malloc(long size);
/* Like my_malloc(), but also stores TYP in the leading int of the new block. */
void *tagmalloc(unsigned long size, int typ);
void my_free(void*);
void *my_realloc(void *, long size);
/* MALLOC(v): point v at storage for one object of the type v points to. */
#define MALLOC(v) v = my_malloc(sizeof(*v))
/* MALLOC_N(n,v): point v at storage for n such objects. */
#define MALLOC_N(n,v) v = my_malloc((n)*sizeof(*v))
/* TAGMALLOC(t,v): declare `t v`, initialised from tagmalloc() with tag TYP_t.
 * The expansion already ends in a semicolon. */
#define TAGMALLOC(t,v) t v = tagmalloc(sizeof(*v), TYP_ ## t);
/* Return a tracked copy of the len bytes at v. */
void *memdup (const void *v, unsigned int len);
/* Return a tracked copy of the NUL-terminated string s. */
char *mystrdup (const char *s);
/* Assert that no tracked allocation is still outstanding. */
void memory_check_all_free (void);
/* Verify guard regions; only defined when memory.c is built with BUFFERED_MALLOC. */
void do_memory_check(void);
extern int memory_check; // Set to nonzero to get a (much) slower version of malloc that does (much) more checking.
/* Number of tracked allocations currently outstanding. */
int get_n_items_malloced(void);
/* Print the recorded allocations (pointer and size). */
void print_malloced_items(void);
#include "myassert.h"
#include <stdio.h>
#include <stdlib.h>
#ifdef TESTER
/* Assertion back end used when TESTER is defined.
 * If A is zero, report the failing location (file F, line L) on stderr and abort;
 * otherwise do nothing. */
void my_assert(int a, const char *f, int l) {
    if (a) return;
    fprintf(stderr, "Assertion failed at %s:%d\n", f, l);
    abort();
}
#endif
/* myassert.h: selects the assert() implementation.
 * Normally this is the standard <assert.h>.  When TESTER is defined, assert(x) instead
 * calls my_assert() with the current file and line. */
#ifndef MYASSERT_H
#define MYASSERT_H
#ifndef TESTER
#include <assert.h>
#else
extern void my_assert(int, const char *, int);
#define assert(x) my_assert(x, __FILE__, __LINE__)
#endif
#endif
#include "pma.h"
/* One slot of the PMA array: a key and a value, each a pointer plus a length. */
struct pair {
bytevec key; /* NULL for empty slots */
int keylen; /* length of key */
bytevec val; /* the value associated with key */
int vallen; /* length of val */
};
/* A cursor into a PMA.  All cursors of one PMA are linked into a doubly linked list
 * whose ends are the PMA's cursors_head and cursors_tail. */
struct pma_cursor {
PMA pma; /* the PMA this cursor belongs to */
int position; /* -1 if the position is undefined. */
PMA_CURSOR next,prev; /* neighbours in the PMA's cursor list */
void *skey, *sval; /* used in dbts. */
};
/* The packed memory array itself: an array of N slots, some of which are empty. */
struct pma {
enum typ_tag tag; /* type tag, stored first (tagmalloc writes it into the leading int) */
int N; /* How long is the array? Always a power of two >= 4. */
int n_pairs_present; /* How many array elements are non-null. */
struct pair *pairs; /* the slot array */
int uplgN; /* The smallest power of two >= lg(N) */
double densitystep; /* Each doubling decreases the density by densitystep.
* For example if array_len=256 and uplgN=8 then there are 5 doublings.
* Regions of size 8 are full. Regions of size 16 are 90% full.
* Regions of size 32 are 80% full. Regions of size 64 are 70% full.
* Regions of size 128 are 60% full. Regions of size 256 are 50% full.
* The densitystep is 0.10. */
PMA_CURSOR cursors_head, cursors_tail; /* ends of the list of open cursors (0 when there are none) */
};
/* Internal helpers, exposed so that the test program can call them directly. */
/* Count the non-empty slots of pairs[lo..hi-1]. */
int pmainternal_count_region (struct pair *pairs, int lo, int hi);
/* Compute uplgN and densitystep from N. */
void pmainternal_calculate_parameters (PMA pma);
/* Spread the pairs of an n-slot region evenly, leaving an empty slot just before the
 * element that was at idx; returns the index of that empty slot. */
int pmainternal_smooth_region (struct pair *pairs, int n, int idx);
/* Print the keys of N slots ("_" for empty ones); returns how many were non-empty. */
int pmainternal_printpairs (struct pair *pairs, int N);
/* Make an empty slot near idx and return its index (the test program shows that this
 * may grow the array). */
int pmainternal_make_space_at (PMA pma, int idx);
/* Binary search for key; returns an index in [0, N]. */
int pmainternal_find (PMA pma, bytevec key, int keylen);
void print_pma (PMA pma); /* useful for debugging, so keep the name short. I.e., not pmainternal_print_pma() */
#include "pma-internal.h"
#include "../include/ydb-constants.h"
#include "memory.h"
#include "key.h"
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <stdio.h>
/* Exercise pmainternal_make_space_at() on hand-built arrays.
 * The slots are filled in directly (keys only), so n_pairs_present is kept in step by
 * hand.  Throughout, the extra slot just past the end of the array must keep its
 * 0xdeadbeef sentinel, showing that nothing wrote off the end. */
static void test_make_space_at (void) {
PMA pma;
int r=pma_create(&pma);
assert(r==0);
assert(pma_n_entries(pma)==0);
/* Empty 4-slot array: making space must not grow it. */
r=pmainternal_make_space_at(pma, 2);
assert(pma_index_limit(pma)==4);
assert((unsigned long)pma->pairs[pma_index_limit(pma)].key==0xdeadbeefL);
print_pma(pma);
/* One occupied slot at index 2: the returned slot must be empty and the array still has 4 slots. */
pma->pairs[2].key="A";
pma->n_pairs_present++;
r=pmainternal_make_space_at(pma,2);
printf("Requested space at 2, got space at %d\n", r);
print_pma(pma);
assert(pma->pairs[r].key==0);
assert((unsigned long)pma->pairs[pma_index_limit(pma)].key==0xdeadbeefL);
assert(pma_index_limit(pma)==4);
/* Two of four slots occupied: asking for space is expected to double the array to 8 slots. */
pma->pairs[0].key="A";
pma->pairs[1].key="B";
pma->pairs[2].key=0;
pma->pairs[3].key=0;
pma->n_pairs_present=2;
print_pma(pma);
r=pmainternal_make_space_at(pma,0);
printf("Requested space at 0, got space at %d\n", r);
print_pma(pma);
assert((unsigned long)pma->pairs[pma_index_limit(pma)].key==0xdeadbeefL); // make sure it doesn't go off the end.
assert(pma_index_limit(pma)==8);
/* Sparse 8-slot array with keys at 0 and 4; ask for space at 5, i.e. after both keys. */
pma->pairs[0].key = "A";
pma->pairs[1].key = 0;
pma->pairs[2].key = 0;
pma->pairs[3].key = 0;
pma->pairs[4].key = "B";
pma->pairs[5].key = 0;
pma->pairs[6].key = 0;
pma->pairs[7].key = 0;
pma->n_pairs_present=2;
print_pma(pma);
r=pmainternal_make_space_at(pma,5);
print_pma(pma);
printf("r=%d\n", r);
{
int i;
/* Every key that is present must lie before the slot that was returned. */
for (i=0; i<pma_index_limit(pma); i++) {
if (pma->pairs[i].key) {
assert(i<r);
}
pma->pairs[i].key=0; // zero it so that we don't mess things up on free
pma->pairs[i].val=0;
}
}
r=pma_free(&pma); assert(r==0);
assert(pma==0);
}
/* Exercise pmainternal_find() on a hand-built 16-slot array.
 * Only the N and pairs fields of the PMA are initialised, which is all the search reads. */
static void test_pma_find (void) {
PMA pma;
int i;
int r;
const int N = 16;
MALLOC(pma);
MALLOC_N(N,pma->pairs);
// All that is needed to test pma_find is N and pairs.
pma->N = N;
for (i=0; i<N; i++) pma->pairs[i].key=0;
assert(pma_index_limit(pma)==N);
/* Empty array: the answer is index 0. */
r=pmainternal_find(pma, "hello", 5);
assert(r==0);
/* One key at slot 5: an exact match returns 5, a larger key the slot after it, a smaller key 0. */
pma->pairs[5].key="hello";
pma->pairs[5].keylen=5;
assert(pma_index_limit(pma)==N);
r=pmainternal_find(pma, "hello", 5);
assert(pma_index_limit(pma)==N);
assert(r==5);
r=pmainternal_find(pma, "there", 5);
assert(r==6);
r=pmainternal_find(pma, "aaa", 3);
assert(r==0);
/* Add a second key in the last slot and repeat the probes. */
pma->pairs[N-1].key="there";
pma->pairs[N-1].keylen=5;
r=pmainternal_find(pma, "hello", 5);
assert(r==5);
r=pmainternal_find(pma, "there", 5);
assert(r==N-1);
r=pmainternal_find(pma, "aaa", 3);
assert(r==0);
/* A key between the two stored keys lands just after the smaller one. */
r=pmainternal_find(pma, "hellob", 6);
assert(r==6);
/* A key larger than everything yields N, one past the last valid index. */
r=pmainternal_find(pma, "zzz", 3);
assert(r==N);
my_free(pma->pairs);
my_free(pma);
}
/* Exhaustively test pmainternal_smooth_region() on N-slot regions.
 * For every occupancy pattern (bit j of the pattern set means slot j holds the key whose
 * text is the number j) except the completely full one, and for every insertion point
 * 0..N, smooth the region and check that:
 *   - the returned index is in range and refers to an empty slot,
 *   - keys numbered below the insertion point end up before that slot and all other
 *     keys after it,
 *   - exactly the original set of keys is still present.
 * N must satisfy 0 < N < 30 so that 1<<N fits in an int and the strings table is large
 * enough; this is checked before N is used for anything. */
void test_smooth_region_N (int N) {
    assert(0<N && N<30);
    struct pair pairs[N];
    char *strings[100];
    char string[100];
    int i;
    int len;
    if (N<10) len=1;
    else if (N<100) len=2;
    else len=8;
    /* Start from fully zeroed slots so that whole-struct copies never read garbage. */
    memset(pairs, 0, sizeof(pairs));
    for (i=0; i<N; i++) {
        snprintf(string, sizeof(string), "%0*d", len, i);
        strings[i] = strdup(string);
        assert(strings[i]!=0);
    }
    for (i=0; i<(1<<N)-1; i++) {
        int insertat;
        for (insertat=0; insertat<=N; insertat++) {
            int j;
            int r;
            for (j=0; j<N; j++) {
                if ((1<<j)&i) {
                    pairs[j].key = strings[j];
                } else {
                    pairs[j].key = 0;
                }
            }
            pmainternal_printpairs(pairs, N); printf(" at %d becomes f", insertat);
            r = pmainternal_smooth_region(pairs, N, insertat);
            pmainternal_printpairs(pairs, N); printf(" at %d\n", r);
            assert(0<=r); assert(r<N);
            assert(pairs[r].key==0);
            /* Now verify that things are in the right place:
             * everything before r should be smaller than keys[insertat].
             * everything after is bigger.
             * Also, make sure everything appeared. */
            {
                int cleari = i;
                for (j=0; j<N; j++) {
                    if (pairs[j].key) {
                        int whichkey = atoi(pairs[j].key);
                        assert(cleari&(1<<whichkey));
                        cleari &= ~(1<<whichkey);
                        if (whichkey<insertat) assert(j<r);
                        else assert(j>r);
                    }
                }
                assert(cleari==0);
            }
        }
    }
    /* The key strings came from strdup(), so release them with free(). */
    for (i=0; i<N; i++) {
        free(strings[i]);
    }
}
/* Smooth a 7-slot region holding "A" and "B" in its first two slots, asking for the
 * empty slot at index 2, and print the resulting layout followed by the returned index.
 * Empty slots are printed as "(null)".  Passing a null pointer to printf's %s is
 * undefined behaviour, so the text is substituted explicitly. */
void test_smooth_region6 (void) {
    enum {N=7};
    struct pair pairs[N] = {{.key="A"},{.key="B"},{.key=0},{.key=0},{.key=0},{.key=0},{.key=0}};
    int r = pmainternal_smooth_region(pairs, N, 2);
    int i;
    printf("{");
    for (i=0; i<N; i++) {
        const char *text = pairs[i].key ? (const char*)pairs[i].key : "(null)";
        printf("%s%s", (i>0) ? " " : "", text);
    }
    printf("} %d\n", r);
}
/* Run the smoothing tests: the exhaustive check for 4- and 5-slot regions, then the
 * 7-slot printing example. */
static void test_smooth_region (void) {
test_smooth_region_N(4);
test_smooth_region_N(5);
test_smooth_region6();
}
/* Check the parameters derived from N: N=4 gives uplgN 2 and N=8 gives uplgN 4, with a
 * densitystep of 0.5 in both cases.  Only the N field of the local pma is set up, which
 * is all pmainternal_calculate_parameters() reads. */
static void test_calculate_parameters (void) {
struct pma pma;
pma.N=4; pmainternal_calculate_parameters(&pma); assert(pma.uplgN==2); assert(pma.densitystep==0.5);
pma.N=8; pmainternal_calculate_parameters(&pma); assert(pma.uplgN==4); assert(pma.densitystep==0.5);
}
/* Check pmainternal_count_region() on a 4-slot array.  The range is [lo, hi): the slot at
 * hi is not counted, and an empty range counts zero. */
static void test_count_region (void) {
struct pair pairs[4]={{.key=0},{.key=0},{.key=0},{.key=0}};
/* All empty. */
assert(pmainternal_count_region(pairs,0,4)==0);
assert(pmainternal_count_region(pairs,2,4)==0);
assert(pmainternal_count_region(pairs,0,2)==0);
/* One key at index 2. */
pairs[2].key="A";
assert(pmainternal_count_region(pairs,0,4)==1);
assert(pmainternal_count_region(pairs,2,4)==1);
assert(pmainternal_count_region(pairs,0,2)==0);
assert(pmainternal_count_region(pairs,2,2)==0);
assert(pmainternal_count_region(pairs,2,3)==1);
/* Three keys in total. */
pairs[3].key="B";
pairs[0].key="a";
assert(pmainternal_count_region(pairs,0,4)==3);
}
/* Exercise pma_random_pick() together with insert and delete.
 * All keys and values are stored with their trailing NUL (lengths are strlen+1), so the
 * picked pointers can be compared with strcmp(). */
static void test_pma_random_pick (void) {
PMA pma;
int r = pma_create(&pma);
bytevec key,val;
ITEMLEN keylen,vallen;
assert(r==0);
/* Nothing to pick from an empty PMA. */
r = pma_random_pick(pma, &key, &keylen, &val, &vallen);
assert(r==DB_NOTFOUND);
/* With a single pair, that pair is the one picked. */
r = pma_insert(pma, "hello", 6, "there", 6);
assert(r==BRT_OK);
r = pma_random_pick(pma, &key, &keylen, &val, &vallen);
assert(r==0);
assert(keylen==6); assert(vallen==6);
assert(strcmp(key,"hello")==0);
assert(strcmp(val,"there")==0);
/* Deleting an absent key fails; deleting the only key empties the PMA again. */
r = pma_delete(pma, "nothello", 9);
assert(r==DB_NOTFOUND);
r = pma_delete(pma, "hello", 6);
assert(r==BRT_OK);
r = pma_random_pick(pma, &key, &keylen, &val, &vallen);
assert(r==DB_NOTFOUND);
/* Re-inserting after a delete works the same as the first time. */
r = pma_insert(pma, "hello", 6, "there", 6);
assert(r==BRT_OK);
r = pma_random_pick(pma, &key, &keylen, &val, &vallen);
assert(r==0);
assert(keylen==6); assert(vallen==6);
assert(strcmp(key,"hello")==0);
assert(strcmp(val,"there")==0);
/* Insert seven more pairs, then delete everything except "aaf". */
r = pma_insert(pma, "aaa", 4, "athere", 7); assert(r==BRT_OK);
r = pma_insert(pma, "aab", 4, "bthere", 7); assert(r==BRT_OK);
r = pma_insert(pma, "aac", 4, "cthere", 7); assert(r==BRT_OK);
r = pma_insert(pma, "aad", 4, "dthere", 7); assert(r==BRT_OK);
r = pma_insert(pma, "aae", 4, "ethere", 7); assert(r==BRT_OK);
r = pma_insert(pma, "aaf", 4, "fthere", 7); assert(r==BRT_OK);
r = pma_insert(pma, "aag", 4, "gthere", 7); assert(r==BRT_OK);
r = pma_delete(pma, "aaa", 4); assert(r==BRT_OK);
r = pma_delete(pma, "aab", 4); assert(r==BRT_OK);
r = pma_delete(pma, "aac", 4); assert(r==BRT_OK);
r = pma_delete(pma, "aad", 4); assert(r==BRT_OK);
r = pma_delete(pma, "aae", 4); assert(r==BRT_OK);
r = pma_delete(pma, "aag", 4); assert(r==BRT_OK);
r = pma_delete(pma, "hello", 6); assert(r==BRT_OK);
/* The single survivor is the pair that gets picked. */
r = pma_random_pick(pma, &key, &keylen, &val, &vallen);
assert(r==0);
assert(keylen==4); assert(vallen==7);
assert(strcmp(key,"aaf")==0);
assert(strcmp(val,"fthere")==0);
r=pma_free(&pma); assert(r==0);
assert(pma==0);
}
/* Exercise pma_lookup() and pma_insert(): a miss on an empty PMA, hits after inserting,
 * the end-of-array sentinel surviving an insert, and finally a bulk insert of 100
 * ascending keys into a fresh PMA.
 * NOTE(review): the results of both pma_create() calls are not checked here. */
static void test_find_insert (void) {
PMA pma;
int r;
bytevec dv;
ITEMLEN dl;
pma_create(&pma);
r=pma_lookup(pma, "aaa", 3, &dv, &dl);
assert(r==DB_NOTFOUND);
r=pma_insert(pma, "aaa", 3, "aaadata", 7);
assert(r==BRT_OK);
/* Clear the outputs so a stale value cannot make the next check pass. */
dv=0; dl=0;
r=pma_lookup(pma, "aaa", 3, &dv, &dl);
assert(r==BRT_OK);
assert(keycompare(dv,dl,"aaadata", 7)==0);
r=pma_insert(pma, "bbb", 4, "bbbdata", 8);
assert(r==BRT_OK);
/* Both keys must be retrievable after the second insert. */
r=pma_lookup(pma, "aaa", 3, &dv, &dl);
assert(r==BRT_OK);
assert(keycompare(dv,dl,"aaadata", 7)==0);
r=pma_lookup(pma, "bbb", 4, &dv, &dl);
assert(r==BRT_OK);
assert(keycompare(dv,dl,"bbbdata", 8)==0);
/* The slot just past the end keeps its 0xdeadbeef sentinel before and after another insert. */
assert((unsigned long)pma->pairs[pma_index_limit(pma)].key==0xdeadbeefL);
r=pma_insert(pma, "00000", 6, "d0", 3);
assert(r==BRT_OK);
assert((unsigned long)pma->pairs[pma_index_limit(pma)].key==0xdeadbeefL);
r=pma_free(&pma); assert(r==0); assert(pma==0);
pma_create(&pma); assert(pma!=0);
{
int i;
/* Keys are zero-padded to 5 digits; each is stored with its trailing NUL. */
for (i=0; i<100; i++) {
char string[10];
char dstring[10];
snprintf(string,10,"%05d",i);
snprintf(dstring,10,"d%d", i);
printf("Inserting %d: string=%s dstring=%s\n", i, string, dstring);
r=pma_insert(pma, string, strlen(string)+1, dstring, strlen(dstring)+1);
assert(r==BRT_OK);
}
}
r=pma_free(&pma); assert(r==0); assert(pma==0);
}
/* Running sums of the numeric keys and values seen by do_sum_em(). */
static int tpi_k,tpi_v;
/* Iteration callback: KEY and VAL are NUL-terminated decimal strings whose lengths
 * include the terminator.  Adds their numeric values to tpi_k and tpi_v.
 * V must be the 0xdeadbeef cookie supplied by test_pma_iterate_internal(). */
static void do_sum_em (bytevec key, ITEMLEN keylen, bytevec val, ITEMLEN vallen, void *v) {
    const char *key_text = key;
    const char *val_text = val;
    assert((unsigned long)v==0xdeadbeefL);
    assert(strlen(key_text)+1==keylen);
    assert(strlen(val_text)+1==vallen);
    tpi_k += atoi(key_text);
    tpi_v += atoi(val_text);
}
/* Iterate over PMA with do_sum_em() and check that the keys sum to EXPECTED_K and the
 * values to EXPECTED_V. */
static void test_pma_iterate_internal (PMA pma, int expected_k, int expected_v) {
    void *cookie = (void*)0xdeadbeefL;
    tpi_v = 0;
    tpi_k = 0;
    pma_iterate(pma, do_sum_em, cookie);
    assert(tpi_k==expected_k);
    assert(tpi_v==expected_v);
}
/* Check pma_iterate() after one and after two insertions, using numeric strings so the
 * visited pairs can be verified by their sums.
 * NOTE(review): the result of pma_create() is not checked here. */
static void test_pma_iterate (void) {
PMA pma;
int r;
pma_create(&pma);
r=pma_insert(pma, "42", 3, "-19", 4);
assert(r==BRT_OK);
test_pma_iterate_internal(pma, 42, -19);
r=pma_insert(pma, "12", 3, "-100", 5);
assert(r==BRT_OK);
test_pma_iterate_internal(pma, 42+12, -19-100);
r=pma_free(&pma); assert(r==0); assert(pma==0);
}
/* Check the PMA_ITERATE macro over two separate PMAs, accumulating into the same local
 * variables.  Each of the three pairs contributes keylen+vallen = 2+5 = 7, hence the
 * expected sum of 21.
 * NOTE(review): the results of the three pma_insert() calls are not checked. */
static void test_pma_iterate2 (void) {
PMA pma0,pma1;
int r;
int sum=0;
int n_items=0;
r=pma_create(&pma0); assert(r==0);
r=pma_create(&pma1); assert(r==0);
pma_insert(pma0, "a", 2, "aval", 5);
pma_insert(pma0, "b", 2, "bval", 5);
pma_insert(pma1, "x", 2, "xval", 5);
/* The key and data pointers are not used by the body, so they are marked unused. */
PMA_ITERATE(pma0,kv __attribute__((__unused__)),kl,dv __attribute__((__unused__)),dl, (n_items++,sum+=kl+dl));
PMA_ITERATE(pma1,kv __attribute__((__unused__)),kl,dv __attribute__((__unused__)), dl, (n_items++,sum+=kl+dl));
assert(sum==21);
assert(n_items==3);
r=pma_free(&pma0); assert(r==0); assert(pma0==0);
r=pma_free(&pma1); assert(r==0); assert(pma1==0);
}
/* Check to see if we can create and kill a cursor.
 * Freeing the PMA must fail while the cursor is still open, and succeed once the cursor
 * has been freed.  The printf calls only trace progress. */
void test_pma_cursor_0 (void) {
PMA pma;
PMA_CURSOR c=0;
int r;
r=pma_create(&pma); assert(r==0);
r=pma_cursor(pma, &c); assert(r==0); assert(c!=0);
printf("%s:%d\n", __FILE__, __LINE__);
r=pma_free(&pma); assert(r!=0); /* didn't deallocate the cursor. */
printf("%s:%d\n", __FILE__, __LINE__);
r=pma_cursor_free(&c); assert(r==0);
printf("%s:%d\n", __FILE__, __LINE__);
r=pma_free(&pma); assert(r==0); /* did deallocate the cursor. */
}
/* Make sure we can free the cursors in any order. There is a doubly linked list of cursors
 * and if we free them in a different order, then different unlinking code is invoked.
 * `order` runs over the 6 permutations: order/2 picks which of the three cursors is freed
 * first and order%2 which of the remaining two is freed second.  After each free the
 * survivors are shifted down so that they are always named c0 (and c1).  Freeing the PMA
 * must keep failing until the last cursor is gone. */
void test_pma_cursor_1 (void) {
PMA pma;
PMA_CURSOR c0=0,c1=0,c2=0;
int r;
int order;
for (order=0; order<6; order++) {
r=pma_create(&pma); assert(r==0);
r=pma_cursor(pma, &c0); assert(r==0); assert(c0!=0);
r=pma_cursor(pma, &c1); assert(r==0); assert(c1!=0);
r=pma_cursor(pma, &c2); assert(r==0); assert(c2!=0);
r=pma_free(&pma); assert(r!=0);
/* First free: c0, c1 or c2. */
if (order<2) { r=pma_cursor_free(&c0); assert(r==0); c0=c1; c1=c2; }
else if (order<4) { r=pma_cursor_free(&c1); assert(r==0); c1=c2; }
else { r=pma_cursor_free(&c2); assert(r==0); }
r=pma_free(&pma); assert(r!=0);
/* Second free: one of the two survivors. */
if (order%2==0) { r=pma_cursor_free(&c0); assert(r==0); c0=c1; }
else { r=pma_cursor_free(&c1); assert(r==0); }
r=pma_free(&pma); assert(r!=0);
/* Third free: the last cursor; now the PMA itself can be freed. */
r = pma_cursor_free(&c0); assert(r==0);
r=pma_free(&pma); assert(r==0);
}
}
/* Positioning a cursor at the last element of an empty PMA must report DB_NOTFOUND.
 * NOTE(review): key and val are initialised but not otherwise used in this test. */
void test_pma_cursor_2 (void) {
PMA pma;
PMA_CURSOR c=0;
int r;
DBT key,val;
ybt_init(&key); key.flags=DB_DBT_REALLOC;
ybt_init(&val); val.flags=DB_DBT_REALLOC;
r=pma_create(&pma); assert(r==0);
r=pma_cursor(pma, &c); assert(r==0); assert(c!=0);
r=pma_cursor_set_position_last(c); assert(r==DB_NOTFOUND);
r=pma_cursor_free(&c); assert(r==0);
r=pma_free(&pma); assert(r==0);
}
/* Run all of the cursor tests in sequence. */
void test_pma_cursor (void) {
test_pma_cursor_0();
test_pma_cursor_1();
test_pma_cursor_2();
}
/* Run every PMA test with allocation tracking switched on.  After each test,
 * memory_check_all_free() asserts that the test released every tracked allocation. */
void pma_tests (void) {
memory_check=1;
test_pma_iterate(); memory_check_all_free();
test_pma_iterate2(); memory_check_all_free();
test_make_space_at(); memory_check_all_free();
test_smooth_region(); memory_check_all_free();
test_find_insert(); memory_check_all_free();
test_pma_find(); memory_check_all_free();
test_calculate_parameters(); memory_check_all_free();
test_count_region(); memory_check_all_free();
test_keycompare(); memory_check_all_free();
test_pma_random_pick(); memory_check_all_free();
test_pma_cursor(); memory_check_all_free();
}
/* Entry point of the PMA test program: runs pma_tests() and exits with status 0.
 * The command-line arguments are not used. */
int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) {
pma_tests();
return 0;
}
/* An in-memory Packed Memory Array dictionary.
The keys and values are arrays of bytes, but are not necessarily kept in scan order.
Only the pointers are kept.
*/
#include "pma-internal.h"
#include "key.h"
#include "memory.h"
#include "myassert.h"
#include "../include/ydb-constants.h"
#include <stdio.h>
#include <errno.h>
/* Only needed for testing. */
#include <string.h>
/* Number of pairs currently stored in PMA. */
int pma_n_entries (PMA pma) {
    int n_present = pma->n_pairs_present;
    return n_present;
}
/* Number of slots in PMA's array; valid slot indexes are 0 .. pma_index_limit(pma)-1. */
int pma_index_limit (PMA pma) {
    int n_slots = pma->N;
    return n_slots;
}
/* Return 1 if slot I of PMA holds a pair (its key pointer is non-null), else 0.
 * I must be a valid slot index; this is asserted. */
int pmanode_valid (PMA pma, int i) {
    struct pair *slot;
    assert(0<=i);
    assert(i<pma_index_limit(pma));
    slot = &pma->pairs[i];
    return (slot->key != 0) ? 1 : 0;
}
/* Return the key pointer stored in slot I of PMA (null for an empty slot).
 * I must be a valid slot index; this is asserted. */
bytevec pmanode_key (PMA pma, int i) {
    struct pair *slot;
    assert(0<=i);
    assert(i<pma_index_limit(pma));
    slot = &pma->pairs[i];
    return slot->key;
}
/* Return the key length stored in slot I of PMA.
 * I must be a valid slot index; this is asserted. */
ITEMLEN pmanode_keylen (PMA pma, int i) {
    struct pair *slot;
    assert(0<=i);
    assert(i<pma_index_limit(pma));
    slot = &pma->pairs[i];
    return slot->keylen;
}
/* Return the value pointer stored in slot I of PMA.
 * I must be a valid slot index; this is asserted. */
bytevec pmanode_val (PMA pma, int i) {
    struct pair *slot;
    assert(0<=i);
    assert(i<pma_index_limit(pma));
    slot = &pma->pairs[i];
    return slot->val;
}
/* Return the value length stored in slot I of PMA.
 * I must be a valid slot index; this is asserted. */
ITEMLEN pmanode_vallen (PMA pma, int i) {
    struct pair *slot;
    assert(0<=i);
    assert(i<pma_index_limit(pma));
    slot = &pma->pairs[i];
    return slot->vallen;
}
/* Deliver some pair stored in PMA through *KEY, *KEYLEN, *VAL and *VALLEN.
 * Returns 0 if a pair was found and DB_NOTFOUND if the PMA holds no pairs (the outputs
 * are then left untouched).  The returned pointers refer to the stored key and value.
 * Could pick the same one every time if we wanted.
 * The enabled (#if 1) variant is not random at all: it returns the first occupied slot.
 * The disabled variant starts scanning at a random slot and wraps around. */
int pma_random_pick(PMA pma, bytevec *key, ITEMLEN *keylen, bytevec *val, ITEMLEN *vallen) {
#if 1
int i;
/* For now a simple implementation where we simply start at the beginning and look. */
for (i=0; i<pma_index_limit(pma); i++) {
if (pma->pairs[i].key) {
*key = pmanode_key(pma,i);
*keylen = pmanode_keylen(pma,i);
*val = pmanode_val(pma,i);
*vallen = pmanode_vallen(pma,i);
return 0;
}
}
return DB_NOTFOUND;
#else
/* Maybe we should pick a random item to remove in order to reduce the unbalancing. */
int i;
int l = pma_index_limit(pma);
int r = random()%l;
/* Scan all l slots once, starting at the random slot r and wrapping around. */
for (i=0; i<l; i++) {
int ir=(i+r)%l;
if (pma->pairs[ir].key) {
*key = pmanode_key(pma,ir);
*keylen = pmanode_keylen(pma,ir);
*val = pmanode_val(pma,ir);
*vallen = pmanode_vallen(pma,ir);
return 0;
}
}
return DB_NOTFOUND;
#endif
}
/* Statistics gathered by pmainternal_find(): number of searches, number of times the
 * search range was narrowed, and number of empty slots skipped while scanning. */
static int pma_count_finds=0;
static int pma_count_divides=0;
static int pma_count_scans=0;
/* Print the search statistics to stdout. */
void pma_show_stats (void) {
printf("%d finds, %d divides, %d scans\n", pma_count_finds, pma_count_divides, pma_count_scans);
}
// Return the smallest index such that no lower index contains a larger key.
// This will be in the range 0 (inclusive) to pma_index_limit(pma) (inclusive).
// Thus the returned index may not be a valid index into the array if it is == pma_index_limit(pma)
// For example: if the array is empty, that means we return 0.
// For example: if the array is full of small keys, that means we return pma_index_limit(pma), which is off the end of the array.
// For example: if the array is full of large keys, then we return 0.
// If a probed slot compares equal to KEY, that slot's index is returned immediately.
// The search is a binary search that, because slots may be empty, scans forward from
// each midpoint to the next occupied slot before comparing.
// Side effect: updates the pma_count_finds / pma_count_divides / pma_count_scans counters.
int pmainternal_find (PMA pma, bytevec key, int keylen) {
int lo=0, hi=pma_index_limit(pma);
/* lo and hi are the minimum and maximum values (inclusive) that we could possibly return. */
pma_count_finds++;
while (lo<hi) {
int mid;
// Scan forward looking for a non-null value.
for (mid=(lo+hi)/2; mid<hi; mid++) {
if (pma->pairs[mid].key!=0) {
// Found one.
int cmp = keycompare(key,keylen, pma->pairs[mid].key, pma->pairs[mid].keylen);
if (cmp==0) return mid;
else if (cmp<0) {
/* key is smaller than the midpoint, so look in the low half. */
hi = (lo+hi)/2; /* recalculate the midpoint, since mid is not necessarily the midpoint now. */
pma_count_divides++;
goto next_range;
} else {
/* key is larger than the midpoint. So look in the high half. */
lo = mid+1; /* The smallest value we could want to return is lo. */
pma_count_divides++;
goto next_range;
}
/* Not reached */
}
pma_count_scans++;
}
/* If we got here, all from mid to hi were null, so adjust hi to the midpoint. */
/* If the whole array is null, we'll end up returning index 0, which is good. */
hi = (lo+hi)/2;
pma_count_divides++;
next_range: ; /* We have adjusted lo and hi, so look again. */
}
assert(0<=lo);
assert(lo==hi);
assert(hi <= pma_index_limit(pma));
/* If lo points at something, the something should not be smaller than key. */
if (lo>0 && lo < pma_index_limit(pma) && pma->pairs[lo].key) {
//printf("lo=%d\n", lo);
assert(0 >= keycompare(key, keylen, pma->pairs[lo].key, pma->pairs[lo].keylen));
}
return lo;
}
//int min (int i, int j) { if (i<j) return i; else return j; }
//int max (int i, int j) { if (i<j) return j; else return i; }
//double lg (int n) { return log((double)n)/log(2.0); }
/* Print the N slots of PAIRS to stdout as "{k0 k1 ...}", where an occupied slot is shown
 * as its key (printed as a C string) and an empty slot as "_".  No newline is printed.
 * Returns the number of occupied slots. */
int pmainternal_printpairs (struct pair *pairs, int N) {
    int n_occupied=0;
    int slot;
    printf("{");
    for (slot=0; slot<N; slot++) {
        struct pair *p = &pairs[slot];
        if (slot>0) printf(" ");
        if (p->key==0) {
            printf("_");
            continue;
        }
        printf("%s", (char*)p->key);
        n_occupied++;
    }
    printf("}");
    return n_occupied;
}
/* Debugging aid: print PMA's slot count, its recorded number of pairs, and all of its
 * slots on one line.  Asserts that the number of occupied slots actually printed agrees
 * with the recorded count. */
void print_pma (PMA pma) {
    int n_slots = pma_index_limit(pma);
    int n_seen;
    printf("N=%d n_present=%d ", n_slots, pma->n_pairs_present);
    n_seen = pmainternal_printpairs(pma->pairs, n_slots);
    printf("\n");
    assert(n_seen==pma->n_pairs_present);
}
/* Spread the SCOUNT pairs of SOURCEPAIRS over the DCOUNT slots of DESTPAIRS by
 * recursively giving each half of the destination half of the source.
 * A single source pair is copied into the first slot of its destination range.
 * At most one source pair may have a null key (the placeholder for the blank slot);
 * the return value is the destination index where it was placed, or -1 if there was none.
 * Requires SCOUNT <= DCOUNT (asserted). */
int distribute_data (struct pair *destpairs, int dcount,
                     struct pair *sourcepairs, int scount) {
    int dest_half, src_half;
    int left, right;
    assert(scount<=dcount);
    if (scount==0) {
        return -1;
    }
    if (scount==1) {
        *destpairs=*sourcepairs;
        return (destpairs->key==0) ? 0 : -1;
    }
    dest_half = dcount/2;
    src_half  = scount/2;
    left  = distribute_data(destpairs, dest_half,
                            sourcepairs, src_half);
    right = distribute_data(destpairs+dest_half, dcount-dest_half,
                            sourcepairs+src_half, scount-src_half);
    /* The blank can have landed in at most one of the two halves. */
    assert(left==-1 || right==-1);
    if (left!=-1) return left;
    if (right!=-1) return right+dest_half; /* convert to an index into the whole range */
    return -1;
}
/* spread the non-empty pairs around. There are n of them. Create an empty slot just before the IDXth
element, and return that slot's index in the smoothed array.
PAIRS has N slots; IDX may range from 0 to N inclusive (N means "after everything").
The present pairs plus one blank placeholder are copied to a temporary array, the region
is cleared, and distribute_data() lays them back out evenly.
NOTE(review): the temporary allocation is not checked for failure, and the region needs at
least one empty slot so that the pairs plus the blank still fit. */
int pmainternal_smooth_region (struct pair *pairs, int n, int idx) {
int i;
int n_present=0;
for (i=0; i<n; i++) {
if (pairs[i].key) n_present++;
}
n_present++; // Save one for the blank guy.
{
struct pair *MALLOC_N(n_present,tmppairs);
int n_saved=0;
int r;
for (i=0; i<n; i++) {
/* The blank placeholder goes in just before the element that was at IDX. */
if (i==idx) {
tmppairs[n_saved++].key = 0;
}
if (pairs[i].key) {
tmppairs[n_saved++] = pairs[i];
}
/* Clear every slot; distribute_data() only writes the slots it uses. */
pairs[i].key = 0;
pairs[i].keylen = 0;
pairs[i].val = 0;
pairs[i].vallen = 0;
}
/* IDX==N: the blank belongs after the last element. */
if (idx==n) {
tmppairs[n_saved++].key = 0;
}
//printf(" temp="); printpairs(tmppairs, n_saved);
assert(n_saved==n_present);
/* Now the tricky part. Distribute the data. */
r=distribute_data (pairs, n,
tmppairs, n_saved);
my_free(tmppairs);
return r;
}
}
/* Ceiling of the base-2 logarithm: the smallest RESULT such that 2^RESULT >= N.
 * Returns 0 for N <= 1.
 * Implemented by counting the bits of N-1 rather than by repeatedly doubling a power of
 * two, because the doubling overflows a signed int once N exceeds 2^30. */
int lg (int n) {
    int result=0;
    int remaining;
    if (n<=1) return 0;
    remaining = n-1;
    while (remaining>0) {
        result++;
        remaining >>= 1;
    }
    return result;
}
/* Derive PMA's uplgN and densitystep fields from its array length.
 * N is halved for as long as half of it is still at least lg(N); the value left over
 * becomes uplgN, and densitystep is 0.5 divided by the number of halvings performed.
 * Asserts that at least one halving took place. */
void pmainternal_calculate_parameters (PMA pma)
/* Calculate densitystep and uplgN, given N. */
{
int N = pma_index_limit(pma);
int lgN = lg(N);
int n_divisions=0;
//printf("N=%d lgN=%d\n", N, lgN);
while (N/2>=lgN) {
n_divisions++;
N/=2;
}
pma->uplgN=N;
//printf("uplgN = %d n_divisions=%d\n", pma->uplgN, n_divisions);
assert(n_divisions>0);
pma->densitystep = 0.5/n_divisions;
}
/* Count the pairs with a non-null key in the half-open range PAIRS[lo..hi). */
int pmainternal_count_region (struct pair *pairs, int lo, int hi) {
    int present = 0;
    int i;
    for (i=lo; i<hi; i++) {
        if (pairs[i].key) present++;
    }
    return present;
}
/* Allocate an empty PMA with room for 4 pairs and store it in *pma.
 * One extra slot is allocated past the end and its key is set to the
 * 0xdeadbeef sentinel.  Returns 0 on success, -1 if either allocation fails
 * (in which case *pma is left untouched). */
int pma_create (PMA *pma) {
    TAGMALLOC(PMA, result);
    int i;
    if (result==0) return -1;
    result->N = 4;
    result->n_pairs_present = 0;
    MALLOC_N((1+result->N),result->pairs);
    /* Check the allocation BEFORE touching the array: the sentinel write used
     * to come first and would dereference a null pointer on failure. */
    if (result->pairs==0) {
        my_free(result);
        return -1;
    }
    result->pairs[result->N].key = (void*)0xdeadbeef;
    for (i=0; i<result->N; i++) {
        result->pairs[i].key = 0;
        result->pairs[i].keylen = 0;
        result->pairs[i].val = 0;
        result->pairs[i].vallen = 0;
    }
    pmainternal_calculate_parameters(result);
    result->cursors_head = result->cursors_tail = 0;
    *pma = result;
    assert((unsigned long)result->pairs[result->N].key==0xdeadbeefL);
    return 0;
}
/* Create a cursor on PMA, push it on the front of the PMA's doubly linked
 * cursor list, and return it through *cursp.  The new cursor's position is
 * -1 (undefined).  Returns 0 on success, or errno if errno is nonzero after
 * the allocation. */
int pma_cursor (PMA pma, PMA_CURSOR *cursp) {
    PMA_CURSOR MALLOC(curs);
    if (errno!=0) return errno;
    assert(curs!=0);

    /* Initialise the cursor itself. */
    curs->pma      = pma;
    curs->position = -1; /* undefined */
    curs->skey     = 0;
    curs->sval     = 0;
    curs->prev     = 0;
    curs->next     = pma->cursors_head;

    /* Link it in as the new head; an empty list also gets it as the tail. */
    if (pma->cursors_head==0) {
        pma->cursors_tail = curs;
    } else {
        pma->cursors_head->prev = curs;
    }
    pma->cursors_head = curs;

    *cursp = curs;
    return 0;
}
/* Move the cursor to the highest-indexed slot that holds a key.
 * Returns 0 on success, or DB_NOTFOUND if every slot down to index 0 is
 * empty (the cursor is then left at position 0). */
int pma_cursor_set_position_last (PMA_CURSOR c)
{
    PMA pma = c->pma;
    for (c->position = pma->N-1; pma->pairs[c->position].key==0; c->position--) {
        if (c->position<=0) return DB_NOTFOUND;
    }
    return 0;
}
/* Move the cursor to the lowest-indexed slot that holds a key.
 * Returns 0 on success, or DB_NOTFOUND if every slot up to index N-1 is
 * empty (the cursor is then left at the last index examined). */
int pma_cursor_set_position_first (PMA_CURSOR c)
{
    PMA pma = c->pma;
    for (c->position = 0; pma->pairs[c->position].key==0; c->position++) {
        if (c->position+1 >= pma->N) return DB_NOTFOUND;
    }
    return 0;
}
/* Copy the key and value at the cursor's current position into KEY and VAL
 * via ybt_set_value, using the cursor's skey/sval as the static buffers.
 * Returns BRT_KEYEMPTY if the slot under the cursor has no key, else 0.
 * The return values of ybt_set_value are not inspected. */
int pma_cget_current (PMA_CURSOR c, DBT *key, DBT *val) {
    PMA pma = c->pma;
    int pos = c->position;
    if (pma->pairs[pos].key==0) {
        return BRT_KEYEMPTY;
    }
    ybt_set_value(key, pma->pairs[pos].key, pma->pairs[pos].keylen, &c->skey);
    ybt_set_value(val, pma->pairs[pos].val, pma->pairs[pos].vallen, &c->sval);
    return 0;
}
#if 0
/* Disabled code: position the cursor on the first occupied slot and copy that
 * pair into KEY and VAL.  Returns DB_NOTFOUND when the PMA holds no pairs.
 * NOTE(review): the scan loop reads pairs[c->position] before testing
 * c->position<pma->N; reorder the two conditions if this is ever re-enabled. */
int pma_cget_first (PMA_CURSOR c, YBT *key, YBT *val) {
PMA pma=c->pma;
c->position=0;
if (pma->n_pairs_present==0) return DB_NOTFOUND;
while (pma->pairs[c->position].key==0 && c->position<pma->N) {
c->position++;
}
assert(c->position<pma->N && pma->pairs[c->position].key!=0);
ybt_set_value(key, pma->pairs[c->position].key, pma->pairs[c->position].keylen, &c->skey);
ybt_set_value(val, pma->pairs[c->position].val, pma->pairs[c->position].vallen, &c->sval);
return 0;
}
#endif
/* Unlink *cursp from its PMA's cursor list, release the cursor's static
 * key/value buffers and the cursor itself, and set *cursp to null.
 * Always returns 0. */
int pma_cursor_free (PMA_CURSOR *cursp) {
    PMA_CURSOR curs = *cursp;
    PMA pma = curs->pma;

    /* Detach from the predecessor side (or from the list head). */
    if (curs->prev) {
        curs->prev->next = curs->next;
    } else {
        assert(pma->cursors_head==curs);
        pma->cursors_head = curs->next;
    }

    /* Detach from the successor side (or from the list tail). */
    if (curs->next) {
        curs->next->prev = curs->prev;
    } else {
        assert(pma->cursors_tail==curs);
        pma->cursors_tail = curs->prev;
    }

    if (curs->skey) my_free(curs->skey);
    if (curs->sval) my_free(curs->sval);
    my_free(curs);
    *cursp = 0;
    return 0;
}
/* Make room for a new key at IDX; the pair currently at IDX ends up to the
 * right of the hole.  Making space may move pairs around (and may double the
 * array), so the hole can end up somewhere else.
 * Returns the index of the empty slot that the caller should fill. */
int pmainternal_make_space_at (PMA pma, int idx) {
/* Within a range LO to HI we have a limit of how much packing we will tolerate.
* We allow the entire array to be 50% full.
* We allow a region of size lgN to be full.
* At sizes in between, we interpolate.
*/
/* Start with a window of uplgN slots around idx at density 1.0; each pass of
 * the loop doubles the window and lowers the allowed density by densitystep. */
int size=pma->uplgN;
int lo=idx;
int hi=idx;
double density=1.0;
while (1) {
/* set hi-lo equal to size, make sure it is a superset of (hi,lo). */
lo=idx-size/2;
hi=idx+size/2;
//printf("lo=%d hi=%d\n", lo, hi);
/* Slide the window back inside [0, pma_index_limit) without changing its width. */
if (lo<0) { hi-=lo; lo=0; }
else if (hi>pma_index_limit(pma)) { lo-=(hi-pma_index_limit(pma)); hi=pma_index_limit(pma); }
else { ; /* nothing */ }
//printf("lo=%d hi=%d\n", lo, hi);
assert(0<=lo); assert(lo<hi); assert(hi<=pma_index_limit(pma)); assert(hi-lo==size); // separate into separate assertions so that gcov doesn't see branches not taken.
assert(density>0.499); assert(density<=1);
if (density<0.5001) { assert(lo==0); assert(hi==pma_index_limit(pma)); }
{
int count = (1+ /* Don't forget space for the new guy. */
pmainternal_count_region(pma->pairs, lo, hi));
/* The window is sparse enough once the new pair is counted: smooth it. */
if (count/(double)(hi-lo) <= density) break;
if (lo==0 && hi==pma_index_limit(pma)) {
/* The array needs to be doubled in size. */
int i;
assert(size==pma_index_limit(pma));
size*=2;
//printf("realloc %p to %d\n", pma->pairs, size*sizeof(*pma->pairs));
/* One slot beyond `size` is kept for the 0xdeadbeef sentinel key.
 * NOTE(review): the my_realloc result is not checked here. */
pma->pairs = my_realloc(pma->pairs, (1+size)*sizeof(*pma->pairs));
/* Only the key of each new slot is cleared; a null key marks the slot empty. */
for (i=hi; i<size; i++) pma->pairs[i].key=0;
pma->pairs[size].key = (void*)0xdeadbeefL;
pma->N=size;
pmainternal_calculate_parameters(pma);
hi=size;
//printf("doubled N\n");
break;
}
}
density-=pma->densitystep;
size*=2;
}
//printf("%s:%d Smoothing from %d to %d to density %f\n", __FILE__, __LINE__, lo, hi, density);
{
/* Smooth the chosen window; the returned index is relative to lo. */
int new_index = pmainternal_smooth_region(pma->pairs+lo, hi-lo, idx-lo);
return new_index+lo;
}
}
/* Look KEY up in the PMA.  On a hit, *val and *vallen are pointed at the
 * stored value (the PMA's own storage: do not modify or free it) and BRT_OK
 * is returned.  Returns DB_NOTFOUND when the search lands past the end of
 * the array or on a pair whose key differs. */
enum pma_errors pma_lookup (PMA pma, bytevec key, ITEMLEN keylen, bytevec*val, ITEMLEN *vallen) {
    int slot = pmainternal_find(pma, key, keylen);
    assert(0<=slot);
    assert(slot<=pma_index_limit(pma));
    if (slot==pma_index_limit(pma)) {
        return DB_NOTFOUND;
    }
    if (keycompare(key,keylen,pma->pairs[slot].key,pma->pairs[slot].keylen)!=0) {
        return DB_NOTFOUND;
    }
    *val    = pma->pairs[slot].val;
    *vallen = pma->pairs[slot].vallen;
    return BRT_OK;
}
/* Release P with my_free unless it is null. */
void maybe_free (const void *p) {
    if (p==0) return;
    my_free((void*)p);
}
/* Destroy the PMA at *pmap: free every stored key and value, the pair array
 * and the PMA itself, then set *pmap to null.
 * Returns 0 on success.  If any cursor is still attached, returns -1 and
 * leaves everything untouched. */
int pma_free (PMA *pmap) {
    PMA pma = *pmap;
    int slot;
    if (pma->cursors_head) return -1;
    for (slot=0; slot<pma_index_limit(pma); slot++) {
        if (!pma->pairs[slot].key) continue;   /* empty slot: nothing to release */
        maybe_free(pma->pairs[slot].key);
        maybe_free(pma->pairs[slot].val);
        pma->pairs[slot].key = 0;
        pma->pairs[slot].val = 0;
    }
    my_free(pma->pairs);
    my_free(pma);
    *pmap = 0;
    return 0;
}
/* Insert a copy of (key, data) into the PMA.
 * Returns BRT_ALREADY_THERE, changing nothing, if an equal key is already
 * stored; otherwise stores memdup'd copies of the key and data and returns
 * BRT_OK.  If the slot chosen by pmainternal_find is occupied, space is made
 * first (which may move pairs and change the index). */
int pma_insert (PMA pma, bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen) {
    int idx = pmainternal_find(pma, key, keylen);

    /* Duplicate check, only meaningful when idx names a real, occupied slot. */
    if (idx < pma_index_limit(pma)
        && pma->pairs[idx].key
        && 0==keycompare(key, keylen, pma->pairs[idx].key, pma->pairs[idx].keylen)) {
        return BRT_ALREADY_THERE; /* It is already here. Return an error. */
    }

    if (pma->pairs[idx].key) {
        idx = pmainternal_make_space_at (pma, idx); /* returns the new idx. */
    }
    assert(!pma->pairs[idx].key);

    pma->pairs[idx].key    = memdup(key, keylen);
    pma->pairs[idx].keylen = keylen;
    pma->pairs[idx].val    = memdup(data, datalen);
    pma->pairs[idx].vallen = datalen;
    pma->n_pairs_present++;
    return BRT_OK;
}
#if 0
/* Disabled, unfinished draft of rebalancing after a delete.  It grows a
 * window around idx in the same way pmainternal_make_space_at does, but
 * stops growing once the window is at least `density` full.  The
 * array-shrinking branch is empty and the loop/function bodies are never
 * closed before the #endif, so this cannot be enabled as written. */
void smooth_after_delete (PMA pma, int idx) {
int size=pma->uplgN;
int lo=idx;
int hi=idx;
double density=0.1;
while (1) {
lo=idx-size/2;
hi=idx+size/2;
if (lo<0) { hi-=lo; lo=0; }
else if (hi>pma_index_limit(pma)) { lo-=(hi-pma_index_limit(pma)); hi=pma_index_limit(pma); }
else { ; /* nothing */ }
assert(density<0.25);
{
int count=pmainternal_count_region(pma->pairs, lo, hi);
if (count/(double)(hi-lo) >= density) break;
if (lo==0 && hi==pma_index_limit(pma)) {
/* The array needs to be shrunk */
}
#endif
/* Remove KEY (and its value) from the PMA.
 * Returns BRT_OK if the key was present and has been removed, DB_NOTFOUND
 * otherwise.  The array is not rebalanced after a delete.
 *
 * pmainternal_find may return the index limit, an empty slot, or the slot of
 * a different key (pma_lookup and pma_insert both compare keys after calling
 * it), so all three cases are treated as "not found" rather than deleting
 * whatever pair happens to be at the returned slot. */
int pma_delete (PMA pma, bytevec key, ITEMLEN keylen) {
    int l = pmainternal_find(pma, key, keylen);
    if (l==pma_index_limit(pma)
        || pma->pairs[l].key==0
        || keycompare(key, keylen, pma->pairs[l].key, pma->pairs[l].keylen)!=0) {
        printf("%s:%d l=%d r=%d\n", __FILE__, __LINE__, l, DB_NOTFOUND);
        return DB_NOTFOUND;
    }
    assert(pma->pairs[l].val!=0);
    my_free((void*)pma->pairs[l].key);
    my_free((void*)pma->pairs[l].val);
    pma->pairs[l].key = 0;
    pma->pairs[l].val = 0;
    pma->pairs[l].keylen = 0;
    pma->pairs[l].vallen = 0;
    pma->n_pairs_present--;
    // Need to rebalance
    // smooth_after_delete(pma,l);
    return BRT_OK;
}
/* Call F once for every occupied slot, in index order, passing the pair's
 * key, key length, value, value length and the caller's cookie V. */
void pma_iterate (PMA pma, void(*f)(bytevec,ITEMLEN,bytevec,ITEMLEN, void*), void*v) {
    int slot;
    for (slot=0; slot<pma_index_limit(pma); slot++) {
        if (!pma->pairs[slot].key) continue;
        f(pma->pairs[slot].key, pma->pairs[slot].keylen,
          pma->pairs[slot].val, pma->pairs[slot].vallen,
          v);
    }
}
#ifndef PMA_H
#define PMA_H
#include "brttypes.h"
#include "ybt.h"
#include "yerror.h"
/* An in-memory Packed Memory Array dictionary. */
/* There is a built-in-cursor. */
typedef struct pma *PMA;
typedef struct pma_cursor *PMA_CURSOR;
/* All functions return 0 on success. */
int pma_create (PMA *);
/* returns 0 if OK.
* You must have freed all the cursors, otherwise returns nonzero and does nothing. */
int pma_free (PMA *);
int pma_n_entries (PMA);
/* Returns an error if the key is already present. */
/* The values returned should not be modified by the caller. */
/* Any cursors should be updated. */
/* Duplicates the key and keylen. */
enum pma_errors pma_insert (PMA, bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen);
/* This returns an error if the key is NOT present. */
int pma_replace (PMA, bytevec key, ITEMLEN keylen, bytevec data, ITEMLEN datalen);
/* This returns an error if the key is NOT present. */
int pma_delete (PMA, bytevec key, ITEMLEN keylen);
/* Exposes internals of the PMA by returning a pointer to the guts.
* Don't modify the returned data. Don't free it. */
enum pma_errors pma_lookup (PMA, bytevec key, ITEMLEN keylen, bytevec*data, ITEMLEN *datalen);
/* Move the cursor to the beginning or the end or to a key */
int pma_cursor (PMA, PMA_CURSOR *);
int pma_cursor_free (PMA_CURSOR*);
int pma_cursor_set_position_last (PMA_CURSOR c);
int pma_cursor_set_position_first (PMA_CURSOR c);
int pma_cget_current (PMA_CURSOR c, DBT *key, DBT *val);
/* Return PMA_NOTFOUND if the pma is empty. */
#if 0
int pma_cget_first (PMA_CURSOR, YBT */*key*/, YBT */*val*/);
int pma_cursor_first (PMA);
int pma_cursor_last (PMA);
int pma_cursor_set (PMA, bytevec key, int keylen);
int pma_cursor_next (PMA);
int pma_cursor_prev (PMA);
int pma_cursor_get (PMA, bytevec *key, int *keylen, bytevec *data, int *datalen);
#endif
int pma_random_pick(PMA, bytevec *key, ITEMLEN *keylen, bytevec *data, ITEMLEN *datalen);
int pma_index_limit(PMA);
int pmanode_valid(PMA,int);
bytevec pmanode_key(PMA,int);
ITEMLEN pmanode_keylen(PMA,int);
bytevec pmanode_val(PMA,int);
ITEMLEN pmanode_vallen(PMA,int);
void pma_iterate (PMA, void(*)(bytevec,ITEMLEN,bytevec,ITEMLEN, void*), void*);
/* Run BODY once for every valid slot of TABLE, in index order.  Inside BODY
 * the names given as keyvar, keylenvar, datavar and datalenvar are bound to
 * that slot's key, key length, value and value length.  Uses a GCC
 * statement expression ( ({ ... }) ) and evaluates TABLE several times per
 * iteration. */
#define PMA_ITERATE(table,keyvar,keylenvar,datavar,datalenvar,body) ({ \
int __i; \
for (__i=0; __i<pma_index_limit(table); __i++) { \
if (pmanode_valid(table,__i)) { \
bytevec keyvar = pmanode_key(table,__i); \
ITEMLEN keylenvar = pmanode_keylen(table,__i); \
bytevec datavar = pmanode_val(table, __i); \
ITEMLEN datalenvar = pmanode_vallen(table, __i); \
body; \
} } })
#endif
int keycompare (bytevec key1, ITEMLEN key1len, bytevec key2, ITEMLEN key2len);
/* Test random insertions using db4 */
#include <assert.h>
#include <db.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <string.h>
#include <limits.h>
enum { MAX_PATHNAME_LEN = 100 };
const char dir[]="db4dir";
DB_ENV *env=0;
DB *db=0;
#if DB_VERSION_MINOR == 0
#define IF40(x,y) x
#else
#define IF40(x,y) y
#endif
/* Recreate the benchmark directory `dir` from scratch and open a Berkeley DB
 * environment (memory pool only, no logging) and a btree database named
 * "files" in it.  The handles are left in the globals `env` and `db`.
 * Every step is checked with assert. */
void create_directory (void) {
char command[MAX_PATHNAME_LEN];
int r;
/* Remove any previous run's directory.  The exit status of system() is not checked. */
r=snprintf(command, MAX_PATHNAME_LEN, "rm -rf %s", dir);
assert(r<MAX_PATHNAME_LEN);
system(command);
r=mkdir(dir, 0777);
assert(r==0);
r=db_env_create(&env, 0);
assert(r==0);
/* Ask for a 512MiB cache in a single region. */
r=env->set_cachesize(env, 0, 512*(1<<20), 0);
assert(r==0);
/* IF40(x,y) selects x when DB_VERSION_MINOR is 0 and y otherwise: the
 * cache-size report is compiled only in the non-.0 case. */
IF40((void)0,
({
unsigned int gbytes,bytes;
int ncaches;
r=env->get_cachesize(env, &gbytes, &bytes, &ncaches);
assert(r==0);
printf("Using %.2fMiB Berkeley DB Cache Size\n", gbytes*1024 + ((double)bytes/(1<<20)));
}));
r= env->open(env, dir, DB_CREATE|DB_INIT_MPOOL,0777); // No logging.
assert(r==0);
r=db_create(&db, env, 0);
assert(r==0);
/* The two DB->open forms differ only in the extra transaction argument
 * (passed as 0) in the non-.0 case. */
IF40(
r=db->open(db, "files", 0, DB_BTREE, DB_CREATE, 0777),
r=db->open(db, 0, "files", 0, DB_BTREE, DB_CREATE, 0777));
assert(r==0);
}
/* Store one record in the global `db`.  The key is n1 and n2 formatted as
 * two zero-padded hex fields; the value is n1, n2 and four random() numbers
 * in decimal.  Asserts that the put succeeds and returns the number of key
 * plus value bytes written (terminating NULs excluded). */
int write_one (long int n1, long int n2) {
    char kbuf[100], vbuf[100];
    DBT key, data;
    DB_TXN *txn = 0;
    int klen, vlen, r;

    klen = snprintf(kbuf, 100, "%08lx%08lx", n1, n2);
    vlen = snprintf(vbuf, 100, "%ld %ld %ld %ld %ld %ld", n1, n2, (long)(random()), (long)(random()), (long)(random()), (long)(random()));

    memset(&key, 0, sizeof(key));
    key.data = kbuf;
    key.size = klen;

    memset(&data, 0, sizeof(data));
    data.data = vbuf;
    data.size = vlen;

    r = db->put(db, txn, &key, &data, 0);
    assert(r==0);
    return klen+vlen;
}
/* Write n_inserts records whose first key component increases in even steps
 * of UINT_MAX/n_inserts (the second component is random).  Returns the
 * total number of key+value bytes written; returns 0 without writing or
 * printing anything when n_inserts is not positive. */
long long write_sequence (int n_inserts) {
    unsigned int step;
    int i,j;
    long long n_bytes=0;
    if (n_inserts<=0) return 0;   /* nothing to do; also avoids dividing by zero */
    step = UINT_MAX/n_inserts;
    printf("%d inserts, step %u\n", n_inserts, step);   /* step is unsigned: %u, not %d */
    for (i=0,j=0; i<n_inserts; i++,j+=step) {
        n_bytes+=write_one(j, random());
    }
    return n_bytes;
}
/* Write n_inserts records with both key components drawn from random().
 * Returns the total number of key+value bytes written. */
long long write_random (int n_inserts) {
    long long total = 0;
    int remaining;
    for (remaining=n_inserts; remaining>0; remaining--) {
        total += write_one(random(), random());
    }
    return total;
}
/* Return t1 - t0 in seconds, as a double. */
double tdiff (struct timeval *t1, struct timeval *t0) {
    double whole_seconds = t1->tv_sec  - t0->tv_sec;
    double micros        = t1->tv_usec - t0->tv_usec;
    return whole_seconds + 1e-6*micros;
}
/* Benchmark driver: create the database, time a batch of sequential inserts
 * and a batch of random inserts (each followed by a sync, timed separately),
 * then time closing the database and environment. */
int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) {
    int n_s_inserts=200000000;
    int n_inserts=50000;
    struct timeval start, written, done;
    long long n_bytes;
    double elapsed;
    int r;

    create_directory();

    /* Phase 1: sequential keys. */
    gettimeofday(&start, 0);
    n_bytes=write_sequence(n_s_inserts);
    gettimeofday(&written, 0);
    r=db->sync(db, 0);
    assert(r==0);
    gettimeofday(&done, 0);
    elapsed = tdiff(&done, &start);
    printf("%9d sequential inserts in %.3fs (%.3fs in sync), %.1f inserts/s. %lld bytes, %.1f bytes/s\n", n_s_inserts, elapsed, tdiff(&done,&written), n_s_inserts/elapsed, n_bytes, n_bytes/elapsed);

    /* Phase 2: random keys. */
    gettimeofday(&start, 0);
    n_bytes=write_random(n_inserts);
    gettimeofday(&written, 0);
    r=db->sync(db, 0);
    assert(r==0);
    gettimeofday(&done, 0);
    elapsed = tdiff(&done, &start);
    printf("%9d random inserts in %.3fs (%.3fs in sync), %.1f inserts/s. %lld bytes, %.1f bytes/s\n", n_inserts, elapsed, tdiff(&done, &written), n_inserts/elapsed, n_bytes, n_bytes/elapsed);

    /* Phase 3: shutdown. */
    gettimeofday(&start, 0);
    r=db->close(db,0);
    assert(r==0);
    r=env->close(env,0);
    assert(r==0);
    gettimeofday(&done, 0);
    printf("Time to close %.3fs\n", tdiff(&done,&start));
    return 0;
}
#define _FILE_OFFSET_BITS 64
#include "ybt.h"
#include "memory.h"
#include <assert.h>
#include <string.h>
/* Exercise ybt_set_value in its three storage modes: the caller-supplied
 * static pointer (flags==0), DB_DBT_USERMEM with ulen==0, and
 * DB_DBT_REALLOC.  Ends each allocating section with
 * memory_check_all_free(). */
void ybt_test0 (void) {
void *v0=0,*v1=0;
DBT t0,t1;
ybt_init(&t0);
ybt_init(&t1);
/* flags==0: the data is copied into the buffers owned through v0 / v1. */
ybt_set_value(&t0, "hello", 6, &v0);
ybt_set_value(&t1, "foo", 4, &v1);
assert(t0.size==6);
assert(strcmp(t0.data, "hello")==0);
assert(t1.size==4);
assert(strcmp(t1.data, "foo")==0);
ybt_set_value(&t1, "byebye", 7, &v0); /* Use v0, not v1 */
/* NOTE(review): t0.data still holds the pointer from before v0 was
 * reallocated, so this check presumably depends on my_realloc growing the
 * buffer in place -- confirm. */
assert(strcmp(t0.data, "byebye")==0); /* t0's data should be changed too, since it used v0 */
assert(strcmp(t1.data, "byebye")==0);
my_free(v0); my_free(v1);
memory_check_all_free();
/* See if we can probe to find out how big something is by setting ulen=0 with DB_DBT_USERMEM */
ybt_init(&t0);
t0.flags = DB_DBT_USERMEM;
t0.ulen = 0;
ybt_set_value(&t0, "hello", 6, 0);
assert(t0.data==0);
assert(t0.size==6);
/* Check realloc. */
ybt_init(&t0);
t0.flags = DB_DBT_REALLOC;
v0 = 0;
ybt_set_value(&t0, "internationalization", 21, &v0);
assert(v0==0); /* Didn't change v0 */
assert(t0.size==21);
assert(strcmp(t0.data, "internationalization")==0);
ybt_set_value(&t0, "provincial", 11, &v0);
assert(t0.size==11);
assert(strcmp(t0.data, "provincial")==0);
my_free(t0.data);
memory_check_all_free();
}
/* Test entry point: run ybt_test0 and exit with status 0 (failures abort via assert). */
int main (int argc __attribute__((__unused__)), char *argv[] __attribute__((__unused__))) {
ybt_test0();
return 0;
}
#define _FILE_OFFSET_BITS 64
#include "ybt.h"
#include "memory.h"
#include <errno.h>
#include <string.h>
/* Reset every byte of *ybt to zero.  Always returns 0. */
int ybt_init (DBT *ybt) {
    memset(ybt, 0, sizeof *ybt);
    return 0;
}
/* Store the VALLEN bytes at VAL into *ybt, choosing the destination buffer
 * from ybt->flags (compared with ==, so only an exact single flag matches):
 *   DB_DBT_MALLOC   a fresh my_malloc'd buffer is stored in ybt->data;
 *   DB_DBT_REALLOC  ybt->data is grown/shrunk with my_realloc (or malloc'd
 *                   when it is null);
 *   DB_DBT_USERMEM  the caller's ybt->data / ybt->ulen are used as they are;
 *   anything else   the buffer at *staticptrp is (re)allocated and used;
 *                   staticptrp must then be non-null, or -1 is returned.
 * ybt->size is always set to VALLEN; at most ybt->ulen bytes are copied, and
 * nothing is copied when ulen is 0.
 * Returns 0 on success, -1 as described above, or errno if errno is nonzero
 * after an allocation.
 * NOTE(review): errno is read without being cleared first, so this presumably
 * relies on errno being 0 on entry or on my_malloc/my_realloc resetting it --
 * confirm against memory.c. */
int ybt_set_value (DBT *ybt, bytevec val, ITEMLEN vallen, void **staticptrp) {
if (ybt->flags==DB_DBT_MALLOC) {
/* Also the target of the goto in the REALLOC branch below. */
domalloc:
ybt->data = my_malloc(vallen);
if (errno!=0) return errno;
ybt->ulen = vallen;
} else if (ybt->flags==DB_DBT_REALLOC) {
if (ybt->data==0) goto domalloc;
ybt->data = my_realloc(ybt->data, vallen);
if (errno!=0) return errno;
ybt->ulen = vallen;
} else if (ybt->flags==DB_DBT_USERMEM) {
/*nothing*/
} else {
if (staticptrp==0) return -1;
void *staticptr=*staticptrp;
if (staticptr==0)
staticptr = my_malloc(vallen);
else
staticptr = my_realloc(staticptr, vallen);
if (errno!=0) return errno;
*staticptrp = staticptr;
ybt->data = staticptr;
ybt->ulen = vallen;
}
ybt->size = vallen;
if (ybt->ulen>0) {
/* Never copy more than the destination can hold. */
if (ybt->ulen<vallen) vallen=ybt->ulen;
memcpy(ybt->data, val, vallen);
}
return 0;
}
#ifndef YBT_H
#define YBT_H
// brttypes.h must be first to make 64-bit file mode work right in linux.
#include "brttypes.h"
#include "../include/db.h"
int ybt_init (DBT *);
int ybt_set_value (DBT *, bytevec val, ITEMLEN vallen, void **staticptrp);
#endif
/* Status codes used by the PMA routines: BRT_OK for success,
 * BRT_ALREADY_THERE when pma_insert finds the key already stored, and
 * BRT_KEYEMPTY when pma_cget_current is asked for a slot with no key. */
enum pma_errors { BRT_OK=0, BRT_ALREADY_THERE = -2, BRT_KEYEMPTY=-3 };
/* Type tags, numbered consecutively from 0xdead0001.
 * NOTE(review): presumably these are stored in objects by the TAGMALLOC
 * macro to identify their type -- confirm against memory.h. */
enum typ_tag { TYP_BRTNODE = 0xdead0001, TYP_CACHETABLE, TYP_PMA };
# Compile with warnings treated as errors and with debug information.
CFLAGS = -Wall -W -Werror -g
# The pma program is linked against the math library (target-specific LDFLAGS).
pma: LDFLAGS=-lm
# Both targets rely on make's built-in rules; no explicit recipes are given.
pma:
pma.o:
CFLAGS = -g -W -Wall -Wno-unused
CPPFLAGS = -I../include
C_OBJS= mut_pthread.lo \
bt_compare.lo bt_conv.lo bt_curadj.lo bt_cursor.lo bt_delete.lo \
bt_method.lo bt_open.lo bt_put.lo bt_rec.lo bt_reclaim.lo \
bt_recno.lo bt_rsearch.lo bt_search.lo bt_split.lo bt_stat.lo \
bt_upgrade.lo bt_verify.lo btree_auto.lo crdel_auto.lo \
crdel_rec.lo db.lo db_am.lo db_auto.lo db_byteorder.lo db_cam.lo \
db_conv.lo db_dispatch.lo db_dup.lo db_err.lo db_getlong.lo \
db_idspace.lo db_iface.lo db_join.lo db_log2.lo db_meta.lo \
db_method.lo db_open.lo db_overflow.lo db_pr.lo db_rec.lo \
db_reclaim.lo db_rename.lo db_remove.lo db_ret.lo db_salloc.lo \
db_shash.lo db_truncate.lo db_upg.lo db_upg_opd.lo db_vrfy.lo \
db_vrfyutil.lo dbm.lo dbreg.lo dbreg_auto.lo dbreg_rec.lo \
dbreg_util.lo env_file.lo env_method.lo env_open.lo env_recover.lo \
env_region.lo fileops_auto.lo fop_basic.lo fop_rec.lo \
fop_util.lo hash.lo hash_auto.lo hash_conv.lo hash_dup.lo \
hash_func.lo hash_meta.lo hash_method.lo hash_open.lo \
hash_page.lo hash_rec.lo hash_reclaim.lo hash_stat.lo \
hash_upgrade.lo hash_verify.lo hmac.lo hsearch.lo lock.lo \
lock_deadlock.lo lock_method.lo lock_region.lo lock_stat.lo \
lock_util.lo log.lo log_archive.lo log_compare.lo log_get.lo \
log_method.lo log_put.lo mp_alloc.lo mp_bh.lo mp_fget.lo \
mp_fopen.lo mp_fput.lo mp_fset.lo mp_method.lo mp_region.lo \
mp_register.lo mp_stat.lo mp_sync.lo mp_trickle.lo mutex.lo \
os_abs.lo os_alloc.lo os_clock.lo os_config.lo os_dir.lo \
os_errno.lo os_fid.lo os_fsync.lo os_handle.lo os_id.lo \
os_map.lo os_method.lo os_oflags.lo os_open.lo os_region.lo \
os_rename.lo os_root.lo os_rpath.lo os_rw.lo os_seek.lo \
os_sleep.lo os_spin.lo os_stat.lo os_tmpdir.lo os_unlink.lo \
qam.lo qam_auto.lo qam_conv.lo qam_files.lo qam_method.lo \
qam_open.lo qam_rec.lo qam_stat.lo qam_upgrade.lo qam_verify.lo \
rep_method.lo rep_record.lo rep_region.lo rep_util.lo sha1.lo \
txn.lo txn_auto.lo txn_method.lo txn_rec.lo txn_recover.lo \
txn_region.lo txn_stat.lo txn_util.lo xa.lo xa_db.lo xa_map.lo
# Copy the freshly built shared library into ../src.
install: libdb.so
cp libdb.so ../src/
# Link the wrapper objects together with the Berkeley DB objects named in
# C_OBJS (taken from the build_unix directory) into one shared library.  The
# echo prints an abbreviated form of the command; the real cc line is
# prefixed with @ so make does not print the very long object list.
libdb.so: ydb.lo bdbw.lo
echo cc ydb.lo bdbw.lo BDB-OBJS -shared -fPIC -o libdb.so $(CFLAGS)
@cc ydb.lo bdbw.lo $(patsubst %,/home/bradley/mysql/build-bdb-with-uniquename/bdb/build_unix/%,$(C_OBJS)) -shared -fPIC -o libdb.so $(CFLAGS)
ydb.lo: bdbw.h
# bdbw.c is compiled against the headers of the uniquename Berkeley DB build
# instead of ../include.
bdbw.lo: CPPFLAGS=-I/home/bradley/mysql/build-bdb-with-uniquename/bdb/build_unix
# Pattern rule: build a position-independent object (.lo) from each .c file.
%.lo: %.c
cc $(CPPFLAGS) $< -c -fPIC -o $@ $(CFLAGS)
cd ~/yobiduck/ydb/src
make
cd ~/mysql/bdbi/mysql-5.0.27/
export LD_RUN_PATH=/home/bradley/yobiduck/ydb/src
./configure --with-berkeley-db-includes=/home/bradley/yobiduck/ydb/include --with-berkeley-db --with-berkeley-db-libs=/home/bradley/yobiduck/ydb/src --prefix=/home/bradley/usr
make
make install
#This one may not be needed
~/mysql/bdbi/usr/bin/mysql_install_db
#
pushd /home/bradley/mysql/bdbi/usr/ ; /home/bradley/mysql/bdbi/usr//bin/mysqld_safe &
popd
~/mysql/bdbi/usr/bin/mysql -u root
mysql> show databases;
mysql> create database yobitest;
mysql> use yobitest;
mysql> create table t1 (i int) engine=bdb;
Look for the error in /home/bradley/mysql/bdbi/usr/var/yobert.err
----
This links right:
LD_LIBRARY_PATH=/home/bradley/mysql/bdbi/usr/lib/mysql/ ldd sql/mysqld
----
This works,
LD_LIBRARY_PATH=/home/bradley/mysql/bdbi/usr/lib/mysql/ /home/bradley/mysql/bdbi/usr//bin/mysqld
producing the following in the log
061208 16:11:35 InnoDB: Started; log sequence number 0 43655
ydb.c:78 db_env_create flags=0
----
the LD_RUN_PATH thing above works.
--- on laptop I did this instead:
cd ~/mysql/mysql-5.0.27/
export LD_RUN_PATH=/home/bradley/yobiduck/ydb/src
./configure --with-berkeley-db-includes=/home/bradley/yobiduck/ydb/include --with-berkeley-db --with-berkeley-db-libs=/home/bradley/yobiduck/src --prefix=/home/bradley/usr
make
make install
/home/bradley/usr/bin/mysql_install_db
/home/bradley/usr/bin/mysqld_safe &
/home/bradley/usr/bin/mysql -u root
~/mysql/bdbi/usr/bin/mysql -u root
mysql> show databases;
mysql> create database yobitest;
mysql> use yobitest;
mysql> create table t1 (i int) engine=bdb;
Look for the error in ~/usr/var/localhost.localdomain.err
---
To clean up after a total screwup:
rm -rf ~/usr/var/
Didn't manage to clean it up very well.
---
Goal: compile mysql with debugging
export LD_RUN_PATH=/home/bradley/yobiduck/ydb/src
./configure CFLAGS="-g -O2" --with-berkeley-db-includes=/home/bradley/yobiduck/ydb/include --with-berkeley-db --with-berkeley-db-libs=/home/bradley/yobiduck/src --prefix=/home/bradley/usr
If you want to debug, you might need to start mysqld without using mysqld_safe.
Here is one way to do it:
gdb ~/usr/libexec/mysqld
(gdb) run --basedir=/home/bradley/usr --datadir=/home/bradley/usr/var --pid-file=/home/bradley/usr/var/localhost.localdomain.pid --skip-external-locking
That was screwed up (the configure args were wrong.) Try again:
---
export LD_RUN_PATH=/home/bradley/yobiduck/ydb/src
./configure CXXFLAGS="-g -O2" CFLAGS="-g -O2" --with-berkeley-db-includes=/home/bradley/yobiduck/ydb/include --with-berkeley-db --with-berkeley-db-libs=/home/bradley/yobiduck/ydb/src --prefix=/home/bradley/usr
gdb ~/usr/libexec/mysqld
(gdb) run --basedir=/home/bradley/usr --datadir=/home/bradley/usr/var --pid-file=/home/bradley/usr/var/localhost.localdomain.pid --skip-external-locking
and
~/usr/bin/mysql -u root
Note: Had to change mysql to declare berkeley_cmp_hidden_key and berkeley_cmp_packed_key to be extern, not static.
----
To start the mysql clean:
rm -r /home/bradley/usr/var/
/home/bradley/usr/bin/mysql_install_db
Then run mysqld and mysql, and do the following:
$ ~/usr/bin/mysql -u root
mysql> create database yobitest;
mysql> use yobitest;
mysql> create table t1 (i int) engine=bdb;
mysql> insert t1 values (3);
mysql> quit
~/usr/bin/mysqladmin -u root shutdown
Then create trace1 from ydbtrace.c
(copy the output onto traces/trace1.c)
cd traces
make
rm ~/usr/var/log.0000000001
rm ~/usr/var/yobitest/t1.db
./runtrace1
/* Wrapper for bdb.c. */
#include <sys/types.h>
/* This includes the ydb db.h, but with unique names for everything. */
#include "ydb-uniq.h"
/* This include is to the berkeley-db compiled with --with-uniquename */
#include <db.h>
/* This include is to the interface between ydb and bdb. */
#include "bdbw.h"
#include <sys/types.h>
#include <sys/time.h>
#include <assert.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/* Diagnostic macros.  Each prints a "YDB:" line to stderr carrying the file,
 * line and function of the call site.  barf/barff are used to flag entry
 * points that are not implemented; note/notef are informational.  The *f
 * variants append a printf-style message and need at least one argument
 * after the format (they expand __VA_ARGS__ directly). */
#define barf() ({ fprintf(stderr, "YDB: BARF %s:%d in %s\n", __FILE__, __LINE__, __func__); })
#define barff(fmt,...) ({ fprintf(stderr, "YDB: BARF %s:%d in %s, ", __FILE__, __LINE__, __func__); fprintf(stderr, fmt, __VA_ARGS__); })
#define note() ({ fprintf(stderr, "YDB: Note %s:%d in %s\n", __FILE__, __LINE__, __func__); })
#define notef(fmt,...) ({ fprintf(stderr, "YDB: Note %s:%d in %s, ", __FILE__, __LINE__, __func__); fprintf(stderr, fmt, __VA_ARGS__); })
static char *tracefname = "/home/bradley/ydbtrace.c";
static FILE *traceout=0;
unsigned long long objnum=1;
void tracef (const char *fmt, ...) __attribute__((format (printf, 1, 2)));
/* Append a printf-formatted line to the trace file named by tracefname and
 * flush it.  The file is opened in append mode on the first call, which also
 * writes a header comment carrying the current time and announces the
 * creation on stderr.  Asserts that the file could be opened. */
void tracef (const char *fmt, ...) {
    va_list args;
    if (!traceout) {
        struct timeval now;
        char *stamp;
        gettimeofday(&now, 0);
        stamp = ctime(&now.tv_sec);
        stamp[strlen(stamp)-1] = 0;   /* drop ctime's trailing newline */
        traceout = fopen(tracefname, "a");
        assert(traceout);
        fprintf(stderr, "traceout created\n");
        fprintf(traceout, "/* bdbw trace captured %s (%ld.%06ld) */\n",
                stamp, now.tv_sec, now.tv_usec);
    }
    va_start(args, fmt);
    vfprintf(traceout, fmt, args);
    va_end(args);
    fflush(traceout);
}
/* Private state behind a wrapped environment handle. */
struct db_env_ydb_internal {
unsigned long long objnum; /* identifier used for this object in the trace output */
DB_ENV *env; /* the underlying Berkeley DB environment */
void (*noticecall)(DB_ENV_ydb*, db_notices_ydb); /* callback installed by ydb_env_set_noticecall */
};
/* Private state behind a wrapped transaction handle. */
struct yobi_db_txn_internal {
long long objnum; /* identifier used for this object in the trace output */
DB_TXN *txn; /* the underlying Berkeley DB transaction; cleared by yobi_db_txn_commit */
};
/* Error reporter installed as the environment's `err` method: prints
 * "YDB Error <error>:" followed by the printf-formatted message to stderr.
 * The env argument is not used. */
static void ydb_env_err (const DB_ENV_ydb *env, int error, const char *fmt, ...) {
    va_list args;
    fprintf(stderr, "YDB Error %d:", error);
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
}
/* If the ydb flavour of FLAG (FLAG_ydb) is set in the local variable `flags`,
 * OR the Berkeley DB FLAG into the local `gotit`.  Both variables must exist
 * in the calling scope.
 * NOTE(review): the clearing step masks with ~FLAG_ydb but applies it to
 * `flags` via `flag` (not `flag ## _ydb`) only on the gotit side -- check the
 * operands before reusing this macro; the doits() variant is what the
 * converters below call. */
#define doit(flag) ({ if (flag ## _ydb & flags) { gotit|=flag; flags&=~flag ## _ydb; } })
/* Translate one flag.  If flag_ydb is set in *flags_ydb: set flag_bdb in
 * *flags_bdb, clear flag_ydb from *flags_ydb, append "|<flagname>" at
 * *flagstring, and advance *flagstring / shrink *flagstringlen past the text
 * just written (the terminating NUL is left to be overwritten by the next
 * append).  Does nothing when the flag is not set.  Asserts that the
 * remaining buffer is large enough. */
void doits_internal (u_int32_t flag_ydb, u_int32_t flag_bdb, char *flagname, u_int32_t *flags_ydb, u_int32_t *flags_bdb, char **flagstring, int *flagstringlen) {
    int namelen;
    if ((*flags_ydb & flag_ydb) == 0) return;
    namelen = strlen(flagname);
    *flags_bdb |= flag_bdb;
    *flags_ydb &= ~flag_ydb;
    assert(namelen + 2 < *flagstringlen);
    snprintf(*flagstring, *flagstringlen, "|%s", flagname);
    *flagstring    += namelen+1;
    *flagstringlen -= namelen+1;
}
/* Convenience wrapper around doits_internal for flag FLAG.  It expects the
 * calling function to have locals named exactly flags, gotit, flagstring and
 * flagstringlen, and passes #flag as the name to append. */
#define doits(flag) doits_internal(flag ## _ydb, flag, #flag, &flags, &gotit, &flagstring, &flagstringlen)
/* Convert ydb environment-open flags to their Berkeley DB equivalents.
 * FLAGSTRING receives a C expression for the result ("0" followed by
 * "|NAME" for each flag that was set), for use in the trace.  Asserts that
 * no unrecognised flag bits remain.  The local names are fixed by the doits
 * macro. */
static u_int32_t convert_envopen_flags(u_int32_t flags, char *flagstring, int flagstringlen) {
u_int32_t gotit=0;
/* Start the expression with "0" and step past it, leaving its NUL to be overwritten. */
snprintf(flagstring, flagstringlen, "0"); flagstringlen--; flagstring++;
doits(DB_INIT_LOCK);
doits(DB_INIT_LOG);
doits(DB_INIT_MPOOL);
doits(DB_INIT_TXN);
doits(DB_CREATE);
doits(DB_THREAD);
doits(DB_RECOVER);
doits(DB_PRIVATE);
assert(flags==0);
return gotit;
}
/* Convert ydb open flags (DB_CREATE, DB_RDONLY, DB_RECOVER, DB_THREAD) to
 * their Berkeley DB equivalents, writing a "0|NAME|..." expression for the
 * trace into FLAGSTRING.  Asserts that no unrecognised flag bits remain.
 * The local names are fixed by the doits macro. */
static u_int32_t open_flags_ydb_2_bdb (u_int32_t flags, char *flagstring, int flagstringlen) {
u_int32_t gotit=0;
snprintf(flagstring, flagstringlen, "0"); flagstringlen--; flagstring++;
doits(DB_CREATE);
doits(DB_RDONLY);
doits(DB_RECOVER);
doits(DB_THREAD);
assert(flags==0);
return gotit;
}
/* Convert db_create flags.  Only 0 is supported (and maps to 0); any other
 * value aborts the process. */
u_int32_t convert_db_create_flags(u_int32_t flags) {
    if (flags!=0) {
        abort();
    }
    return 0;
}
/* If the local `flags` equals the ydb flavour of FLAG exactly, copy FLAG's
 * name into the local buffer flagstring (strncpy bounded by flagstringlen,
 * so no NUL is written if the name fills the buffer) and return the
 * Berkeley DB FLAG from the enclosing function. */
#define retit(flag) ({ if (flag ## _ydb == flags) { strncpy(flagstring, #flag ,flagstringlen); return flag; } })
/* Convert a ydb cursor-get flag (exactly one of DB_FIRST, DB_LAST, DB_NEXT)
 * to its Berkeley DB value, writing the flag's name into FLAGSTRING.
 * Any other value aborts the process. */
u_int32_t convert_c_get_flags(u_int32_t flags, char *flagstring, int flagstringlen) {
retit(DB_FIRST);
retit(DB_LAST);
retit(DB_NEXT);
abort();
}
/* DB_ENV->open wrapper: convert the flags, open the underlying Berkeley DB
 * environment, and record the call together with its result in the trace.
 * Returns the underlying open's result. */
int ydb_env_open (DB_ENV_ydb *env, const char *home, u_int32_t flags, int mode) {
    char flagstring[1000];
    u_int32_t bdb_flags;
    int rc;
    bdb_flags = convert_envopen_flags(flags, flagstring, sizeof(flagstring));
    rc = env->i->env->open(env->i->env, home, bdb_flags, mode);
    tracef("r = envobj(%lld)->open(envobj(%lld), \"%s\", %s, 0%o); assert(r==%d);\n",
           env->i->objnum, env->i->objnum, home, flagstring, mode, rc);
    return rc;
}
/* DB_ENV->close wrapper: only flags==0 is supported (asserted).  Closes the
 * underlying environment and clears the stored handle; the wrapper object
 * itself is not freed.  Returns the underlying close's result. */
int bdbw_env_close (DB_ENV_ydb * env, u_int32_t flags) {
    int rc;
    notef("flags=%d\n", flags);
    assert(flags==0);
    rc = env->i->env->close(env->i->env, 0);
    env->i->env = 0;
    // free(env);
    return rc;
}
/* Convert a ydb log_archive flag (exactly DB_ARCH_ABS or DB_ARCH_LOG) to its
 * Berkeley DB value, writing the flag's name into FLAGSTRING.  Any other
 * value aborts the process. */
u_int32_t convert_log_archive_flags (u_int32_t flags, char *flagstring, int flagstringlen) {
retit(DB_ARCH_ABS);
retit(DB_ARCH_LOG);
abort();
}
/* DB_ENV->log_archive wrapper: convert the flag, forward the call, assert
 * that it succeeded, and record it in the trace.  Returns the underlying
 * result (always 0 when assertions are enabled). */
int ydb_env_log_archive (DB_ENV_ydb *env, char **list[], u_int32_t flags) {
    char flagstring[1000];
    int bdbflags;
    int rc;
    bdbflags = convert_log_archive_flags(flags, flagstring, sizeof(flagstring));
    rc = env->i->env->log_archive(env->i->env, list, bdbflags);
    assert(rc==0);
    tracef("{ char **list; r = envobj(%lld)->log_archive(envobj(%lld), &list, %s); assert(r==%d); }\n",
           env->i->objnum, env->i->objnum, flagstring, rc);
    return rc;
}
/* Not implemented: reports the call with barf() and returns 1. */
int ydb_env_log_flush (DB_ENV_ydb * env, const DB_LSN_ydb * lsn) {
barf();
return 1;
}
/* Forward set_cachesize unchanged to the underlying Berkeley DB environment. */
int ydb_env_set_cachesize (DB_ENV_ydb * env, u_int32_t gbytes, u_int32_t bytes, int ncache) {
    int rc = env->i->env->set_cachesize(env->i->env, gbytes, bytes, ncache);
    return rc;
}
/* Forward set_data_dir unchanged to the underlying Berkeley DB environment. */
int ydb_env_set_data_dir (DB_ENV_ydb * env, const char *dir) {
    int rc = env->i->env->set_data_dir(env->i->env, dir);
    return rc;
}
/* Install ERRCALL directly as the underlying Berkeley DB environment's error callback. */
void ydb_env_set_errcall (DB_ENV_ydb *env, void (*errcall)(const char *, char *)) {
env->i->env->set_errcall(env->i->env, errcall);
}
/* Forward the error-message prefix unchanged to the underlying Berkeley DB environment. */
void ydb_env_set_errpfx (DB_ENV_ydb * env, const char *errpfx) {
env->i->env->set_errpfx(env->i->env, errpfx);
}
/* DB_ENV->set_flags wrapper.  No flag conversion exists yet, so only
 * flags==0 is accepted (asserted) before forwarding the call. */
int ydb_env_set_flags (DB_ENV_ydb *env, u_int32_t flags, int onoff) {
    int rc;
    assert(flags==0);
    rc = env->i->env->set_flags(env->i->env, flags, onoff);
    return rc;
}
/* Forward set_lg_bsize unchanged to the underlying Berkeley DB environment. */
int ydb_env_set_lg_bsize (DB_ENV_ydb * env, u_int32_t bsize) {
    int rc = env->i->env->set_lg_bsize(env->i->env, bsize);
    return rc;
}
/* Not implemented: reports the call with barf() and returns 1. */
int ydb_env_set_lg_dir (DB_ENV_ydb *env, const char * dir) {
barf();
return 1;
}
/* Forward set_lg_max unchanged to the underlying Berkeley DB environment. */
int ydb_env_set_lg_max (DB_ENV_ydb *env, u_int32_t lg_max) {
    int rc = env->i->env->set_lg_max(env->i->env, lg_max);
    return rc;
}
/* Forward set_lk_detect unchanged to the underlying Berkeley DB environment. */
int ydb_env_set_lk_detect (DB_ENV_ydb *env, u_int32_t detect) {
    int rc = env->i->env->set_lk_detect(env->i->env, detect);
    return rc;
}
/* Forward set_lk_max unchanged to the underlying Berkeley DB environment. */
int ydb_env_set_lk_max (DB_ENV_ydb *env, u_int32_t lk_max) {
    int rc = env->i->env->set_lk_max(env->i->env, lk_max);
    return rc;
}
/* Notice callback registered with Berkeley DB.  Recovers the wrapper
 * environment from app_private, records the event in the trace, and relays
 * the notice to the user's callback with the value translated to its ydb
 * counterpart.  Only 0 and DB_NOTICE_LOGFILE_CHANGED are expected. */
void ydbenv_bdb_noticecall (DB_ENV *bdb_env, db_notices notices) {
    DB_ENV_ydb *ydb_env = bdb_env->app_private;
    tracef("/* Doing noticecall */\n");
    assert(notices==0 || notices==DB_NOTICE_LOGFILE_CHANGED);
    if (notices==0) {
        ydb_env->i->noticecall(ydb_env, 0);
    } else {
        ydb_env->i->noticecall(ydb_env, DB_NOTICE_LOGFILE_CHANGED_ydb);
    }
}
extern void berkeley_noticecall (DB_ENV_ydb *, db_notices_ydb);
/* DB_ENV->set_noticecall wrapper: register ydbenv_bdb_noticecall with the
 * underlying environment, remember the user's callback so it can be relayed
 * to, and record the call in the trace (naming the callback when it is
 * berkeley_noticecall, "Unknown_function" otherwise). */
void ydb_env_set_noticecall (DB_ENV_ydb *env, void (*noticecall)(DB_ENV_ydb *, db_notices_ydb)) {
    const char *fun_name = (noticecall==berkeley_noticecall)
        ? "berkeley_noticecall"
        : "Unknown_function";
    env->i->env->set_noticecall(env->i->env, ydbenv_bdb_noticecall);
    env->i->noticecall = noticecall;
    tracef("envobj(%lld)->set_noticecall(envobj(%lld), %s);\n",
           env->i->objnum, env->i->objnum, fun_name);
}
/* DB_ENV->set_tmp_dir wrapper: forward the call and record it, with its
 * result, in the trace.  Returns the underlying result. */
int ydb_env_set_tmp_dir (DB_ENV_ydb * env, const char *tmp_dir) {
    int rc;
    rc = env->i->env->set_tmp_dir(env->i->env, tmp_dir);
    tracef("r = envobj(%lld)->set_tmp_dir(envobj(%lld), \"%s\"); assert(r==%d);\n",
           env->i->objnum, env->i->objnum, tmp_dir, rc);
    return rc;
}
/* Not implemented: reports the call with barf() and returns 1. */
int ydb_env_set_verbose (DB_ENV_ydb *env, u_int32_t which, int onoff) {
barf();
return 1;
}
/* DB_ENV->txn_checkpoint wrapper: only flags==0 is supported (asserted).
 * Forwards the call, asserts that it succeeded, and records it in the
 * trace.  Returns the underlying result. */
int ydb_env_txn_checkpoint (DB_ENV_ydb *env, u_int32_t kbyte, u_int32_t min, u_int32_t flags) {
    int rc;
    assert(flags==0);
    rc = env->i->env->txn_checkpoint(env->i->env, kbyte, min, 0);
    assert(rc==0);
    tracef("r=envobj(%lld)->txn_checkpoint(envobj(%lld), %u, %u, %u); assert(r==0);\n",
           env->i->objnum, env->i->objnum, kbyte, min, flags);
    return rc;
}
/* Not implemented: reports the call with barf() and returns 1. */
int ydb_env_txn_stat (DB_ENV_ydb *env, DB_TXN_STAT_ydb **statp, u_int32_t flags) {
barf();
return 1;
}
/* Create a wrapped environment: allocate the wrapper and its private state,
 * fill in the method table, create the underlying Berkeley DB environment,
 * and record the call in the trace.  The wrapper is returned through *envp.
 *
 * Returns ENOMEM (leaving *envp untouched) if an allocation fails, otherwise
 * the result of db_env_create_4001.  The private env/noticecall fields start
 * out null, and the back-pointer in app_private is installed only when the
 * underlying create succeeded. */
int db_env_create_bdbw (struct yobi_db_env **envp, u_int32_t flags) {
    struct yobi_db_env *result = malloc(sizeof(*result));
    int r;
    if (result==0) return ENOMEM;
    result->i = malloc(sizeof(*result->i));
    if (result->i==0) {
        free(result);
        return ENOMEM;
    }
    result->i->objnum = objnum++;
    result->i->env = 0;
    result->i->noticecall = 0;
    result->err = ydb_env_err;
    result->open = ydb_env_open;
    result->close = bdbw_env_close;
    result->txn_checkpoint = ydb_env_txn_checkpoint;
    result->log_flush = ydb_env_log_flush;
    result->set_errcall = ydb_env_set_errcall;
    result->set_errpfx = ydb_env_set_errpfx;
    result->set_noticecall = ydb_env_set_noticecall;
    result->set_flags = ydb_env_set_flags;
    result->set_data_dir = ydb_env_set_data_dir;
    result->set_tmp_dir = ydb_env_set_tmp_dir;
    result->set_verbose = ydb_env_set_verbose;
    result->set_lg_bsize = ydb_env_set_lg_bsize;
    result->set_lg_dir = ydb_env_set_lg_dir;
    result->set_lg_max = ydb_env_set_lg_max;
    result->set_cachesize = ydb_env_set_cachesize;
    result->set_lk_detect = ydb_env_set_lk_detect;
    result->set_lk_max = ydb_env_set_lk_max;
    result->log_archive = ydb_env_log_archive;
    result->txn_stat = ydb_env_txn_stat;
    result->txn_begin = txn_begin_bdbw;
    r = db_env_create_4001(&result->i->env, flags);
    /* Only touch the underlying handle if it was actually created. */
    if (r==0 && result->i->env!=0) {
        result->i->env->app_private = result;
    }
    *envp = result;
    tracef("r=db_env_create(new_envobj(%lld), %u); assert(r==%d);\n",
           result->i->objnum, flags, r);
    return r;
}
/* DB_TXN->commit wrapper: only flags==0 is supported (asserted; other values
 * would need converting).  Commits the underlying transaction, clears the
 * stored handle, and records the call with its result in the trace.  The
 * wrapper object itself is not freed.  Returns the underlying result. */
int yobi_db_txn_commit (DB_TXN_ydb *txn, u_int32_t flags) {
    int rc;
    assert(flags==0); // need to convert otherwise.
    rc = txn->i->txn->commit(txn->i->txn, 0);
    txn->i->txn = 0;
    tracef("r=txnobj(%lld)->commit(txnobj(%lld), %d); assert(r==%d);\n",
           txn->i->objnum, txn->i->objnum, flags, rc);
    // free(txn);
    return rc;
}
/* Not implemented: calls barf() and then abort(), so it never returns.  txn is not used. */
u_int32_t yobi_db_txn_id (DB_TXN_ydb *txn) {
barf();
abort();
}
// There is no txn_begin when generated with --with-uniquename.
/*
 * Begin a transaction in the wrapped Berkeley DB environment.
 *
 * env   - wrapped environment.
 * stxn  - parent transaction wrapper, or null for none.
 * txn   - receives the new transaction wrapper (also when the wrapped
 *         txn_begin fails).
 * flags - passed through unchanged to the wrapped txn_begin.
 *
 * Returns the result of the wrapped txn_begin and writes a trace line
 * recording the call.  Allocation failure is treated as fatal (assert),
 * matching the allocation check in bdbw_db_cursor().
 */
int txn_begin_bdbw (struct yobi_db_env *env, struct yobi_db_txn *stxn, struct yobi_db_txn **txn, u_int32_t flags) {
    int r;
    struct yobi_db_txn *result = malloc(sizeof(*result));
    assert(result);
    result->commit = yobi_db_txn_commit;
    result->id = yobi_db_txn_id;
    result->i = malloc(sizeof(*result->i));
    assert(result->i);
    result->i->objnum = objnum++;
    result->i->txn = 0; /* stays null if the wrapped txn_begin fails */
    //note();
    r = env->i->env->txn_begin(env->i->env,
                               stxn ? stxn->i->txn : 0,
                               &result->i->txn, flags);
    *txn = result;
    tracef("r = envobj(%lld)->txn_begin(envobj(%lld), ", env->i->objnum , env->i->objnum);
    if (!stxn) tracef("0, "); else tracef(" txnobj(%lld), ", stxn->i->objnum);
    tracef("new_txnobj(%lld), 0x%x); ", result->i->objnum, flags);
    tracef(" assert(r==%d);\n", r);
    return r;
}
/* Not implemented: calls barf() and then abort(), so it never returns.  txn is not used. */
int txn_abort_bdbw (DB_TXN_ydb *txn) {
barf();
abort();
}
/*
 * Commit a wrapped transaction through the free-function interface.
 *
 * Only flags==0 is accepted; the underlying Berkeley DB commit is always
 * called with flags 0 and must succeed.  A trace line recording the call
 * is written afterwards.  Returns the result of the wrapped commit.
 */
int txn_commit_bdbw (DB_TXN_ydb *txn, u_int32_t flags) {
    const u_int32_t no_flags = 0;          /* value handed to Berkeley DB   */
    const char *const no_flags_text = "0"; /* same value as shown in trace  */
    int rc;

    assert(flags == 0);

    rc = txn->i->txn->commit(txn->i->txn, no_flags);
    assert(rc == 0);

    tracef("r=txnobj(%lld)->commit(txnobj(%lld), %s); assert(r==%d);\n",
           txn->i->objnum, txn->i->objnum,
           no_flags_text, rc);
    return rc;
}
/* Private state behind a wrapped database handle (DB_ydb). */
struct ydb_db_internal {
long long objnum; // Number used to name this object in trace output (taken from the global objnum counter in db_create_bdbw).
DB *db; // The underlying Berkeley DB handle; set to 0 by bdbw_db_close.
int (*bt_compare)(DB_ydb *, const DBT_ydb *, const DBT_ydb *); // ydb-level comparator stored by bdbw_db_set_bt_compare and called from bdbw_bt_compare; 0 until set.
};
/*
 * Close method of a wrapped database.
 *
 * Closes the underlying Berkeley DB handle with flags 0, writes a trace
 * line, and clears the wrapper's pointer to the closed handle.  Only
 * flags==0 is accepted.  The wrapper itself is not freed here.
 *
 * Returns the result of the wrapped close.  The trace records that actual
 * result, because this function does not require the close to succeed.
 */
static int bdbw_db_close (DB_ydb *db, u_int32_t flags) {
    int r;
    //notef("flags=%d\n", flags);
    assert(flags==0);
    r = db->i->db->close(db->i->db, 0);
    tracef("r=dbobj(%lld)->close(dbobj(%lld), 0); assert(r==%d);\n",
           db->i->objnum, db->i->objnum, r);
    db->i->db = 0;
    // free(db);
    return r;
}
/* Private state behind a wrapped cursor (DBC_ydb). */
struct yobi_dbc_internal {
DBC *dbc; // The underlying Berkeley DB cursor; filled in by bdbw_db_cursor and set to 0 by yobi_dbc_c_close.
long long objnum; // Number used to name this cursor in trace output.
};
/*
 * Prepare a Berkeley DB DBT from a ydb DBT before a call into Berkeley DB.
 * (Despite the name, data flows from the ydb-side `a` into the BDB-side `da`.)
 *
 * da      - Berkeley DB DBT to fill; it is zeroed first.
 * a       - ydb DBT supplied by the caller; only data, flags and ulen are read.
 * varname - name of the matching variable in the generated trace code.
 *
 * DB_DBT_USERMEM is the only flag that is translated; any other flag bit
 * trips the final assert.  Trace lines mirroring the setup are written.
 */
void dbt_bdb2ydb (DBT *da, DBT_ydb *a, const char *varname) {
    u_int32_t aflags = a->flags;
    memset(da, 0, sizeof(*da));
    tracef(" memset(&%s,0,sizeof(a));\n", varname);
    da->data = a->data;
    /* Test the bit rather than the whole word so that USERMEM is still
     * translated when it is combined with other bits. */
    if (aflags & DB_DBT_USERMEM_ydb) {
        aflags &= ~DB_DBT_USERMEM_ydb;
        da->flags |= DB_DBT_USERMEM;
        tracef(" %s.flags |= DB_DBT_USERMEM;\n", varname);
        if (a->ulen>0) {
            tracef(" %s.data = malloc(%u);\n", varname, a->ulen);
        } else {
            tracef(" %s.data = 0;\n", varname);
        }
        da->ulen = a->ulen;
        tracef(" %s.ulen = %u;\n", varname, a->ulen);
    }
    /* Every flag must have been handled above. */
    assert(aflags==0);
}
/*
 * c_get method of a wrapped cursor.
 *
 * Converts the flags and both DBTs to their Berkeley DB forms, calls the
 * underlying cursor's c_get, and on success copies the returned size and
 * data pointers back into a and b.  Only DB_FIRST, DB_LAST and DB_NEXT
 * (ydb values) are accepted.
 *
 * Returns 0, DB_PAGE_NOTFOUND or DB_NOTFOUND from the wrapped call; any
 * other result is printed and the process aborts.  A replayable trace
 * fragment is written for every call.
 */
int yobi_dbc_c_get (DBC_ydb *dbc, DBT_ydb *a, DBT_ydb *b, u_int32_t flags) {
    int r;
    DBT da;
    DBT db;
    char flagstring[100]; /* textual form of the converted flags, for the trace */
    int bdb_flags = convert_c_get_flags(flags, flagstring, (int)sizeof(flagstring));
    tracef("{ DBT a,b; \n");
    dbt_bdb2ydb(&da, a, "a");
    dbt_bdb2ydb(&db, b, "b");
    assert(flags==DB_LAST_ydb || flags==DB_FIRST_ydb || flags==DB_NEXT_ydb);
    r = dbc->i->dbc->c_get(dbc->i->dbc, &da, &db, bdb_flags);
    tracef(" r = dbcobj(%lld)->c_get(dbcobj(%lld), ",
           dbc->i->objnum, dbc->i->objnum);
    tracef(" &a, &b, ");
    tracef(" %s);\n", flagstring);
    if (r==0) {
        tracef(" assert(r==%d);\n", r);
        tracef(" assert(a.size==%u);\n", da.size);
        //tracef(" assert(memcmp(a.address, ");
        tracef(" assert(b.size==%u);\n", db.size);
        /* Hand the result back through the caller's ydb DBTs. */
        a->size = da.size;
        a->data = da.data;
        b->size = db.size;
        b->data = db.data;
    } else if (r==DB_PAGE_NOTFOUND) {
        tracef(" assert(r==DB_PAGE_NOTFOUND);\n");
    } else if (r==DB_NOTFOUND) {
        tracef(" assert(r==DB_NOTFOUND);\n");
    } else {
        printf("DB Error r=%d: %s\n", r, db_strerror(r));
        abort();
    }
    tracef("}\n");
    return r;
}
/*
 * c_close method of a wrapped cursor.
 *
 * Closes the underlying Berkeley DB cursor (which must succeed), writes a
 * trace line, and clears the wrapper's pointer to the closed cursor.  The
 * wrapper and its private part are not freed here.
 *
 * Returns the result of the wrapped c_close.
 */
int yobi_dbc_c_close (DBC_ydb *dbc) {
    int status = dbc->i->dbc->c_close(dbc->i->dbc);

    assert(status == 0);
    tracef("r=dbcobj(%lld)->c_close(dbcobj(%lld)); assert(r==%d);\n",
           dbc->i->objnum, dbc->i->objnum,
           status);

    dbc->i->dbc = 0;
    return status;
}
/* Not implemented: calls barf() and then abort(), so it never returns.  dbc and flags are not used. */
int yobi_dbc_c_del (DBC_ydb *dbc, u_int32_t flags) {
barf();
abort();
}
/*
 * Cursor method of a wrapped database.
 *
 * Allocates a cursor wrapper, installs its methods, gives it the next
 * trace object number, and opens a cursor on the underlying Berkeley DB
 * handle (which must succeed).
 *
 * db    - wrapped database.
 * txn   - transaction wrapper, or null for none.
 * c     - receives the new cursor wrapper.
 * flags - must be 0.
 *
 * Returns the result of the wrapped cursor call.  Allocation failure is
 * treated as fatal (assert).
 */
static int bdbw_db_cursor (DB_ydb *db, DB_TXN_ydb *txn, DBC_ydb **c, u_int32_t flags) {
    struct yobi_dbc *dbc = malloc(sizeof(*dbc));
    int r;
    assert(dbc); /* check before the first store through dbc */
    dbc->c_get = yobi_dbc_c_get;
    dbc->c_close = yobi_dbc_c_close;
    dbc->c_del = yobi_dbc_c_del;
    dbc->i = malloc(sizeof(*dbc->i));
    assert(dbc->i);
    assert(flags==0);
    dbc->i->objnum = objnum++;
    r=db->i->db->cursor(db->i->db, txn ? txn->i->txn : 0, &dbc->i->dbc, flags);
    assert(r==0);
    //note();
    *c = dbc;
    tracef("r=dbobj(%lld)->cursor(dbobj(%lld), txnobj(%lld), new_dbcobj(%lld), %d); assert(r==%d);\n",
           db->i->objnum, db->i->objnum, txn ? txn->i->objnum : -1, dbc->i->objnum, flags, r);
    return r;
}
/* Not implemented: calls barf() and then abort(), so it never returns.  No argument is used. */
static int bdbw_db_del (DB_ydb *db, DB_TXN_ydb *txn, DBT_ydb *dbt, u_int32_t flags) {
barf();
abort();
}
/* Not implemented: calls barf() and then abort(), so it never returns.  No argument is used. */
static int bdbw_db_get (DB_ydb *db, DB_TXN_ydb *txn, DBT_ydb *dbta, DBT_ydb *dbtb, u_int32_t flags) {
barf();
abort();
}
/* Not implemented: calls barf() and then abort(), so it never returns.  No argument is used. */
static int bdbw_db_key_range (DB_ydb *db, DB_TXN_ydb *txn, DBT_ydb *dbt, DB_KEY_RANGE_ydb *kr, u_int32_t flags) {
barf();
abort();
}
/*
 * Open method of a wrapped database.
 *
 * Converts the ydb open flags to Berkeley DB flags, opens the underlying
 * handle as a B-tree (which must succeed), and writes a trace line.
 *
 * db     - wrapped database.
 * txn    - transaction wrapper, or null for none.
 * fname  - file name handed to Berkeley DB; may be null.
 * dbname - database name handed to Berkeley DB; may be null.
 * dbtype - must be DB_BTREE_ydb.
 * flags  - ydb open flags, converted by open_flags_ydb_2_bdb().
 * mode   - passed through unchanged.
 *
 * Returns the result of the wrapped open.
 */
static int bdbw_db_open (DB_ydb *db, DB_TXN_ydb *txn, const char *fname, const char *dbname, DBTYPE_ydb dbtype, u_int32_t flags, int mode) {
    int r;
    char flagstring[1000];
    u_int32_t bdb_flags = open_flags_ydb_2_bdb(flags, flagstring, sizeof(flagstring));
    //notef("txn=%p fname=%s dbname=%s dbtype=%d flags=0x%x (bdb=0x%x) %mode=0%o\n", txn, fname, dbname, dbtype, flags, bdb_flags, mode);
    assert(dbtype == DB_BTREE_ydb);
    r = db->i->db->open(db->i->db,
                        txn ? txn->i->txn : 0,
                        fname, dbname, DB_BTREE, bdb_flags, mode);
    assert(db->i->db->app_private == db);
    tracef("r=dbobj(%lld)->open(dbobj(%lld), txnobj(%lld), ",
           db->i->objnum, db->i->objnum, txn ? txn->i->objnum : -1);
    /* A null name is traced as 0 rather than handed to %s. */
    if (fname) tracef("\"%s\", ", fname); else tracef("0, ");
    if (dbname) tracef("\"%s\",", dbname); else tracef("0,");
    if (dbtype==DB_BTREE_ydb) tracef(" DB_BTREE,");
    else abort();
    tracef(" %s, 0%o);", flagstring, mode);
    assert(r==0);
    tracef(" assert(r==%d);\n", r);
    return r;
}
/*
 * Comparison callback registered with Berkeley DB by bdbw_db_set_bt_compare().
 *
 * Recovers the ydb wrapper from the handle's app_private pointer, builds
 * ydb DBTs carrying the data pointer and size of each key, and returns
 * whatever the comparator stored in the wrapper returns.
 */
static int bdbw_bt_compare (DB *db, const DBT *a, const DBT *b) {
    DB_ydb *ydb = db->app_private;
    DBT_ydb a_y, b_y;
    note();
    assert(ydb);
    /* Zero the temporaries so the comparator never sees indeterminate
     * values in the fields that are not copied below. */
    memset(&a_y, 0, sizeof(a_y));
    memset(&b_y, 0, sizeof(b_y));
    a_y.data = a->data;
    a_y.size = a->size;
    b_y.data = b->data;
    b_y.size = b->size;
    return ydb->i->bt_compare(ydb, &a_y, &b_y);
}
/*
 * Convert ydb put flags for Berkeley DB and write their textual form into
 * flagstring (at most flagstringlen bytes).
 * flags==0 yields 0 and the text "0".  Otherwise the retit() macro is tried
 * for DB_NOOVERWRITE; if control gets past it, the process aborts.
 */
u_int32_t convert_put_flags(u_int32_t flags, char *flagstring, int flagstringlen) {
if (flags==0) {
snprintf(flagstring, flagstringlen, "0");
return 0;
}
// NOTE(review): retit is defined outside this view; presumably it returns the BDB value and fills flagstring when flags matches -- confirm.
retit(DB_NOOVERWRITE);
abort();
}
/*
 * Put method of a wrapped database.
 *
 * Writes a replayable trace fragment containing the key and value bytes,
 * builds Berkeley DB DBTs that point at the caller's data, and stores the
 * pair through the underlying handle (which must succeed).
 *
 * db    - wrapped database.
 * txn   - transaction wrapper, or null for none.
 * dbta  - key; its flags and ulen must be 0.
 * dbtb  - value; its flags and ulen must be 0.
 * flags - ydb put flags; converted with convert_put_flags() before being
 *         handed to Berkeley DB.
 *
 * Returns the result of the wrapped put.
 */
int bdbw_db_put (DB_ydb *db, DB_TXN_ydb *txn, DBT_ydb *dbta, DBT_ydb *dbtb, u_int32_t flags) {
    int r;
    unsigned int i;
    DBT a,b;
    char flagstring[1000];
    u_int32_t bdbflags = convert_put_flags(flags, flagstring, sizeof(flagstring));
    assert(dbta->flags==0); assert(dbtb->flags==0);
    assert(dbta->ulen==0); assert(dbtb->ulen==0);
    tracef("{ DBT a,b;\n");
    tracef(" unsigned char adata[%u] = {", dbta->size);
    for (i=0; i<dbta->size; i++) {
        if (i>0) tracef(", ");
        tracef("%u", ((unsigned char*)(dbta->data))[i]);
    }
    tracef("};\n unsigned char bdata[%u] = {", dbtb->size);
    for (i=0; i<dbtb->size; i++) {
        if (i>0) tracef(", ");
        tracef("%u", ((unsigned char*)(dbtb->data))[i]);
    }
    tracef("};\n memset(&a,0,sizeof(a)); memset(&b,0,sizeof(b));\n");
    tracef(" a.data = adata; b.data=bdata;\n");
    tracef(" a.flags= 0; b.flags=0;\n");
    tracef(" a.ulen=0; b.ulen=0;\n");
    tracef(" a.size=%u; b.size=%u;\n", dbta->size, dbtb->size);
    memset(&a, 0, sizeof(a));
    memset(&b, 0, sizeof(b));
    a.data = dbta->data; b.data = dbtb->data;
    a.flags = 0; b.flags = 0;
    a.ulen = 0; b.ulen = 0;
    a.size = dbta->size; b.size = dbtb->size;
    /* Berkeley DB must see its own flag values, not the ydb ones. */
    r=db->i->db->put(db->i->db, txn ? txn->i->txn : 0, &a, &b, bdbflags);
    assert(r==0);
    tracef(" r=dbobj(%lld)->put(dbobj(%lld), txnobj(%lld), &a, &b, %s); assert(r==%d);\n}\n",
           db->i->objnum, db->i->objnum, txn ? txn->i->objnum : -1, flagstring, r);
    return r;
}
/* Not implemented: calls barf() and then abort(), so it never returns.  No argument is used. */
int bdbw_db_remove (DB_ydb *db, const char *fname, const char *dbname, u_int32_t flags) {
barf();
abort();
}
/* Not implemented: calls barf() and then abort(), so it never returns.  No argument is used. */
int bdbw_db_rename (DB_ydb *db, const char *namea, const char *nameb, const char *namec, u_int32_t flags) {
barf();
abort();
}
extern int berkeley_cmp_hidden_key(DB_ydb *, const DBT_ydb *, const DBT_ydb *);
/*
 * set_bt_compare method of a wrapped database.
 *
 * Registers bdbw_bt_compare() with the underlying Berkeley DB handle and
 * remembers the caller's ydb-level comparator in the wrapper so that the
 * trampoline can call it.  The trace line names the comparator when it is
 * berkeley_cmp_hidden_key and uses "Unknown_function" otherwise.
 *
 * Returns the result of the wrapped set_bt_compare.
 */
static int bdbw_db_set_bt_compare (DB_ydb *db, int (*bt_compare)(DB_ydb *, const DBT_ydb *, const DBT_ydb *)) {
    const char *traced_name = (bt_compare == berkeley_cmp_hidden_key)
                                  ? "berkeley_cmp_hidden_key"
                                  : "Unknown_function";
    int status = db->i->db->set_bt_compare(db->i->db, bdbw_bt_compare);

    db->i->bt_compare = bt_compare;

    tracef("r = dbobj(%lld)->set_bt_compare(dbobj(%lld), %s); assert(r==%d);\n",
           db->i->objnum, db->i->objnum,
           traced_name, status);
    return status;
}
/*
 * set_flags method of a wrapped database.
 *
 * Only flags==0 is accepted.  The underlying Berkeley DB set_flags is
 * called with 0 and must succeed; a trace line is written afterwards.
 * Returns the result of the wrapped call.
 */
int bdbw_db_set_flags (DB_ydb *db, u_int32_t flags) {
    int status;

    assert(flags == 0);

    status = db->i->db->set_flags(db->i->db, 0);
    assert(status == 0);

    tracef("r=dbobj(%lld)->set_flags(dbobj(%lld), 0); assert(r==0);\n",
           db->i->objnum,
           db->i->objnum);
    return status;
}
/* Not implemented: calls barf() and then abort(), so it never returns.  No argument is used. */
int bdbw_db_stat (DB_ydb *db, void *v, u_int32_t flags) {
barf();
abort();
}
/*
 * Create a ydb-style database handle backed by a Berkeley DB handle.
 *
 * Allocates the wrapper and its private part, installs the method table,
 * and calls db_create() on the wrapped environment with the converted
 * flags.  The wrapper is given the next trace object number.
 *
 * db    - receives the new wrapper (also when the wrapped create fails).
 * env   - wrapped environment the database belongs to.
 * flags - ydb create flags; converted by convert_db_create_flags().
 *
 * Returns the result of db_create().  Allocation failure is treated as
 * fatal (assert), matching the allocation check in bdbw_db_cursor().
 */
int db_create_bdbw (DB_ydb **db, DB_ENV_ydb *env, u_int32_t flags) {
    DB_ydb *result=malloc(sizeof(*result));
    int r;
    assert(result);
    result->app_private = 0;
    result->close = bdbw_db_close;
    result->cursor = bdbw_db_cursor;
    result->del = bdbw_db_del;
    result->get = bdbw_db_get;
    result->key_range = bdbw_db_key_range;
    result->open = bdbw_db_open;
    result->put = bdbw_db_put;
    result->remove = bdbw_db_remove;
    result->rename = bdbw_db_rename;
    result->set_bt_compare = bdbw_db_set_bt_compare;
    result->set_flags = bdbw_db_set_flags;
    result->stat = bdbw_db_stat;
    result->i = malloc(sizeof(*result->i));
    assert(result->i);
    result->i->db = 0; /* stays null if the wrapped create fails */
    r=db_create(&result->i->db, env->i->env, convert_db_create_flags(flags));
    result->i->objnum = objnum++;
    /* Link the wrapped handle back to its wrapper only if one was created. */
    if (r == 0) {
        result->i->db->app_private = result;
    }
    result->i->bt_compare = 0;
    *db = result;
    tracef("r=db_create(new_dbobj(%lld), envobj(%lld), %d); assert(r==%d);\n",
           result->i->objnum, env->i->objnum, flags, r);
    return r;
}
/*
 * Everything from here to the matching #endif is disabled by "#if 0" and is
 * not compiled.  The region holds ydb-side placeholder versions of the
 * environment and transaction methods; most of them only call barf().
 */
#if 0
void bdbw_db_env_err (const DB_ENV_ydb *env, int error, const char *fmt, ...) {
va_list ap;
va_start(ap, fmt);
fprintf(stderr, "YDB Error %d:", error);
vfprintf(stderr, fmt, ap);
va_end(ap);
}
#define barf() ({ fprintf(stderr, "YDB: BARF %s:%d in %s\n", __FILE__, __LINE__, __func__); })
#define barff(fmt,...) ({ fprintf(stderr, "YDB: BARF %s:%d in %s, ", __FILE__, __LINE__, __func__); fprintf(stderr, fmt, __VA_ARGS__); })
#define note() ({ fprintf(stderr, "YDB: Note %s:%d in %s\n", __FILE__, __LINE__, __func__); })
#define notef(fmt,...) ({ fprintf(stderr, "YDB: Note %s:%d in %s, ", __FILE__, __LINE__, __func__); fprintf(stderr, fmt, __VA_ARGS__); })
void print_flags (u_int32_t flags) {
u_int32_t gotit=0;
int doneone=0;
#define doit(flag) if (flag & flags) { if (doneone) printf(" | "); printf("%s", #flag); doneone=1; gotit|=flag; }
printf(" flags=");
doit(DB_INIT_LOCK_ydb);
doit(DB_INIT_LOG_ydb);
doit(DB_INIT_MPOOL_ydb);
doit(DB_INIT_TXN_ydb);
doit(DB_CREATE_ydb);
doit(DB_THREAD_ydb);
doit(DB_RECOVER_ydb);
doit(DB_PRIVATE_ydb);
if (gotit!=flags) printf(" flags 0x%x not accounted for", flags&~gotit);
printf("\n");
}
int yobi_db_env_open (DB_ENV_ydb *env, const char *home, u_int32_t flags, int mode) {
notef("(%p, \"%s\", 0x%x, 0%o)\n", env, home, flags, mode);
env->dir = strdup(home);
env->open_flags = flags;
env->open_mode = mode;
print_flags(flags);
assert(DB_PRIVATE & flags); // This means that we don't have to do anything with shared memory. And that's good enough for mysql.
return 0;
}
int yobi_db_env_close (DB_ENV_ydb * env, u_int32_t flags) {
barf();
return 1;
}
int yobi_db_env_log_archive (DB_ENV_ydb *env, char **list[], u_int32_t flags) {
barf();
return 1;
}
int yobi_db_env_log_flush (DB_ENV_ydb * env, const DB_LSN_ydb * lsn) {
barf();
return 1;
}
int yobi_db_env_set_cachesize (DB_ENV_ydb * env, u_int32_t gbytes, u_int32_t bytes, int ncache) {
barf();
return 1;
}
int yobi_db_env_set_data_dir (DB_ENV_ydb * env, const char *dir) {
barf();
return 1;
}
void yobi_db_env_set_errcall (DB_ENV_ydb *env, void (*errcall)(const char *, char *)) {
note();
env->errcall=errcall;
}
void yobi_db_env_set_errpfx (DB_ENV_ydb * env, const char *errpfx) {
notef("(%p, %s)\n", env, errpfx);
env->errpfx = errpfx;
}
int yobi_db_env_set_flags (DB_ENV_ydb *env, u_int32_t flags, int onoff) {
barf();
return 1;
}
int yobi_db_env_set_lg_bsize (DB_ENV_ydb * env, u_int32_t bsize) {
barf();
return 1;
}
int yobi_db_env_set_lg_dir (DB_ENV_ydb * env, const char * dir) {
barf();
return 1;
}
int yobi_db_env_set_lg_max (DB_ENV_ydb *env, u_int32_t lg_max) {
barf();
return 1;
}
int yobi_db_env_set_lk_detect (DB_ENV_ydb *env, u_int32_t detect) {
barf();
return 1;
}
int yobi_db_env_set_lk_max (DB_ENV_ydb *env, u_int32_t lk_max) {
barf();
return 1;
}
void yobi_db_env_set_noticecall (DB_ENV_ydb *env, void (*noticeall)(DB_ENV_ydb *, db_notices_ydb)) {
barf();
}
int yobi_db_env_set_tmp_dir (DB_ENV_ydb * env, const char *tmp_dir) {
barf();
return 1;
}
int yobi_db_env_set_verbose (DB_ENV_ydb *env, u_int32_t which, int onoff) {
barf();
return 1;
}
int yobi_db_env_txn_checkpoint (DB_ENV_ydb *env, u_int32_t kbyte, u_int32_t min, u_int32_t flags) {
barf();
return 1;
}
int yobi_db_env_txn_stat (DB_ENV_ydb *env, DB_TXN_STAT_ydb **statp, u_int32_t flags) {
barf();
return 1;
}
void yobi_default_errcall(const char *errpfx, char *msg) {
fprintf(stderr, "YDB: %s: %s", errpfx, msg);
}
int yobi_db_txn_commit (DB_TXN_ydb *txn, u_int32_t flags) {
notef("flags=%d\n", flags);
return 0;
}
u_int32_t yobi_db_txn_id (DB_TXN_ydb *txn) {
barf();
abort();
}
int log_compare_ydb (const DB_LSN_ydb *a, const DB_LSN_ydb *b) {
fprintf(stderr, "%s:%d log_compare(%p,%p)\n", __FILE__, __LINE__, a, b);
abort();
}
#endif
#ifndef _BDBW_H
#define _BDBW_H
#if defined(__cplusplus)
extern "C" {
#if 0
}
#endif
#endif
/* Entry points of the Berkeley DB backed wrapper; the parameters are ydb-layout structs. */
/* Create an environment wrapper and store it through the first argument. */
int db_env_create_bdbw (struct yobi_db_env **, u_int32_t);
/* Abort a transaction. */
int txn_abort_bdbw (struct yobi_db_txn *);
/* Begin a transaction in env; stxn is the parent transaction or null; the new wrapper is stored in *txn. */
int txn_begin_bdbw (struct yobi_db_env *env, struct yobi_db_txn *stxn, struct yobi_db_txn **txn, u_int32_t flags);
/* Commit a transaction. */
int txn_commit_bdbw (struct yobi_db_txn *, u_int32_t);
/* Create a database wrapper inside the given environment and store it through the first argument. */
int db_create_bdbw (struct yobi_db **, struct yobi_db_env *, u_int32_t);
/*
 * Everything from here to the matching #endif is disabled by "#if 0" and is
 * not compiled.  It is a copy of the ydb db.h type, constant and function
 * declarations.
 */
#if 0
typedef enum {
DB_BTREE=1,
// DB_HASH=2,
// DB_RECNO=3,
// DB_QUEUE=4,
// DB_UNKNOWN=5 /* Figure it out on open. */
} DBTYPE;
typedef enum {
DB_NOTICE_LOGFILE_CHANGED
} db_notices;
enum {
DB_VERB_CHKPOINT = 0x0001,
DB_VERB_DEADLOCK = 0x0002,
DB_VERB_RECOVERY = 0x0004
};
typedef struct yobi_db DB;
typedef struct yobi_db_btree_stat DB_BTREE_STAT;
typedef struct yobi_db_env DB_ENV;
typedef struct yobi_db_key_range DB_KEY_RANGE;
typedef struct yobi_db_lsn DB_LSN;
typedef struct yobi_db_txn DB_TXN;
typedef struct yobi_db_txn_active DB_TXN_ACTIVE;
typedef struct yobi_db_txn_stat DB_TXN_STAT;
typedef struct yobi_dbc DBC;
typedef struct yobi_dbt DBT;
struct yobi_db {
void *app_private;
int (*close) (DB *, u_int32_t);
int (*cursor) (DB *, DB_TXN *, DBC **, u_int32_t);
int (*del) (DB *, DB_TXN *, DBT *, u_int32_t);
int (*get) (DB *, DB_TXN *, DBT *, DBT *, u_int32_t);
int (*key_range) (DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t);
int (*open) (DB *, DB_TXN *,
const char *, const char *, DBTYPE, u_int32_t, int);
int (*put) (DB *, DB_TXN *, DBT *, DBT *, u_int32_t);
int (*remove) (DB *, const char *, const char *, u_int32_t);
int (*rename) (DB *, const char *, const char *, const char *, u_int32_t);
int (*set_bt_compare) (DB *,
int (*)(DB *, const DBT *, const DBT *));
int (*set_flags) (DB *, u_int32_t);
int (*stat) (DB *, void *, u_int32_t);
struct ydb_db_internal *i;
};
enum {
DB_DBT_MALLOC = 0x002,
DB_DBT_REALLOC = 0x010,
DB_DBT_USERMEM = 0x020,
DB_DBT_DUPOK = 0x040
};
struct yobi_dbt {
void *app_private;
void *data;
u_int32_t flags;
u_int32_t size;
u_int32_t ulen;
};
struct yobi_db_txn {
int (*commit) (DB_TXN*, u_int32_t);
u_int32_t (*id) (DB_TXN *);
};
struct yobi_dbc {
int (*c_get) (DBC *, DBT *, DBT *, u_int32_t);
int (*c_close) (DBC *);
int (*c_del) (DBC *, u_int32_t);
};
struct yobi_db_env {
// Methods used by MYSQL
void (*err) (const DB_ENV *, int, const char *, ...);
int (*open) (DB_ENV *, const char *, u_int32_t, int);
int (*close) (DB_ENV *, u_int32_t);
int (*txn_checkpoint) (DB_ENV *, u_int32_t, u_int32_t, u_int32_t);
int (*log_flush) (DB_ENV *, const DB_LSN *);
void (*set_errcall) (DB_ENV *, void (*)(const char *, char *));
void (*set_errpfx) (DB_ENV *, const char *);
void (*set_noticecall) (DB_ENV *, void (*)(DB_ENV *, db_notices));
int (*set_flags) (DB_ENV *, u_int32_t, int);
int (*set_data_dir) (DB_ENV *, const char *);
int (*set_tmp_dir) (DB_ENV *, const char *);
int (*set_verbose) (DB_ENV *, u_int32_t, int);
int (*set_lg_bsize) (DB_ENV *, u_int32_t);
int (*set_lg_dir) (DB_ENV *, const char *);
int (*set_lg_max) (DB_ENV *, u_int32_t);
int (*set_cachesize) (DB_ENV *, u_int32_t, u_int32_t, int);
int (*set_lk_detect) (DB_ENV *, u_int32_t);
int (*set_lk_max) (DB_ENV *, u_int32_t);
int (*log_archive) (DB_ENV *, char **[], u_int32_t);
int (*txn_stat) (DB_ENV *, DB_TXN_STAT **, u_int32_t);
// Internal state
void (*errcall)(const char *, char *);
const char *errpfx;
char *dir; /* A malloc'd copy of the directory. */
u_int32_t open_flags;
int open_mode;
};
struct yobi_db_key_range {
double less,equal,grater;
};
struct yobi_db_btree_stat {
u_int32_t bt_ndata;
u_int32_t bt_nkeys;
};
struct yobi_db_txn_stat {
u_int32_t st_nactive;
DB_TXN_ACTIVE *st_txnarray;
};
struct yobi_db_lsn {
int hello;
};
struct yobi_db_txn_active {
DB_LSN lsn;
u_int32_t txnid;
};
#ifndef _YDB_WRAP_H
#define DB_VERSION_STRING "Yobiduck: Fractal DB (November 19, 2006)"
#else
#define DB_VERSION_STRING_ydb "Yobiduck: Fractal DB (November 19, 2006) (wrapped bdb)"
#endif
enum {
DB_ARCH_ABS = 0x001,
DB_ARCH_LOG = 0x004
};
enum {
//DB_AFTER = 1,
DB_FIRST = 10,
DB_GET_BOTH = 11,
DB_LAST = 18,
DB_NEXT = 19,
DB_NEXT_DUP = 20,
DB_PREV = 27,
DB_SET = 30,
DB_SET_RANGE = 32,
DB_RMW = 0x40000000
};
enum {
DB_KEYEMPTY = -30998,
DB_KEYEXIST = -30997,
DB_LOCK_DEADLOCK = -30996,
DB_NOTFOUND = -30991
};
enum {
DB_CREATE = 0x0000001,
DB_RDONLY = 0x0000010,
DB_RECOVER = 0x0000020,
DB_THREAD = 0x0000040,
DB_TXN_NOSYNC = 0x0000100,
DB_PRIVATE = 0x0100000
};
enum {
DB_LOCK_DEFAULT = 1,
DB_LOCK_OLDEST = 7,
DB_LOCK_RANDOM = 8
};
enum {
DB_DUP = 0x000002
};
enum {
DB_NOOVERWRITE = 23
};
enum {
DB_INIT_LOCK = 0x001000,
DB_INIT_LOG = 0x002000,
DB_INIT_MPOOL = 0x004000,
DB_INIT_TXN = 0x008000
};
int db_env_create (DB_ENV **, u_int32_t);
int txn_begin (DB_ENV *, DB_TXN *, DB_TXN **, u_int32_t);
int txn_abort (DB_TXN *);
int log_compare (const DB_LSN *, const DB_LSN *);
#endif
#if defined(__cplusplus)
}
#endif
#endif
This directory provides a wrapper using the ydb db.h header file but
it calls the bdb internals. To get this to work requires a little
magic, since both BDB and YDB use the same type names.
BDB helps with a ./configure option (--with-uniquename) that makes it
so that all the link-time symbols have different names. But the type
names and enums collide. Hence we cannot include both the
../include/db.h and the BDB db.h.
To fix that, we have a header, ydb-uniq.h, which uses completely
different names for everything in the ydb interface. Those names
are compatible with the standard names; that is, all the structs have
the same layout.
Thus we have the following three headers
bdb_db.h which defines things like db_env_create_4001(DBENV **, ...)
ydb_db.h which defines things like db_env_create(DBENV **, ...) (but the DBENV is a different type from BDB's)
bdbw.h which defines things like db_env_create_ydb(DBENV_ydb **, ...)
bdbw.h can be included with bdb_db.h or ydb_db.h
bdb_db.h and ydb_db.h cannot both be included in the same file.
An application, such as mysql, includes db.h from ../include
That is the ydb header file.
#ifndef _YDB_WRAP_H
#define _YDB_WRAP_H
#define DB_BTREE DB_BTREE_ydb
#define DB_NOTICE_LOGFILE_CHANGED DB_NOTICE_LOGFILE_CHANGED_ydb
#define DBTYPE DBTYPE_ydb
#define db_notices db_notices_ydb
#define txn_abort txn_abort_ydb
#define txn_begin txn_begin_ydb
#define txn_commit txn_commit_ydb
#define DB_VERB_CHKPOINT DB_VERB_CHKPOINT_ydb
#define DB_VERB_DEADLOCK DB_VERB_DEADLOCK_ydb
#define DB_VERB_RECOVERY DB_VERB_RECOVERY_ydb
#define DB DB_ydb
#define DB_BTREE_STAT DB_BTREE_STAT_ydb
#define DB_ENV DB_ENV_ydb
#define DB_KEY_RANGE DB_KEY_RANGE_ydb
#define DB_LSN DB_LSN_ydb
#define DB_TXN DB_TXN_ydb
#define DB_TXN_ACTIVE DB_TXN_ACTIVE_ydb
#define DB_TXN_STAT DB_TXN_STAT_ydb
#define DBC DBC_ydb
#define DBT DBT_ydb
/* Rename DB_DBT_MALLOC to DB_DBT_MALLOC_ydb while db.h is included, like the other DB_DBT_* flags. */
#define DB_DBT_MALLOC DB_DBT_MALLOC_ydb
#define DB_DBT_REALLOC DB_DBT_REALLOC_ydb
#define DB_DBT_USERMEM DB_DBT_USERMEM_ydb
#define DB_DBT_DUPOK DB_DBT_DUPOK_ydb
#define DB_VERSION_STRING DB_VERSION_STRING_ydb
#define DB_ARCH_ABS DB_ARCH_ABS_ydb
#define DB_ARCH_LOG DB_ARCH_LOG_ydb
#define DB_FIRST DB_FIRST_ydb
#define DB_GET_BOTH DB_GET_BOTH_ydb
#define DB_LAST DB_LAST_ydb
#define DB_NEXT DB_NEXT_ydb
#define DB_NEXT_DUP DB_NEXT_DUP_ydb
#define DB_PREV DB_PREV_ydb
#define DB_SET DB_SET_ydb
#define DB_SET_RANGE DB_SET_RANGE_ydb
#define DB_RMW DB_RMW_ydb
#define DB_KEYEMPTY DB_KEYEMPTY_ydb
#define DB_KEYEXIST DB_KEYEXIST_ydb
#define DB_LOCK_DEADLOCK DB_LOCK_DEADLOCK_ydb
#define DB_NOTFOUND DB_NOTFOUND_ydb
#define DB_CREATE DB_CREATE_ydb
#define DB_RDONLY DB_RDONLY_ydb
#define DB_RECOVER DB_RECOVER_ydb
#define DB_THREAD DB_THREAD_ydb
#define DB_TXN_NOSYNC DB_TXN_NOSYNC_ydb
#define DB_PRIVATE DB_PRIVATE_ydb
#define DB_LOCK_DEFAULT DB_LOCK_DEFAULT_ydb
#define DB_LOCK_OLDEST DB_LOCK_OLDEST_ydb
#define DB_LOCK_RANDOM DB_LOCK_RANDOM_ydb
#define DB_DUP DB_DUP_ydb
#define DB_NOOVERWRITE DB_NOOVERWRITE_ydb
#define DB_INIT_LOCK DB_INIT_LOCK_ydb
#define DB_INIT_LOG DB_INIT_LOG_ydb
#define DB_INIT_MPOOL DB_INIT_MPOOL_ydb
#define DB_INIT_TXN DB_INIT_TXN_ydb
#define db_create db_create_ydb
#define db_env_create db_env_create_ydb
#define txn_begin txn_begin_ydb
#define txn_commit txn_commit_ydb
#define txn_abort txn_abort_ydb
#define log_compare log_compare_ydb
#include "../include/db.h"
#undef DB_BTREE
#undef DB_NOTICE_LOGFILE_CHANGED
#undef DBTYPE
#undef db_notices
#undef txn_abort
#undef txn_begin
#undef txn_commit
#undef DB_VERB_CHKPOINT
#undef DB_VERB_DEADLOCK
#undef DB_VERB_RECOVERY
#undef DB
#undef DB_BTREE_STAT
#undef DB_ENV
#undef DB_KEY_RANGE
#undef DB_LSN
#undef DB_TXN
#undef DB_TXN_ACTIVE
#undef DB_TXN_STAT
#undef DBC
#undef DBT
#undef DB_DBT_MALLOC
#undef DB_DBT_REALLOC
#undef DB_DBT_USERMEM
#undef DB_DBT_DUPOK
#undef DB_VERSION_STRING
#undef DB_ARCH_ABS
#undef DB_ARCH_LOG
#undef DB_FIRST
#undef DB_GET_BOTH
#undef DB_LAST
#undef DB_NEXT
#undef DB_NEXT_DUP
#undef DB_PREV
#undef DB_SET
#undef DB_SET_RANGE
#undef DB_RMW
#undef DB_KEYEMPTY
#undef DB_KEYEXIST
#undef DB_LOCK_DEADLOCK
#undef DB_NOTFOUND
#undef DB_CREATE
#undef DB_RDONLY
#undef DB_RECOVER
#undef DB_THREAD
#undef DB_TXN_NOSYNC
#undef DB_PRIVATE
#undef DB_LOCK_DEFAULT
#undef DB_LOCK_OLDEST
#undef DB_LOCK_RANDOM
#undef DB_DUP
#undef DB_NOOVERWRITE
#undef DB_INIT_LOCK
#undef DB_INIT_LOG
#undef DB_INIT_MPOOL
#undef DB_INIT_TXN
#undef db_create
#undef db_env_create
#undef txn_begin
#undef txn_commit
#undef txn_abort
#undef log_compare
#endif
/* This version is what Mysql calls.
* It invokes the version in bdbw.
* The version in bdbw then converts to Berkeley DB Calls. */
#include <sys/types.h>
/* This include is to the ydb include, which is what mysql sees. */
#include <db.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <assert.h>
#include "bdbw.h"
#define barf() ({ fprintf(stderr, "YDB: BARF %s:%d in %s\n", __FILE__, __LINE__, __func__); })
#define barff(fmt,...) ({ fprintf(stderr, "YDB: BARF %s:%d in %s, ", __FILE__, __LINE__, __func__); fprintf(stderr, fmt, __VA_ARGS__); })
#define note() ({ fprintf(stderr, "YDB: Note %s:%d in %s\n", __FILE__, __LINE__, __func__); })
#define notef(fmt,...) ({ fprintf(stderr, "YDB: Note %s:%d in %s, ", __FILE__, __LINE__, __func__); fprintf(stderr, fmt, __VA_ARGS__); })
/* ydb entry point: delegates environment creation to db_env_create_bdbw() and returns its result. */
int db_env_create (DB_ENV **envp, u_int32_t flags) {
    int status = db_env_create_bdbw(envp, flags);
    return status;
}
/* ydb entry point: delegates to txn_abort_bdbw() and returns its result. */
int txn_abort (DB_TXN *txn) {
    int status = txn_abort_bdbw(txn);
    return status;
}
/* ydb entry point: delegates to txn_begin_bdbw() with the same arguments and returns its result. */
int txn_begin (DB_ENV *env, DB_TXN *stxn, DB_TXN **txn, u_int32_t flags) {
    int status = txn_begin_bdbw(env, stxn, txn, flags);
    return status;
}
/* ydb entry point: delegates to txn_commit_bdbw() and returns its result. */
int txn_commit (DB_TXN *txn, u_int32_t flags) {
    int status = txn_commit_bdbw(txn, flags);
    return status;
}
/* Definition of the type behind the `i` member of a DB; nothing in this file reads or writes foo. */
struct ydb_db_internal {
int foo; // Placeholder member.
};
/*
 * Print a human-readable list of environment-open flags on stdout.
 *
 * Writes " flags=", then the name of every known flag that is set in
 * `flags` (separated by " | "), then a note giving any leftover bits in
 * hex, and finally a newline.
 */
void print_flags (u_int32_t flags) {
    /* Known flags, in the order in which they are reported. */
    const struct {
        u_int32_t   bit;
        const char *label;
    } known_flags[] = {
        { DB_INIT_LOCK,  "DB_INIT_LOCK"  },
        { DB_INIT_LOG,   "DB_INIT_LOG"   },
        { DB_INIT_MPOOL, "DB_INIT_MPOOL" },
        { DB_INIT_TXN,   "DB_INIT_TXN"   },
        { DB_CREATE,     "DB_CREATE"     },
        { DB_THREAD,     "DB_THREAD"     },
        { DB_RECOVER,    "DB_RECOVER"    },
        { DB_PRIVATE,    "DB_PRIVATE"    },
    };
    u_int32_t recognized = 0;   /* union of the known bits found so far */
    int printed_any = 0;        /* whether a separator is needed        */
    unsigned int k;

    printf(" flags=");
    for (k = 0; k < sizeof(known_flags) / sizeof(known_flags[0]); k++) {
        if (known_flags[k].bit & flags) {
            if (printed_any) printf(" | ");
            printf("%s", known_flags[k].label);
            printed_any = 1;
            recognized |= known_flags[k].bit;
        }
    }
    if (recognized != flags) printf(" flags 0x%x not accounted for", flags & ~recognized);
    printf("\n");
}
/*
 * Not implemented: prints the source location and both argument addresses
 * to stderr and then aborts, so it never returns.
 */
int log_compare (const DB_LSN *a, const DB_LSN *b) {
    /* %p takes a void pointer, hence the casts. */
    fprintf(stderr, "%s:%d log_compare(%p,%p)\n", __FILE__, __LINE__, (void *)a, (void *)b);
    abort();
}
/* Not implemented: barf() reports the location on stderr, then abort() ends the process.  No argument is used. */
static int yobi_db_close (DB *db, u_int32_t flags) {
barf();
abort();
}
/* Not implemented: barf() reports the location on stderr, then abort() ends the process.  No argument is used. */
int yobi_db_cursor (DB *db, DB_TXN *txn, DBC **c, u_int32_t flags) {
barf();
abort();
}
/* Not implemented: barf() reports the location on stderr, then abort() ends the process.  No argument is used. */
int yobi_db_del (DB *db, DB_TXN *txn, DBT *dbt, u_int32_t flags) {
barf();
abort();
}
/* Not implemented: barf() reports the location on stderr, then abort() ends the process.  No argument is used. */
int yobi_db_get (DB *db, DB_TXN *txn, DBT *dbta, DBT *dbtb, u_int32_t flags) {
barf();
abort();
}
/* Not implemented: barf() reports the location on stderr, then abort() ends the process.  No argument is used. */
int yobi_db_key_range (DB *db, DB_TXN *txn, DBT *dbt, DB_KEY_RANGE *kr, u_int32_t flags) {
barf();
abort();
}
/*
 * Placeholder open: logs its arguments with notef(), prints the flags with
 * print_flags(), and returns 0.  Nothing is opened and db is not used.
 */
int yobi_db_open (DB *db, DB_TXN *txn, const char *fname, const char *dbname, DBTYPE dbtype, u_int32_t flags, int mode) {
notef("txn=%p fname=%s dbname=%s dbtype=%d flags=0x%x mode=0%o\n", txn, fname, dbname, dbtype, flags, mode);
print_flags(flags);
return 0;
}
/* Not implemented: barf() reports the location on stderr, then abort() ends the process.  No argument is used. */
int yobi_db_put (DB *db, DB_TXN *txn, DBT *dbta, DBT *dbtb, u_int32_t flags) {
barf();
abort();
}
/* Not implemented: barf() reports the location on stderr, then abort() ends the process.  No argument is used. */
int yobi_db_remove (DB *db, const char *fname, const char *dbname, u_int32_t flags) {
barf();
abort();
}
/* Not implemented: barf() reports the location on stderr, then abort() ends the process.  No argument is used. */
int yobi_db_rename (DB *db, const char *namea, const char *nameb, const char *namec, u_int32_t flags) {
barf();
abort();
}
/* Not implemented: barf() reports the location on stderr, then abort() ends the process.  No argument is used. */
int yobi_db_set_flags (DB *db, u_int32_t flags) {
barf();
abort();
}
/* Not implemented: barf() reports the location on stderr, then abort() ends the process.  No argument is used. */
int yobi_db_stat (DB *db, void *v, u_int32_t flags) {
barf();
abort();
}
/* ydb entry point: delegates database-handle creation to db_create_bdbw() and returns its result. */
int db_create (DB **db, DB_ENV *env, u_int32_t flags) {
    int status = db_create_bdbw(db, env, flags);
    return status;
}
# Compiler flags: warnings on (except unused-entity warnings), with debug info.
CFLAGS = -W -Wall -Wno-unused -g
# Header search path: the public ydb headers and the newbrt sources.
CPPFLAGS = -I../include -I../newbrt
# ydb.o must be rebuilt when either of these headers changes.
ydb.o: ../include/db.h ../newbrt/cachetable.h
# Shared library built directly from ydb.c as position-independent code.
libdb.so: ydb.c
cc $(CPPFLAGS) ydb.c -shared -fPIC -o libdb.so $(CFLAGS)
# Archive member ydb.o of libdb.a depends on ydb.o.
libdb.a(ydb.o): ydb.o
cd ~/yobiduck/ydb/src
make
cd ~/mysql/bdbi/mysql-5.0.27/
export LD_RUN_PATH=/home/bradley/yobiduck/ydb/src
./configure --with-berkeley-db-includes=/home/bradley/yobiduck/ydb/include --with-berkeley-db --with-berkeley-db-libs=/home/bradley/yobiduck/ydb/src --prefix=/home/bradley/usr
make
make install
#This one may not be needed
~/mysql/bdbi/usr/bin/mysql_install_db
#
pushd /home/bradley/mysql/bdbi/usr/ ; /home/bradley/mysql/bdbi/usr//bin/mysqld_safe &
popd
~/mysql/bdbi/usr/bin/mysql -u root
mysql> show databases;
mysql> create database yobitest;
mysql> use yobitest;
mysql> create table t1 (i int) engine=bdb;
Look for the error in /home/bradley/mysql/bdbi/usr/var/yobert.err
----
This links right:
LD_LIBRARY_PATH=/home/bradley/mysql/bdbi/usr/lib/mysql/ ldd sql/mysqld
----
This works,
LD_LIBRARY_PATH=/home/bradley/mysql/bdbi/usr/lib/mysql/ /home/bradley/mysql/bdbi/usr//bin/mysqld
producing the following in the log
061208 16:11:35 InnoDB: Started; log sequence number 0 43655
ydb.c:78 db_env_create flags=0
----
the LD_RUN_PATH thing above works.
--- on laptop I did this instead:
export LD_RUN_PATH=/home/bradley/yobiduck/ydb/src
./configure --with-berkeley-db-includes=/home/bradley/yobiduck/ydb/include --with-berkeley-db --with-berkeley-db-libs=/home/bradley/yobiduck/src --prefix=/home/bradley/usr
make
make install
/home/bradley/usr/bin/mysql_install_db
/home/bradley/usr/bin/mysqld_safe &
/home/bradley/usr/bin/mysql -u root
~/mysql/bdbi/usr/bin/mysql -u root
mysql> show databases;
mysql> create database yobitest;
mysql> use yobitest;
mysql> create table t1 (i int) engine=bdb;
Look for the error in ~/usr/var/localhost.localdomain.err
---
To clean up after a total screwup:
rm -rf ~/usr/var/
Didn't manage to clean it up very well.
---
Goal: compile mysql with debugging
export LD_RUN_PATH=/home/bradley/yobiduck/ydb/src
./configure CFLAGS="-g -O2" --with-berkeley-db-includes=/home/bradley/yobiduck/ydb/include --with-berkeley-db --with-berkeley-db-libs=/home/bradley/yobiduck/src --prefix=/home/bradley/usr
If you want to debug, you might need to start mysqld without using mysqld_safe.
Here is one way to do it:
gdb ~/usr/libexec/mysqld
(gdb) run --basedir=/home/bradley/usr --datadir=/home/bradley/usr/var --pid-file=/home/bradley/usr/var/localhost.localdomain.pid --skip-external-locking
That was screwed up (the configure args were wrong.) Try again:
---
export LD_RUN_PATH=/home/bradley/yobiduck/ydb/src
./configure CXXFLAGS="-g -O2" CFLAGS="-g -O2" --with-berkeley-db-includes=/home/bradley/yobiduck/ydb/include --with-berkeley-db --with-berkeley-db-libs=/home/bradley/yobiduck/ydb/src --prefix=/home/bradley/usr
gdb ~/usr/libexec/mysqld
(gdb) run --basedir=/home/bradley/usr --datadir=/home/bradley/usr/var --pid-file=/home/bradley/usr/var/localhost.localdomain.pid --skip-external-locking
/* -*- mode: C; c-basic-offset: 4 -*- */
#include <sys/types.h>
#include <db.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <assert.h>
#include <brt.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <errno.h>
#include "cachetable.h"
/* Describes the named databases of one file. */
struct db_header {
int n_databases; // Or there can be >=1 named databases. This is the count.
char *database_names; // These are the names
BRT *database_brts; // Presumably one BRT per named database -- TODO confirm.
};
/* Private state behind a DB handle (reached through its `i` member). */
struct ydb_db_internal {
int freed;
int (*bt_compare)(DB *, const DBT *, const DBT *); // Key comparison function with the signature taken by set_bt_compare.
struct db_header *header;
int database_number; // -1 if it is the single unnamed database. Nonnegative number otherwise.
DB_ENV *env;
char *full_fname;
char *database_name;
//int fd;
u_int32_t open_flags;
int open_mode;
BRT brt;
};
/*
 * err method of an environment: reports an error on stderr.
 *
 * Prints "YDB Error <error>:" and then the printf-style message built from
 * fmt and the variable arguments.  No newline is added and env is not used.
 */
void yobi_db_env_err (const DB_ENV *env __attribute__((__unused__)), int error, const char *fmt, ...) {
    va_list args;

    fprintf(stderr, "YDB Error %d:", error);

    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
}
#define barf() ({ fprintf(stderr, "YDB: BARF %s:%d in %s\n", __FILE__, __LINE__, __func__); })
#define barff(fmt,...) ({ fprintf(stderr, "YDB: BARF %s:%d in %s, ", __FILE__, __LINE__, __func__); fprintf(stderr, fmt, __VA_ARGS__); })
#define note() ({ fprintf(stderr, "YDB: Note %s:%d in %s\n", __FILE__, __LINE__, __func__); })
#define notef(fmt,...) ({ fprintf(stderr, "YDB: Note %s:%d in %s, ", __FILE__, __LINE__, __func__); fprintf(stderr, fmt, __VA_ARGS__); })
/*
 * Print a human-readable list of environment-open flags on stdout:
 * " flags=", the names of the known flags set in `flags` separated by " | ",
 * a note giving any leftover bits in hex, and a newline.
 */
void print_flags (u_int32_t flags) {
u_int32_t gotit=0; // Union of the known flag bits found so far.
int doneone=0; // Nonzero once a name has been printed, so a separator is needed.
// doit(flag): if flag is set, print its name (with a separator when needed) and record it in gotit.
// The macro stays defined after this function.
#define doit(flag) if (flag & flags) { if (doneone) printf(" | "); printf("%s", #flag); doneone=1; gotit|=flag; }
printf(" flags=");
doit(DB_INIT_LOCK);
doit(DB_INIT_LOG);
doit(DB_INIT_MPOOL);
doit(DB_INIT_TXN);
doit(DB_CREATE);
doit(DB_THREAD);
doit(DB_RECOVER);
doit(DB_PRIVATE);
if (gotit!=flags) printf(" flags 0x%x not accounted for", flags&~gotit);
printf("\n");
}
/* Private state behind a DB_ENV handle (reached through its `i` member). */
struct db_env_ydb_internal {
u_int32_t open_flags; // Flags given to yobi_db_env_open.
int open_mode; // Mode given to yobi_db_env_open.
void (*errcall)(const char *, char *); // Callback stored by yobi_db_env_set_errcall.
const char *errpfx; // strdup'd copy made by yobi_db_env_set_errpfx.
char *dir; /* A malloc'd copy of the directory. */
char *tmp_dir; // strdup'd copy made by yobi_db_env_set_tmp_dir.
void (*noticecall)(DB_ENV *, db_notices); // Callback stored by yobi_db_env_set_noticecall.
int n_files;
int files_array_limit; // How big is *files ?
struct ydb_file **files;
CACHETABLE cachetable; // Created by yobi_db_env_open via brt_create_cachetable.
};
/*
 * open method of an environment.
 *
 * Logs the call, records a private copy of the home directory together
 * with the open flags and mode, prints the flags, and creates the
 * environment's cachetable.
 *
 * env   - environment being opened.
 * home  - home directory; copied with strdup().
 * flags - open flags; DB_PRIVATE must be set.
 * mode  - recorded but not otherwise used here.
 *
 * Returns 0 on success, or ENOMEM if the directory name cannot be copied.
 * A missing DB_PRIVATE flag or a cachetable creation failure trips an
 * assert.
 */
int yobi_db_env_open (DB_ENV *env, const char *home, u_int32_t flags, int mode) {
    int r;
    notef("(%p, \"%s\", 0x%x, 0%o)\n", (void *)env, home, flags, mode);
    env->i->dir = strdup(home);
    if (env->i->dir == NULL) {
        return ENOMEM;
    }
    env->i->open_flags = flags;
    env->i->open_mode = mode;
    print_flags(flags);
    assert(DB_PRIVATE & flags); // This means that we don't have to do anything with shared memory. And that's good enough for mysql.
    r = brt_create_cachetable(&env->i->cachetable, 32);
    assert(r==0);
    return 0;
}
/* Not implemented: barf() reports the location on stderr and 1 is returned.  No argument is used. */
int yobi_db_env_close (DB_ENV * env, u_int32_t flags) {
barf();
return 1;
}
/* Placeholder: stores NULL through list and returns 0.  env and flags are not used. */
int yobi_db_env_log_archive (DB_ENV *env, char **list[], u_int32_t flags) {
*list = NULL;
return 0;
}
/* Not implemented: barf() reports the location on stderr and 1 is returned.  No argument is used. */
int yobi_db_env_log_flush (DB_ENV * env, const DB_LSN * lsn) {
barf();
return 1;
}
/* Not implemented: barf() reports the location on stderr and 1 is returned.  No argument is used. */
int yobi_db_env_set_cachesize (DB_ENV * env, u_int32_t gbytes, u_int32_t bytes, int ncache) {
barf();
return 1;
}
/* Not implemented: barf() reports the location on stderr and 1 is returned.  No argument is used. */
int yobi_db_env_set_data_dir (DB_ENV * env, const char *dir) {
barf();
return 1;
}
/* Store the error callback in the environment's private state. */
void yobi_db_env_set_errcall (DB_ENV *env, void (*errcall)(const char *, char *)) {
env->i->errcall=errcall;
}
/*
 * Store a strdup'd copy of the error prefix in the environment's private state.
 * The result of strdup() is not checked and any previously stored prefix is not freed.
 */
void yobi_db_env_set_errpfx (DB_ENV * env, const char *errpfx) {
env->i->errpfx = strdup(errpfx);
}
/* Not implemented: barf() reports the location on stderr and 1 is returned.  No argument is used. */
int yobi_db_env_set_flags (DB_ENV *env, u_int32_t flags, int onoff) {
barf();
return 1;
}
/*
 * DB_ENV set_lg_bsize method.  Not implemented: calls barf() (defined
 * elsewhere; presumably reports the unimplemented call -- confirm) and
 * returns 1.
 */
int yobi_db_env_set_lg_bsize (DB_ENV * env, u_int32_t bsize) {
    (void)env;
    (void)bsize;
    barf();
    return 1;
}
/*
 * DB_ENV set_lg_dir method.  Not implemented: calls barf() (defined
 * elsewhere; presumably reports the unimplemented call -- confirm) and
 * returns 1.
 */
int yobi_db_env_set_lg_dir (DB_ENV * env, const char * dir) {
    (void)env;
    (void)dir;
    barf();
    return 1;
}
/*
 * DB_ENV set_lg_max method.  Not implemented: calls barf() (defined
 * elsewhere; presumably reports the unimplemented call -- confirm) and
 * returns 1.
 */
int yobi_db_env_set_lg_max (DB_ENV *env, u_int32_t lg_max) {
    (void)env;
    (void)lg_max;
    barf();
    return 1;
}
/*
 * DB_ENV set_lk_detect method.  Not implemented: calls barf() (defined
 * elsewhere; presumably reports the unimplemented call -- confirm) and
 * returns 1.
 */
int yobi_db_env_set_lk_detect (DB_ENV *env, u_int32_t detect) {
    (void)env;
    (void)detect;
    barf();
    return 1;
}
/*
 * DB_ENV set_lk_max method.  Not implemented: calls barf() (defined
 * elsewhere; presumably reports the unimplemented call -- confirm) and
 * returns 1.
 */
int yobi_db_env_set_lk_max (DB_ENV *env, u_int32_t lk_max) {
    (void)env;
    (void)lk_max;
    barf();
    return 1;
}
/*
 * DB_ENV set_noticecall method: install `noticecall` as the environment's
 * notice callback, replacing whatever callback was stored before.
 */
void yobi_db_env_set_noticecall (DB_ENV *env, void (*noticecall)(DB_ENV *, db_notices)) {
    (env->i)->noticecall = noticecall;
}
/*
 * DB_ENV set_tmp_dir method: store a private copy of `tmp_dir` in the
 * environment, releasing any copy stored by an earlier call.
 *
 * Returns 0 on success, EINVAL if `tmp_dir` is NULL, ENOMEM if the copy
 * cannot be allocated (the previous value is kept in both failure cases).
 */
int yobi_db_env_set_tmp_dir (DB_ENV * env, const char *tmp_dir) {
    char *copy;
    if (tmp_dir == NULL) return EINVAL;
    copy = strdup(tmp_dir);
    if (copy == NULL) return ENOMEM;
    /* tmp_dir starts out as 0 and is only ever set here, so it is always safe to free. */
    free(env->i->tmp_dir);
    env->i->tmp_dir = copy;
    return 0;
}
/*
 * DB_ENV set_verbose method.  Not implemented: calls barf() (defined
 * elsewhere; presumably reports the unimplemented call -- confirm) and
 * returns 1.
 */
int yobi_db_env_set_verbose (DB_ENV *env, u_int32_t which, int onoff) {
    (void)env;
    (void)which;
    (void)onoff;
    barf();
    return 1;
}
/*
 * DB_ENV txn_checkpoint method.  Does nothing and reports success (0);
 * every argument is ignored.
 */
int yobi_db_env_txn_checkpoint (DB_ENV *env, u_int32_t kbyte, u_int32_t min, u_int32_t flags) {
    (void)env;
    (void)kbyte;
    (void)min;
    (void)flags;
    return 0;
}
/*
 * DB_ENV txn_stat method.  Not implemented: calls barf() (defined elsewhere;
 * presumably reports the unimplemented call -- confirm) and returns 1.
 * `*statp` is not written.
 */
int yobi_db_env_txn_stat (DB_ENV *env, DB_TXN_STAT **statp, u_int32_t flags) {
    (void)env;
    (void)statp;
    (void)flags;
    barf();
    return 1;
}
/*
 * Default error callback installed by db_env_create: writes
 * "YDB: <errpfx>: <msg>" to stderr.  No newline is appended.
 */
void yobi_default_errcall(const char *errpfx, char *msg) {
    FILE *out = stderr;
    fprintf(out, "YDB: %s: %s", errpfx, msg);
}
/*
 * Allocate a DB_ENV, fill in its method table and give its private state
 * default values (default error callback, empty error prefix, room for four
 * file slots).
 *
 * envp  - receives the new environment on success; untouched on failure.
 * flags - only logged.
 *
 * Returns 0 on success or ENOMEM if any allocation fails (nothing is leaked).
 */
int db_env_create (DB_ENV **envp, u_int32_t flags) {
    /* calloc so that any method slot not assigned below is 0 rather than garbage. */
    DB_ENV *result = calloc(1, sizeof(*result));
    fprintf(stderr, "%s:%d db_env_create flags=%u, returning %p\n", __FILE__, __LINE__, flags, (void *)result);
    if (result == NULL) return ENOMEM;
    result->err = yobi_db_env_err;
    result->open = yobi_db_env_open;
    result->close = yobi_db_env_close;
    result->txn_checkpoint = yobi_db_env_txn_checkpoint;
    result->log_flush = yobi_db_env_log_flush;
    result->set_errcall = yobi_db_env_set_errcall;
    result->set_errpfx = yobi_db_env_set_errpfx;
    result->set_noticecall = yobi_db_env_set_noticecall;
    result->set_flags = yobi_db_env_set_flags;
    result->set_data_dir = yobi_db_env_set_data_dir;
    result->set_tmp_dir = yobi_db_env_set_tmp_dir;
    result->set_verbose = yobi_db_env_set_verbose;
    result->set_lg_bsize = yobi_db_env_set_lg_bsize;
    result->set_lg_dir = yobi_db_env_set_lg_dir;
    result->set_lg_max = yobi_db_env_set_lg_max;
    result->set_cachesize = yobi_db_env_set_cachesize;
    result->set_lk_detect = yobi_db_env_set_lk_detect;
    result->set_lk_max = yobi_db_env_set_lk_max;
    result->log_archive = yobi_db_env_log_archive;
    result->txn_stat = yobi_db_env_txn_stat;
    result->txn_begin = txn_begin;
    /* Zero-filled so that open_flags, open_mode and cachetable are not left uninitialized. */
    result->i = calloc(1, sizeof(*result->i));
    if (result->i == NULL) {
        free(result);
        return ENOMEM;
    }
    result->i->dir = 0;
    result->i->noticecall = 0;
    result->i->tmp_dir = 0;
    result->i->errcall = yobi_default_errcall;
    result->i->errpfx = "";
    result->i->n_files = 0;
    result->i->files_array_limit = 4;
    result->i->files = malloc(result->i->files_array_limit*sizeof(*result->i->files));
    if (result->i->files == NULL) {
        free(result->i);
        free(result);
        return ENOMEM;
    }
    *envp = result;
    return 0;
}
/*
 * DB_TXN commit method installed by txn_begin.  Only logs the flags through
 * notef() and reports success (0); `txn` is not touched.
 */
int yobi_db_txn_commit (DB_TXN *txn, u_int32_t flags) {
    (void)txn;
    notef("flags=%d\n", flags);
    return 0;
}
/*
 * DB_TXN id method installed by txn_begin.  Not implemented: calls barf()
 * (defined elsewhere; presumably reports the unimplemented call -- confirm)
 * and then abort(), so it never returns a value.
 */
u_int32_t yobi_db_txn_id (DB_TXN *txn) {
    (void)txn;
    barf();
    abort();
}
/*
 * Create a transaction handle whose commit and id methods are the yobi_db_txn_*
 * functions of this file.
 *
 * env   - unused.
 * stxn  - parent transaction; only logged.
 * txn   - receives the new handle on success; untouched on failure.
 * flags - only logged.
 *
 * Returns 0 on success or ENOMEM if the handle cannot be allocated.
 */
int txn_begin (DB_ENV *env, DB_TXN *stxn, DB_TXN **txn, u_int32_t flags) {
    DB_TXN *result = malloc(sizeof(*result));
    notef("parent=%p flags=0x%x\n", stxn, flags);
    if (result == NULL) return ENOMEM;
    result->commit = yobi_db_txn_commit;
    result->id = yobi_db_txn_id;
    *txn = result;
    return 0;
}
/*
 * Not implemented: logs the call to stderr and terminates the process with
 * abort(), so it never returns.
 */
int txn_abort (DB_TXN *txn) {
    FILE *log = stderr;
    fprintf(log, "txn_abort(%p)\n", txn);
    abort();
}
/*
 * Not implemented: logs the call (source location, handle and flags) to
 * stderr and terminates the process with abort(), so it never returns.
 */
int txn_commit (DB_TXN *txn, u_int32_t flags) {
    /* "%u" -- the earlier "%ud" printed a literal 'd' after the flags value. */
    fprintf(stderr, "%s:%d txn_commit(%p,%u)\n", __FILE__, __LINE__, (void *)txn, flags);
    abort();
}
/*
 * Not implemented: logs the call (source location and both LSN pointers) to
 * stderr and terminates the process with abort(), so it never returns.
 */
int log_compare (const DB_LSN *a, const DB_LSN *b) {
    FILE *log = stderr;
    fprintf(log, "%s:%d log_compare(%p,%p)\n", __FILE__, __LINE__, a, b);
    abort();
}
/*
 * DB close method: close the underlying BRT, trace the result on stdout and
 * mark the handle's private state as freed.  The handle memory itself is not
 * released here.  Returns whatever close_brt() returned; `flags` is ignored.
 */
int yobi_db_close (DB *db, u_int32_t flags) {
    int rc;
    (void)flags;
    rc = close_brt(db->i->brt);
    printf("%s:%d %d=yobi_db_close(%p)\n", __FILE__, __LINE__, rc, db);
    db->i->freed = 1;
    return rc;
}
/* Private state of a cursor, filled in by yobi_db_cursor and reached as c->i. */
struct yobi_dbc_internal {
BRT_CURSOR c; /* Underlying BRT cursor obtained from brt_cursor(). */
DB *db; /* The database the cursor was opened on. */
};
/*
 * DBC c_get method: forward the request unchanged to brt_c_get() on the
 * cursor's underlying BRT cursor and return its result.
 */
int yobi_c_get (DBC *c, DBT *key, DBT *data, u_int32_t flag) {
    int rc = brt_c_get(c->i->c, key, data, flag);
    return rc;
}
/*
 * DBC c_close method: close the underlying BRT cursor, trace the result on
 * stdout, and release the handle and its private state, both of which were
 * malloc'd by yobi_db_cursor.  The handle must not be used after this call,
 * whatever the return value.
 *
 * Returns whatever brt_cursor_close() returned.
 */
int yobi_c_close (DBC *c) {
    int r = brt_cursor_close(c->i->c);
    /* Trace before freeing so the pointer value printed is still valid. */
    printf("%s:%d %d=yobi_c_close(%p)\n", __FILE__, __LINE__, r, (void *)c);
    free(c->i);
    free(c);
    return r;
}
/*
 * DBC c_del method.  Not implemented: calls barf() (defined elsewhere;
 * presumably reports the unimplemented call -- confirm) and returns 0.
 */
int yobi_c_del (DBC *c, u_int32_t flags) {
    (void)c;
    (void)flags;
    barf();
    return 0;
}
/*
 * DB cursor method: allocate a cursor handle over `db`, install the yobi_c_*
 * methods and attach a BRT cursor on the database's tree.
 *
 * db    - an opened database.
 * txn   - unused.
 * c     - receives the new cursor on success; untouched on failure.
 * flags - unused.
 *
 * Returns 0 on success or ENOMEM if either allocation fails (nothing is
 * leaked).  A failure of brt_cursor() is still treated as fatal (assert).
 */
int yobi_db_cursor (DB *db, DB_TXN *txn, DBC **c, u_int32_t flags) {
    DBC *result = malloc(sizeof(*result));
    int r;
    if (result == NULL) return ENOMEM;
    result->c_get = yobi_c_get;
    result->c_close = yobi_c_close;
    result->c_del = yobi_c_del;
    result->i = malloc(sizeof(*result->i));
    if (result->i == NULL) {
        free(result);
        return ENOMEM;
    }
    result->i->db = db;
    r = brt_cursor(db->i->brt, &result->i->c);
    assert(r==0);
    *c = result;
    return 0;
}
/*
 * DB del method.  Not implemented: calls barf() (defined elsewhere;
 * presumably reports the unimplemented call -- confirm) and then abort(),
 * so it never returns.
 */
int yobi_db_del (DB *db, DB_TXN *txn, DBT *dbt, u_int32_t flags) {
    (void)db;
    (void)txn;
    (void)dbt;
    (void)flags;
    barf();
    abort();
}
/*
 * DB get method.  Not implemented: calls barf() (defined elsewhere;
 * presumably reports the unimplemented call -- confirm) and then abort(),
 * so it never returns.
 */
int yobi_db_get (DB *db, DB_TXN *txn, DBT *dbta, DBT *dbtb, u_int32_t flags) {
    (void)db;
    (void)txn;
    (void)dbta;
    (void)dbtb;
    (void)flags;
    barf();
    abort();
}
/*
 * DB key_range method.  Not implemented: calls barf() (defined elsewhere;
 * presumably reports the unimplemented call -- confirm) and then abort(),
 * so it never returns.
 */
int yobi_db_key_range (DB *db, DB_TXN *txn, DBT *dbt, DB_KEY_RANGE *kr, u_int32_t flags) {
    (void)db;
    (void)txn;
    (void)dbt;
    (void)kr;
    (void)flags;
    barf();
    abort();
}
/*
 * Build the path of a file from a directory and a file name.
 *
 * dir   - directory to prepend; may be NULL or "" to mean "no directory".
 * fname - file name; if it starts with '/' it is absolute and `dir` is ignored.
 *
 * Exactly one '/' separates a non-empty directory from the file name.  With no
 * directory the file name is returned unchanged, so a relative name stays
 * relative instead of being turned into "/name".
 *
 * Returns a malloc'd string that the caller must free.  Allocation failure is
 * fatal (assert).
 */
char *construct_full_name (const char *dir, const char *fname) {
    if (dir == NULL || fname[0]=='/')
        dir = "";
    {
        int dirlen = strlen(dir);
        int fnamelen = strlen(fname);
        int len = dirlen+fnamelen+2; // One for the / between (which may not be there).  One for the trailing null.
        char *result = malloc(len);
        int l;
        printf("%s:%d len(%d)=%d+%d+2\n", __FILE__, __LINE__, len, dirlen, fnamelen);
        assert(result);
        l = snprintf(result, len, "%s", dir);
        /* Only a non-empty directory that does not already end in '/' needs a separator.
         * (A non-empty dir here implies fname is relative, see the test at the top.) */
        if (l > 0 && result[l-1] != '/') {
            result[l++] = '/';
            result[l] = 0;
        }
        snprintf(result+l, len-l, "%s", fname);
        return result;
    }
}
// The decision to embed subdatabases in files is a little bit painful.
// My original design was to simply create another file, but it turns out that we
// have to inherit mode bits and so forth from the first file that was created.
// Other problems may ensue (who is responsible for deleting the file? That's not so bad actually.)
// This suggests that we really need to put the multiple databases into one file.
/*
 * DB open method: resolve `fname` against the environment directory, record
 * the open parameters in the handle and open the underlying BRT.
 *
 * db     - handle from db_create that has not been opened yet.
 * txn    - only logged.
 * fname  - file name, resolved with construct_full_name().
 * dbname - sub-database name; may be NULL.
 * dbtype - only logged.
 * flags  - open flags.  Only DB_CREATE influences the open; DB_RDONLY is not
 *          honored yet (nothing here passes it on to open_brt).
 * mode   - file mode, stored in the handle.
 *
 * Returns 0 on success, -1 if the handle is already open, EEXIST if the file
 * exists and DB_CREATE was given without a dbname, ENOENT if the file cannot
 * be stat'ed and DB_CREATE was not given, ENOMEM if dbname cannot be copied.
 * On every error return the handle is left unopened and nothing is leaked.
 * A failure of open_brt() is still treated as fatal (assert).
 */
int yobi_db_open (DB *db, DB_TXN *txn, const char *fname, const char *dbname, DBTYPE dbtype, u_int32_t flags, int mode) {
    // Warning.  Should check arguments.  Should check return codes on malloc and open and so forth.
    char *full_fname;
    char *name_copy = 0;
    int r;
    /* dbname may legitimately be NULL, so never pass it straight to %s. */
    notef("txn=%p fname=%s dbname=%s dbtype=%d flags=0x%x mode=0%o\n", txn, fname, dbname ? dbname : "(null)", dbtype, flags, mode);
    print_flags(flags);
    if (db->i->full_fname) return -1; /* It was already open. */
    /* Keep the path in a local until every check has passed, so that a failed
     * open does not leave the handle looking as if it were already open. */
    full_fname = construct_full_name(db->i->env->i->dir, fname);
    printf("Full name = %s\n", full_fname);
    {
        struct stat statbuf;
        if (stat(full_fname, &statbuf)==0) {
            /* If the database exists at the file level, and we specified no db_name, then complain here. */
            if (dbname==0 && (flags&DB_CREATE)) {
                free(full_fname);
                return EEXIST;
            }
        } else {
            if (!(flags&DB_CREATE)) {
                free(full_fname);
                return ENOENT;
            }
        }
    }
    if (dbname) {
        name_copy = strdup(dbname);
        if (name_copy==0) {
            free(full_fname);
            return ENOMEM;
        }
    }
    db->i->full_fname = full_fname;
    db->i->database_name = name_copy;
    db->i->open_flags = flags;
    db->i->open_mode = mode;
    // Warning:  new_brt has deficiencies:
    //  Each tree has its own cache, instead of a big shared cache.
    //  It doesn't do error checking on insert.
    //  It's tough to do cursors.
    r=open_brt(db->i->full_fname, dbname, (flags&DB_CREATE), &db->i->brt, 1<<20, db->i->env->i->cachetable);
    assert(r==0);
    return 0;
}
/*
 * DB put method: insert the key `dbta` with value `dbtb` into the database's
 * BRT, trace the result on stdout and return what brt_insert() returned.
 * `txn` and `flags` are ignored.
 */
int yobi_db_put (DB *db, DB_TXN *txn, DBT *dbta, DBT *dbtb, u_int32_t flags) {
    int rc;
    (void)txn;
    (void)flags;
    rc = brt_insert(db->i->brt, dbta->data, dbta->size, dbtb->data, dbtb->size);
    printf("%s:%d %d=yobi_db_put(...)\n", __FILE__, __LINE__, rc);
    return rc;
}
/*
 * DB remove method.  Not implemented: calls barf() (defined elsewhere;
 * presumably reports the unimplemented call -- confirm) and then abort(),
 * so it never returns.
 */
int yobi_db_remove (DB *db, const char *fname, const char *dbname, u_int32_t flags) {
    (void)db;
    (void)fname;
    (void)dbname;
    (void)flags;
    barf();
    abort();
}
/*
 * DB rename method.  Not implemented: calls barf() (defined elsewhere;
 * presumably reports the unimplemented call -- confirm) and then abort(),
 * so it never returns.
 */
int yobi_db_rename (DB *db, const char *namea, const char *nameb, const char *namec, u_int32_t flags) {
    (void)db;
    (void)namea;
    (void)nameb;
    (void)namec;
    (void)flags;
    barf();
    abort();
}
/*
 * DB set_bt_compare method: emit a trace through note() and remember
 * `bt_compare` as the handle's comparison function.  Always returns 0.
 */
int yobi_db_set_bt_compare (DB *db, int (*bt_compare)(DB *, const DBT *, const DBT *)) {
    note();
    (db->i)->bt_compare = bt_compare;
    return 0;
}
/*
 * DB set_flags method.  No flags are supported: asserts that `flags` is 0
 * and returns 0.  `db` is not touched.
 */
int yobi_db_set_flags (DB *db, u_int32_t flags) {
    (void)db;
    assert(flags==0);
    return 0;
}
/*
 * DB stat method.  Not implemented: calls barf() (defined elsewhere;
 * presumably reports the unimplemented call -- confirm) and then abort(),
 * so it never returns.
 */
int yobi_db_stat (DB *db, void *v, u_int32_t flags) {
    (void)db;
    (void)v;
    (void)flags;
    barf();
    abort();
}
/*
 * Allocate a DB handle bound to `env`, fill in its method table and reset its
 * private state (not yet opened, no comparison function).
 *
 * db    - receives the new handle on success; untouched on failure.
 * env   - environment the database belongs to; stored, not copied.
 * flags - only logged.
 *
 * Returns 0 on success or ENOMEM if an allocation fails (nothing is leaked).
 */
int db_create (DB **db, DB_ENV *env, u_int32_t flags) {
    DB *result = malloc(sizeof(*result));
    fprintf(stderr, "%s:%d db_create(%p, %p, 0x%x)\n", __FILE__, __LINE__, (void *)db, (void *)env, flags);
    print_flags(flags);
    if (result == NULL) return ENOMEM;
    result->app_private = 0;
    result->close = yobi_db_close;
    result->cursor = yobi_db_cursor;
    result->del = yobi_db_del;
    result->get = yobi_db_get;
    result->key_range = yobi_db_key_range;
    result->open = yobi_db_open;
    result->put = yobi_db_put;
    result->remove = yobi_db_remove;
    result->rename = yobi_db_rename;
    result->set_bt_compare = yobi_db_set_bt_compare;
    result->set_flags = yobi_db_set_flags;
    result->stat = yobi_db_stat;
    /* Zero-filled so that fields only set by yobi_db_open (database_name, brt,
     * open_flags, open_mode) never hold garbage before the handle is opened. */
    result->i = calloc(1, sizeof(*result->i));
    if (result->i == NULL) {
        free(result);
        return ENOMEM;
    }
    result->i->env = env;
    result->i->bt_compare = 0;
    result->i->freed = 0;
    result->i->full_fname = 0;
    *db = result;
    return 0;
}
#include "../include/db.h"
#include <assert.h>
#include <stdio.h>
/*
 * Test driver skeleton: insists on exactly one command-line argument (the
 * file name) and does nothing with it yet.
 */
int main (int argc, char *argv[]) {
    assert(argc==2);
    {
        char *fname = argv[1];
        (void)fname;
    }
    return 0;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment