Commit f6131628 authored by Zardosht Kasheff's avatar Zardosht Kasheff Committed by Yoni Fogel

[t:3008], modify comments

git-svn-id: file:///svn/toku/tokudb@25152 c7de825b-a66e-492c-adef-691d508d4ae1
parent 2ef50f2e
...@@ -5,29 +5,8 @@ ...@@ -5,29 +5,8 @@
#ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it." #ident "The technology is licensed by the Massachusetts Institute of Technology, Rutgers State University of New Jersey, and the Research Foundation of State University of New York at Stony Brook under United States of America Serial No. 11/760379 and to the patents and/or patent applications resulting from it."
/* In the past, leaves simply contained key-value pairs. /*
* In this implementation, leaf values are more complex * Header file for Leafentries
* They can contain a committed value:
* - Which can be "not-present",
* - Or a key-value pair.
* They can contain a provisional value, which depends on whether a particular transaction commits or aborts.
* - A not-present value
* - Or a key-value pair.
* - Or there can be no provisional value at all (that is, the value doesn't depend on the transaction.)
* Note that if both the provisional value and the committed value are not-present, then there is simply no entry in the leaf.
* So let's enumerate the possibilities:
* committed pair A committed pair unaffected by any incomplete transaction.
* committed pair and provisional pair A committed pair to provisionally be replaced by a new pair.
* committed pair and provisional delete A committed pair that will be deleted
* provisional pair No committed pair, but a provisional pair to add.
*
* In the case of a committed pair and a provisional pair, the key is the same in both cases. The value can be different.
*
* For DUPSORT databases, the key-value pair is everything, so we need only represent the key-value pair once. So the cases are
* committed pair
* committed pair provisionally deleted
* provisional pair
* The case of a committed pair and a provisional pair can be represented by a committed pair, since it doesn't matter whether the transaction aborts or commits, the value is the same.
*/ */
#include <toku_portability.h> #include <toku_portability.h>
...@@ -63,6 +42,12 @@ extern "C" { ...@@ -63,6 +42,12 @@ extern "C" {
#if TOKU_WINDOWS #if TOKU_WINDOWS
#pragma pack(push, 1) #pragma pack(push, 1)
#endif #endif
//
// enum of possible values for LEAFENTRY->type field
//
enum { LE_CLEAN = 0, LE_MVCC = 1 };
struct __attribute__ ((__packed__)) leafentry { struct __attribute__ ((__packed__)) leafentry {
uint8_t type; uint8_t type;
uint32_t keylen; uint32_t keylen;
...@@ -70,23 +55,22 @@ struct __attribute__ ((__packed__)) leafentry { ...@@ -70,23 +55,22 @@ struct __attribute__ ((__packed__)) leafentry {
struct __attribute__ ((__packed__)) leafentry_clean { struct __attribute__ ((__packed__)) leafentry_clean {
uint32_t vallen; uint32_t vallen;
uint8_t key_val[0]; //Actual key, then actual val uint8_t key_val[0]; //Actual key, then actual val
} clean; } clean; // For the case where LEAFENTRY->type is LE_CLEAN
struct __attribute__ ((__packed__)) leafentry_mvcc { struct __attribute__ ((__packed__)) leafentry_mvcc {
uint32_t num_cxrs; uint32_t num_cxrs; // number of committed uxrs
uint8_t num_pxrs; uint8_t num_pxrs; // number of provisional uxrs
u_int8_t key_xrs[0]; //Actual key, u_int8_t key_xrs[0]; //Actual key,
//then interesting TXNIDs //then interesting TXNIDs
//then interesting lengths (type bit is MSB of length) //then interesting lengths (type bit is MSB of length)
//then interesting data //then interesting data
//then other transaction records //then other transaction records
} mvcc; } mvcc; // For the case where LEAFENTRY->type is LE_MVCC
} u; } u;
}; };
#if TOKU_WINDOWS #if TOKU_WINDOWS
#pragma pack(pop) #pragma pack(pop)
#endif #endif
enum { LE_CLEAN = 0, LE_MVCC = 1 };
#define LE_CLEAN_MEMSIZE(keylen, vallen) \ #define LE_CLEAN_MEMSIZE(keylen, vallen) \
(sizeof(((LEAFENTRY)NULL)->type) /* num_uxrs */ \ (sizeof(((LEAFENTRY)NULL)->type) /* num_uxrs */ \
+sizeof(((LEAFENTRY)NULL)->keylen) /* keylen */ \ +sizeof(((LEAFENTRY)NULL)->keylen) /* keylen */ \
...@@ -173,16 +157,16 @@ le_clean(uint8_t *key, uint32_t keylen, ...@@ -173,16 +157,16 @@ le_clean(uint8_t *key, uint32_t keylen,
//Callback contract: //Callback contract:
// Returns: // Returns:
// 0: Ignore this entry and go on to next one. // 0: Ignore this entry and go on to next one.
// TOKUDB_ACCEPT: Quit early, accept this transaction record and return appropriate data // TOKUDB_ACCEPT: Quit early, accept this transaction record and return appropriate data
// r|r!=0&&r!=TOKUDB_ACCEPT: Quit early, return r // r|r!=0&&r!=TOKUDB_ACCEPT: Quit early, return r
typedef int(*LE_ITERATE_CALLBACK)(TXNID id, TOKUTXN context); typedef int(*LE_ITERATE_CALLBACK)(TXNID id, TOKUTXN context);
int le_iterate_is_empty(LEAFENTRY le, LE_ITERATE_CALLBACK f, BOOL *is_empty, TOKUTXN context); int le_iterate_is_empty(LEAFENTRY le, LE_ITERATE_CALLBACK f, BOOL *is_empty, TOKUTXN context);
int le_iterate_val(LEAFENTRY le, LE_ITERATE_CALLBACK f, void** valpp, u_int32_t *vallenp, TOKUTXN context); int le_iterate_val(LEAFENTRY le, LE_ITERATE_CALLBACK f, void** valpp, u_int32_t *vallenp, TOKUTXN context);
#if defined(__cplusplus) || defined(__cilkplusplus) #if defined(__cplusplus) || defined(__cilkplusplus)
......
...@@ -585,8 +585,11 @@ le_pack(ULE ule, // data to be packed into new leafen ...@@ -585,8 +585,11 @@ le_pack(ULE ule, // data to be packed into new leafen
invariant(ule->uxrs[0].xid == TXNID_NONE); invariant(ule->uxrs[0].xid == TXNID_NONE);
int rval; int rval;
{ {
//If there are no 'insert' entries, return NO leafentry. // The unpacked leafentry may contain no inserts anywhere on its stack.
//uxrs[ule->num_cuxrs-1] is outermost (committed) // If so, then there IS no leafentry to pack, we should return NULL
// So, first we check the stack to see if there is any insert. If not,
// then we can return NULL and exit the function; otherwise, we goto
// found_insert and proceed with packing the leafentry.
uint32_t i; uint32_t i;
for (i = 0; i < ule->num_cuxrs + ule->num_puxrs; i++) { for (i = 0; i < ule->num_cuxrs + ule->num_puxrs; i++) {
if (uxr_is_insert(&ule->uxrs[i])) { if (uxr_is_insert(&ule->uxrs[i])) {
...@@ -607,6 +610,7 @@ found_insert:; ...@@ -607,6 +610,7 @@ found_insert:;
//Universal data //Universal data
new_leafentry->keylen = toku_htod32(ule->keylen); new_leafentry->keylen = toku_htod32(ule->keylen);
//p always points to first unused byte after leafentry we are packing
u_int8_t *p; u_int8_t *p;
invariant(ule->num_cuxrs>0); invariant(ule->num_cuxrs>0);
//Type specific data //Type specific data
......
...@@ -49,8 +49,9 @@ typedef struct { // unpacked leaf entry ...@@ -49,8 +49,9 @@ typedef struct { // unpacked leaf entry
uint32_t keylen; uint32_t keylen;
void * keyp; void * keyp;
UXR_S uxrs_static[MAX_TRANSACTION_RECORDS*2]; // uxrs[0] is oldest committed (txn commit time, not txn start time), uxrs[num_cuxrs] is outermost provisional value (if any exist/num_puxrs > 0) UXR_S uxrs_static[MAX_TRANSACTION_RECORDS*2]; // uxrs[0] is oldest committed (txn commit time, not txn start time), uxrs[num_cuxrs] is outermost provisional value (if any exist/num_puxrs > 0)
UXR uxrs; //If num_cuxrs < MAX_TRANSACTION_RECORDS then &uxrs_static[0]. UXR uxrs; //If num_cuxrs < MAX_TRANSACTION_RECORDS then uxrs = &uxrs_static[0].
//Otherwise we use a dynamically allocated array of size num_cuxrs + 1 + MAX_TRANSACTION_RECORDS. //Otherwise we use a dynamically allocated array of size num_cuxrs + 1 + MAX_TRANSACTION_RECORDS.
//Felt that MAX_TRANSACTION_RECORDS was a good upper bound for the number of committed UXRs we would be willing to allocate off the stack.
} ULE_S, *ULE; } ULE_S, *ULE;
int apply_msg_to_leafentry(BRT_MSG msg, int apply_msg_to_leafentry(BRT_MSG msg,
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment