Commit 4461dceb authored by Yoni Fogel's avatar Yoni Fogel

Addresses #596

Updated .h file pursuant to Bradley's comments

git-svn-id: file:///svn/tokudb@3518 c7de825b-a66e-492c-adef-691d508d4ae1
parent 6609d172
...@@ -3,204 +3,165 @@ ...@@ -3,204 +3,165 @@
#ident "Copyright (c) 2007 Tokutek Inc. All rights reserved." #ident "Copyright (c) 2007 Tokutek Inc. All rights reserved."
/* I'm writing this in C to demonstrate how it is used. We can implement it // Order Maintenance Array (OMA)
later either using void*s //
or templates under the hood. */ // Maintains a collection of totally ordered values, where each value has an integer weight.
// The OMA is a mutable datatype.
/* I've made the following assumptions which very well might be wrong. //
// The Abstraction:
1: We are storing key/value pairs, not just keys. //
1a: We want to abstract a key/value pair to an OMITEM. // An OMA is a vector of values, $V$, where $|V|$ is the length of the vector.
2: OMITEM will NOT support telling you the index number (for now). // The vector is numbered from $0$ to $|V|-1$.
2a: Indexs (for purposes of logging) will be retrieved by an output // Each value has a weight. The weight of the $i$th element is denoted $w(V_i)$.
parameter. //
3: The CALLER of the OMS functions own the memory of the DBTs. // We can create a new OMA, which is the empty vector.
The OM structure will copy the OMITEM, but //
key.data and value.data will be owned by the caller // We can insert a new element $x$ into slot $i$, changing $V$ into $V'$ where
responsibility for freeing/etc belongs to the caller. // $|V'|=1+|V|$ and
Should not free anything until it's been removed from the OMS. //
4: I don't know what to call it so I'm just calling it 'oms_blah' // V'_j = V_j if $j<i$
5: We do not need to do multiple interleaving iterations. // x if $j=i$
5a: If we do, we need to change prototypes, perhaps pass a status object along. // V_{j-1} if $j>i$.
6: For inserting (with search), it will not replace already existing items //
it will just report that it was already inside. // We can specify $i$ using a kind of function instead of as an integer.
*/ // Let $b$ be a function mapping from values to nonzero integers, such that
// the signum of $b$ is monotonically increasing.
/* This is my guess of what an OMITEM should be. */ // We can specify $i$ as the minimum integer such that $b(V_i)>0$.
typedef struct { //
DBT key; // We look up a value using its index, or using a Heaviside function.
DBT value; // For lookups, we allow $b$ to be zero for some values, and again the signum of $b$ must be monotonically increasing.
} OMITEM; // When looking up values, we can look up
// $V_i$ where $i$ is the minimum integer such that $b(V_i)=0$. (With a special return code if no such value exists.)
/* // (Rationale: Ordinarily we want $i$ to be unique. But for various reasons we want to allow multiple zeros, and we want the smallest $i$ in that case.)
Create an empty OMS. // $V_i$ where $i$ is the minimum integer such that $b(V_i)>0$. (Or an indication that no such value exists.)
// $V_i$ where $i$ is the maximum integer such that $b(V_i)<0$. (Or an indication that no such value exists.)
Possible Error codes //
0 // When looking up a value using a Heaviside function, we get the value and its index.
ENOMEM //
Will assert ptree, db, cmp are NOT NULL. // We can also split an OMA into two OMAs, splitting the weight of the values evenly.
*/ // Find a value $j$ such that the values to the left of $j$ have about the same total weight as the values to the right of $j$.
int oms_create(OMS** ptree, // The resulting two OMAs contain the values to the left of $j$ and the values to the right of $j$ respectively.
DB* db, int (*cmp)(DB*, const OMITEM*, const OMITEM*)); // All of the values from the original OMA go into one of the new OMAs.
// If the weights of the values don't split exactly evenly, then the implementation has the freedom to choose whether
/* // the new left OMA or the new right OMA is larger.
Create an OMS containing the elements in a presorted array. //
// Performance:
Possible Error codes // Insertion and deletion should run with $O(\log |V|)$ time and $O(\log |V|)$ calls to the Heaviside function.
0 // The memory required is O(|V|).
ENOMEM //
Will assert ptree, db, cmp, items are NOT NULL. // The programming API:
*/
int oms_create_from_presorted_array(OMS** ptree, DB* db, typedef struct value *OMAVALUE; // A slight improvement over using void*.
int (*cmp)(DB*, const OMITEM*, const OMITEM*), typedef struct oma *OMA;
OMITEM* items, u_int32_t num_items);
int toku_oma_create (OMA *omap);
/* // Effect: Create an empty OMA. Stores it in *omap.
Create an OMS containing presorted elements accessed by an iterator. // Returns:
// 0 success
Possible Error codes // ENOMEM out of memory (and doesn't modify *omap)
0 // Performance: constant time.
ENOMEM
Will assert ptree is NOT NULL. int toku_oma_create_from_sorted_array(OMA* omap, OMAVALUE *values, u_int32_t numvalues);
// Effect: Create a OMA containing values. The number of values is in numvalues.
NOTE: I'm using void* here cause I don't know what the parameters should be. // Stores the new OMA in *omap.
In the actual implementation I will use the real data types. // Returns:
We can also change the iterator type, i.e. make it return int // 0 success
and we get next via an output parameter. // ENOMEM out of memory (and doesn't modify *omap)
// Performance: time=O(numvalues)
Note: May just be a wrapper for oms_create_presorted_array.
void toku_oma_destroy(OMA *omap);
Will assert ptree, db, cmp, items are NOT NULL. // Effect: Destroy an OMA, freeing all its memory.
*/ // Does not free the OMAVALUEs stored in the OMA.
int oms_create_from_presorted_iterator(OMS** ptree, DB* db, // Those values may be freed before or after calling toku_oma_destroy.
int (*cmp)(DB*, const OMITEM*, const OMITEM*), // Also sets *omap=NULL.
OMITEM* (*get_next)(void* param)); // Rationale: The usage is to do something like
// toku_oma_destroy(&s->oma);
/* // and now s->oma will have a NULL pointer instead of a dangling freed pointer.
Close/free an OMS. // Rationale: Returns no values since free() cannot fail.
Note: This will not free key.data/value.data for entries inside. // Performance: time=O(toku_oma_size(*omap))
Those should be freed immediately before or after calling oms_destroy.
u_int32_t toku_oma_size(OMA V);
Will assert tree is NOT NULL. // Effect: return |V|.
*/ // Performance: time=O(1)
void oms_destroy(OMS* tree);
int toku_oma_iterate(OMA oma, int (*f)(OMAVALUE, u_int32_t, void*), void*v);
/* // Effect: Iterate over the values of the oma, from left to right, calling f on each value.
NOTE: USES THE COMPARISON FUNCTION // The second argument passed to f is the index of the value.
Initializes iteration over the tree. // The third argument passed to f is v.
if start is NULL, we start at the head, otherwise we search for it. // The indices run from 0 (inclusive) to toku_oma_size(oma) (exclusive).
Searching requires a comparison function! // Returns:
// If f ever returns nonzero, then the iteration stops, and the value returned by f is returned by toku_oma_iterate.
Will assert tree is NOT NULL. // If f always returns zero, then toku_oma_iterate returns 0.
// Requires: Don't modify oma while running. (E.g., f may not insert or delete values from oma.)
if not found, it will allow you to find // Performance: time=O(i+\log N) where i is the number of times f is called, and N is the number of elements in oma.
*/
void oms_init_iteration(OMS* tree, OMITEM* start); int toku_oma_insert_at(OMA oma, OMAVALUE value, u_int32_t index);
// Effect: Insert value into the position at index, moving everything to the right up one slot.
/* // Returns:
Initializes iteration over the tree. // 0 success
if start is NULL, we start at the head, otherwise we search for it. // ERANGE if index>toku_oma_size(oma)
Searching requires a comparison function! // ENOMEM
// On error, oma is unchanged.
Will assert tree is NOT NULL. // Performance: time=O(\log N) amortized time.
// Rationale: Some future implementation may be O(\log N) worst-case time, but O(\log N) amortized is good enough for now.
Possible error codes
0 int toku_oma_insert(OMA oma, OMAVALUE value, int(*h)(OMAVALUE, void*v), void *v, u_int32_t* index);
ERANGE: If start_index >= the number of elements in the structure // Effect: Insert value into the OMA.
*/ // If there is some i such that $h(V_i, v)=0$ then returns DB_KEYEXIST.
int oms_init_iteration_at(OMS* tree, u_int32_t start_index); // Otherwise, let i be the minimum value such that $h(V_i, v)>0$. Then this has the same effect as
// toku_oma_insert_at(oma, value, i);
/* // Requires: The signum of h must be monotonically increasing.
Gets the next item in the tree. // Returns:
When you go off the end, it returns NULL, as will subsequent calls. // 0 success
// DB_KEYEXIST the key is present (h was equal to zero for some value)
Use oms_init_iteration(_at) to reset the iterator. // ENOMEM
*/ // On nonzero return, oma is unchanged.
OMITEM* oms_get_next(OMS* tree); // Performance: time=O(\log N) amortized.
/* int toku_oma_delete_at(OMA oma, u_int32_t index);
NOTE: USES THE COMPARISON FUNCTION // Effect: Delete the item in slot index.
Insert an item at the appropriate place. // Returns
// 0 success
Will assert tree, item, and already_exists are NOT NULL. // ERANGE if index out of range
already_exists is an out parameter. // ENOMEM
If the exact OMITEM is already there, it will NOT be replaced, // On error, oma is unchanged.
but we will report that. // Rationale: To delete an item, first find its index using toku_oma_find, then delete it.
Reports the index it was found at. // Performance: time=O(\log N) amortized.
Possible error codes:
0 int toku_oma_find_index (OMA V, u_int32_t i, VALUE *v);
ENOMEM // Effect: Set *v=V_i
DB_KEYEXIST: If it already exists in the structure. // Returns 0 on success
*/ // ERANGE if i out of range (and doesn't modify v)
int oms_insert(OMS* tree, OMITEM* item, u_int32_t* index); // Performance: time=O(\log N)
/* int toku_oma_find(OMA V, int (*h)(VALUE, void*extra), void*extra, int direction, VALUE *value, u_int32_t *index);
Insert an item at a given index. // Effect:
// If direction==0 then find the smallest i such that h(V_i,extra)==0.
Will assert tree, item, and already_exists are NOT NULL. // If direction>0 then find the smallest i such that h(V_i,extra)>0.
already_exists is an out parameter. // If direction<0 then find the largest i such that h(V_i,extra)<0.
If the exact OMITEM is already there, it will NOT be replaced, // If no such value is found, then return DB_NOTFOUND,
but we will report that. // otherwise return 0 and set *value=V_i and set *index=i.
// Performance: time=O(\log N)
Possible error codes:
0 int toku_oma_split_at(OMA oma, OMA *newoma, u_int32_t index);
ENOMEM // Effect: Create a new OMA, storing it in *newoma.
*/ // The values to the right of index (starting at index) are moved to *newoma.
int oms_insert_at(OMS* tree, OMITEM* item, u_int32_t index); // Returns 0 on success,
// ERANGE if index out of range
/* // ENOMEM
NOTE: USES THE COMPARISON FUNCTION // On nonzero return, oma and *newoma are unmodified.
Deletes a given item. // Performance: time=O(n)
// Rationale: We don't need a split-evenly operation. We need to split items so that their total sizes
Will assert tree, item, and found are NOT NULL. // are even, and other similar splitting criteria. It's easy to split evenly by calling toku_oma_size(), and dividing by two.
Reports the index it was found at.
Possible error codes:
0
DB_NOTFOUND
*/
int oms_delete(OMS* tree, OMITEM* item, u_int32_t* index);
/*
Deletes the item at a given index.
Possible error codes:
0
ERANGE: If index >= num elements in the structure
*/
int oms_delete_at(OMS* tree, u_int32_t index);
/*
I don't know what kind of 'finds' we need here.
*/
int oms_find(OMS* tree, OMITEM* item, u_int32_t find_flags);
/*
Creates 2 new trees caused by splitting the current one evenly.
Reports the split index.
Does NOT free the old one.
*/
int oms_split_evenly(OMS* tree, OMS** pleft_tree, OMS** pright_tree,
u_int32_t* index);
/*
Creates 2 new trees caused by splitting the current one at the
given index. (0..index-1) are in left, (index..end) are in right.
Does NOT free the old one.
*/
int oms_split_at(OMS* tree, OMS** pleft_tree, OMS** pright_tree,
u_int32_t index);
int toku_oma_merge(OMA leftoma, OMA rightoma, OMA *newoma);
/* // Effect: Appends leftoma and rightoma to produce a new oma.
Creates one tree from merging 2 of them. // Sets *newoma to the new oma.
Does not free the old one. // leftoma and rightoma are left unchanged.
reports the 'split index' that you would use to undo the operation. // Returns 0 on success
*/ // ENOMEM on out of memory.
int oms_merge(OMS** ptree, OMS* left_tree, OMS* right_tree, u_int32_t* index); // On error, nothing is modified.
// Performance: time=O(n) is acceptable, but one can imagine implementations that are O(\log n) worst-case.
u_int32_t oms_get_num_elements(OMS* tree);
#endif /* #ifndef OM_H */
\ No newline at end of file
#endif /* #ifndef OM_H */
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment