Commit 2041d749 authored by Yoni Fogel's avatar Yoni Fogel

Refs Tokutek/ft-index#46 Add some comments. Improve dmt::verify() to check...

Refs Tokutek/ft-index#46 Add some comments.  Improve dmt::verify() to check for more types of corruption.  More static checks
parent 8ab94bd1
...@@ -170,6 +170,7 @@ const unsigned int random_seed = 0xFEADACBA; ...@@ -170,6 +170,7 @@ const unsigned int random_seed = 0xFEADACBA;
static void fail_one_verify(uint32_t len, uint32_t num, vdmt *v) { static void fail_one_verify(uint32_t len, uint32_t num, vdmt *v) {
val_type* fetched_data; val_type* fetched_data;
int count = 0; int count = 0;
v->verify();
for (uint32_t i = 0; i < num; i++) { for (uint32_t i = 0; i < num; i++) {
uint32_t fetched_len; uint32_t fetched_len;
int r = v->fetch(i-count, &fetched_len, &fetched_data); int r = v->fetch(i-count, &fetched_len, &fetched_data);
...@@ -182,6 +183,7 @@ static void fail_one_verify(uint32_t len, uint32_t num, vdmt *v) { ...@@ -182,6 +183,7 @@ static void fail_one_verify(uint32_t len, uint32_t num, vdmt *v) {
} }
static void verify(uint32_t len, uint32_t num, vdmt *v) { static void verify(uint32_t len, uint32_t num, vdmt *v) {
v->verify();
val_type* fetched_data; val_type* fetched_data;
for (uint32_t i = 0; i < num; i++) { for (uint32_t i = 0; i < num; i++) {
uint32_t fetched_len; uint32_t fetched_len;
......
...@@ -449,8 +449,9 @@ int dmt<dmtdata_t, dmtdataout_t>::delete_at(const uint32_t idx) { ...@@ -449,8 +449,9 @@ int dmt<dmtdata_t, dmtdataout_t>::delete_at(const uint32_t idx) {
this->clear(); //Emptying out the entire dmt. this->clear(); //Emptying out the entire dmt.
return 0; return 0;
} }
//TODO: support array delete
if (this->is_array) { if (this->is_array) {
//TODO: support array delete
//TODO: If/when we implement array delete, we must update verify() (and others?) w.r.t. mempool fragmentation
this->convert_from_array_to_tree(); this->convert_from_array_to_tree();
} }
paranoid_invariant(!is_array); paranoid_invariant(!is_array);
...@@ -489,27 +490,70 @@ int dmt<dmtdata_t, dmtdataout_t>::iterate_on_range(const uint32_t left, const ui ...@@ -489,27 +490,70 @@ int dmt<dmtdata_t, dmtdataout_t>::iterate_on_range(const uint32_t left, const ui
template<typename dmtdata_t, typename dmtdataout_t> template<typename dmtdata_t, typename dmtdataout_t>
void dmt<dmtdata_t, dmtdataout_t>::verify(void) const { void dmt<dmtdata_t, dmtdataout_t>::verify(void) const {
if (!is_array) { uint32_t num_values = this->size();
verify_internal(this->d.t.root); invariant(num_values < UINT32_MAX);
size_t pool_used = toku_mempool_get_used_space(&this->mp);
size_t pool_size = toku_mempool_get_size(&this->mp);
size_t pool_frag = toku_mempool_get_frag_size(&this->mp);
invariant(pool_used <= pool_size);
if (this->is_array) {
invariant(this->values_same_size);
invariant(num_values == this->d.a.num_values);
// We know exactly how much memory should be used.
invariant(pool_used == num_values * align(this->value_length));
// Array form must have 0 fragmentation in mempool.
invariant(pool_frag == 0); //TODO: if/when we implement array delete this invariant may need to change.
} else {
if (this->values_same_size) {
// We know exactly how much memory should be used.
invariant(pool_used == num_values * align(this->value_length + __builtin_offsetof(dmt_node, value)));
} else {
// We can only do a lower bound on memory usage.
invariant(pool_used >= num_values * __builtin_offsetof(dmt_node, value));
}
std::vector<bool> touched(pool_size, false);
verify_internal(this->d.t.root, &touched);
size_t bytes_used = 0;
for (size_t i = 0; i < pool_size; i++) {
if (touched.at(i)) {
++bytes_used;
}
}
invariant(bytes_used == pool_used);
} }
} }
// Verifies all weights are internally consistent.
template<typename dmtdata_t, typename dmtdataout_t> template<typename dmtdata_t, typename dmtdataout_t>
void dmt<dmtdata_t, dmtdataout_t>::verify_internal(const subtree &subtree) const { void dmt<dmtdata_t, dmtdataout_t>::verify_internal(const subtree &subtree, std::vector<bool> *touched) const {
if (subtree.is_null()) { if (subtree.is_null()) {
return; return;
} }
const dmt_node &node = get_node(subtree); const dmt_node &node = get_node(subtree);
if (this->values_same_size) {
invariant(node.value_length == this->value_length);
}
size_t offset = toku_mempool_get_offset_from_pointer_and_base(&this->mp, &node);
size_t node_size = align(__builtin_offsetof(dmt_node, value) + node.value_length);
invariant(offset <= touched->size());
invariant(offset+node_size <= touched->size());
invariant(offset % ALIGNMENT == 0);
// Mark memory as touched and never allocated to multiple nodes.
for (size_t i = offset; i < offset+node_size; ++i) {
invariant(!touched->at(i));
touched->at(i) = true;
}
const uint32_t leftweight = this->nweight(node.left); const uint32_t leftweight = this->nweight(node.left);
const uint32_t rightweight = this->nweight(node.right); const uint32_t rightweight = this->nweight(node.right);
invariant(leftweight + rightweight + 1 == this->nweight(subtree)); invariant(leftweight + rightweight + 1 == this->nweight(subtree));
if (this->values_same_size) { verify_internal(node.left, touched);
invariant(node.value_length == this->value_length); verify_internal(node.right, touched);
}
verify_internal(node.left);
verify_internal(node.right);
} }
template<typename dmtdata_t, typename dmtdataout_t> template<typename dmtdata_t, typename dmtdataout_t>
...@@ -621,7 +665,6 @@ void dmt<dmtdata_t, dmtdataout_t>::node_free(const subtree &st) { ...@@ -621,7 +665,6 @@ void dmt<dmtdata_t, dmtdataout_t>::node_free(const subtree &st) {
template<typename dmtdata_t, typename dmtdataout_t> template<typename dmtdata_t, typename dmtdataout_t>
void dmt<dmtdata_t, dmtdataout_t>::maybe_resize_tree(const dmtdatain_t * value) { void dmt<dmtdata_t, dmtdataout_t>::maybe_resize_tree(const dmtdatain_t * value) {
static_assert(std::is_same<dmtdatain_t, dmtdatain_t>::value, "functor wrong type");
const ssize_t curr_capacity = toku_mempool_get_size(&this->mp); const ssize_t curr_capacity = toku_mempool_get_size(&this->mp);
const ssize_t curr_free = toku_mempool_get_free_space(&this->mp); const ssize_t curr_free = toku_mempool_get_free_space(&this->mp);
const ssize_t curr_used = toku_mempool_get_used_space(&this->mp); const ssize_t curr_used = toku_mempool_get_used_space(&this->mp);
......
...@@ -95,6 +95,7 @@ PATENT RIGHTS GRANT: ...@@ -95,6 +95,7 @@ PATENT RIGHTS GRANT:
#include <toku_race_tools.h> #include <toku_race_tools.h>
#include "growable_array.h" #include "growable_array.h"
#include "../ft/wbuf.h" #include "../ft/wbuf.h"
#include <vector>
namespace toku { namespace toku {
typedef uint32_t node_idx; typedef uint32_t node_idx;
...@@ -201,7 +202,9 @@ using namespace toku::dmt_internal; ...@@ -201,7 +202,9 @@ using namespace toku::dmt_internal;
template<typename dmtdata_t> template<typename dmtdata_t>
class dmt_functor { class dmt_functor {
// Ensures that if you forget to use partial specialization this compile error will remind you to use it. // Ensures that if you forget to use partial specialization this compile error will remind you to use it.
static_assert(!std::is_same<dmtdata_t, dmtdata_t>::value, "Must use partial specialization on dmt_functor"); // We would use static_assert(false, ...) here except that it would cause a compile error even if dmt_functor<>
// We instead use an expression that evaluates to false that the compiler won't evaluate unless dmt_functor<> is used.
static_assert(!std::is_same<dmtdata_t, dmtdata_t>::value, "Cannot use default dmt_functor<>. Use partial specialization.");
// Defines the interface: // Defines the interface:
static size_t get_dmtdata_t_size(const dmtdata_t &) { return 0; } static size_t get_dmtdata_t_size(const dmtdata_t &) { return 0; }
size_t get_dmtdatain_t_size(void) { return 0; } size_t get_dmtdatain_t_size(void) { return 0; }
...@@ -377,6 +380,7 @@ public: ...@@ -377,6 +380,7 @@ public:
int (*f)(const uint32_t, const dmtdata_t &, const uint32_t, iterate_extra_t *const)> int (*f)(const uint32_t, const dmtdata_t &, const uint32_t, iterate_extra_t *const)>
int iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const iterate_extra) const; int iterate_on_range(const uint32_t left, const uint32_t right, iterate_extra_t *const iterate_extra) const;
// Attempt to verify this dmt is well formed. (Crashes/asserts/aborts if not well formed)
void verify(void) const; void verify(void) const;
/** /**
...@@ -503,9 +507,14 @@ public: ...@@ -503,9 +507,14 @@ public:
void prepare_for_serialize(void); void prepare_for_serialize(void);
private: private:
// Do a bit of verification that subtree and nodes act like packed c structs and do not introduce unnecessary padding for alignment.
ENSURE_POD(subtree);
static_assert(ALIGNMENT > 0, "ALIGNMENT <= 0");
static_assert((ALIGNMENT & (ALIGNMENT - 1)) == 0, "ALIGNMENT not a power of 2");
static_assert(sizeof(dmt_node) - sizeof(dmtdata_t) == __builtin_offsetof(dmt_node, value), "value is not last field in node"); static_assert(sizeof(dmt_node) - sizeof(dmtdata_t) == __builtin_offsetof(dmt_node, value), "value is not last field in node");
static_assert(4 * sizeof(uint32_t) == __builtin_offsetof(dmt_node, value), "dmt_node is padded"); static_assert(4 * sizeof(uint32_t) == __builtin_offsetof(dmt_node, value), "dmt_node is padded");
ENSURE_POD(subtree); static_assert(__builtin_offsetof(dmt_node, value) % ALIGNMENT == 0, "dmt_node requires padding for alignment");
ENSURE_POD(dmt_node);
struct dmt_array { struct dmt_array {
uint32_t start_idx; uint32_t start_idx;
...@@ -516,7 +525,6 @@ private: ...@@ -516,7 +525,6 @@ private:
subtree root; subtree root;
}; };
bool values_same_size; bool values_same_size;
uint32_t value_length; uint32_t value_length;
struct mempool mp; struct mempool mp;
...@@ -528,7 +536,7 @@ private: ...@@ -528,7 +536,7 @@ private:
uint32_t get_fixed_length_alignment_overhead(void) const; uint32_t get_fixed_length_alignment_overhead(void) const;
void verify_internal(const subtree &subtree) const; void verify_internal(const subtree &subtree, std::vector<bool> *touched) const;
void create_internal_no_alloc(bool as_tree); void create_internal_no_alloc(bool as_tree);
...@@ -647,6 +655,7 @@ private: ...@@ -647,6 +655,7 @@ private:
int find_internal_minus(const subtree &subtree, const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const; int find_internal_minus(const subtree &subtree, const dmtcmp_t &extra, uint32_t *const value_len, dmtdataout_t *const value, uint32_t *const idxp) const;
node_idx* alloc_temp_node_idxs(uint32_t num_idxs); node_idx* alloc_temp_node_idxs(uint32_t num_idxs);
uint32_t align(const uint32_t x) const; uint32_t align(const uint32_t x) const;
}; };
......
...@@ -168,9 +168,9 @@ void *toku_mempool_get_pointer_from_base_and_offset(const struct mempool *mp, si ...@@ -168,9 +168,9 @@ void *toku_mempool_get_pointer_from_base_and_offset(const struct mempool *mp, si
return reinterpret_cast<void*>(reinterpret_cast<char*>(mp->base) + offset); return reinterpret_cast<void*>(reinterpret_cast<char*>(mp->base) + offset);
} }
size_t toku_mempool_get_offset_from_pointer_and_base(const struct mempool *mp, void* p) { size_t toku_mempool_get_offset_from_pointer_and_base(const struct mempool *mp, const void* p) {
paranoid_invariant(p >= mp->base); paranoid_invariant(p >= mp->base);
return reinterpret_cast<char*>(p) - reinterpret_cast<char*>(mp->base); return reinterpret_cast<const char*>(p) - reinterpret_cast<const char*>(mp->base);
} }
size_t toku_mempool_get_size(const struct mempool *mp) { size_t toku_mempool_get_size(const struct mempool *mp) {
......
...@@ -137,7 +137,7 @@ void *toku_mempool_get_base(const struct mempool *mp); ...@@ -137,7 +137,7 @@ void *toku_mempool_get_base(const struct mempool *mp);
void *toku_mempool_get_pointer_from_base_and_offset(const struct mempool *mp, size_t offset); void *toku_mempool_get_pointer_from_base_and_offset(const struct mempool *mp, size_t offset);
/* get the offset from base of a pointer */ /* get the offset from base of a pointer */
size_t toku_mempool_get_offset_from_pointer_and_base(const struct mempool *mp, void* p); size_t toku_mempool_get_offset_from_pointer_and_base(const struct mempool *mp, const void* p);
/* get the a pointer of the first free byte (if any) */ /* get the a pointer of the first free byte (if any) */
void* toku_mempool_get_next_free_ptr(const struct mempool *mp); void* toku_mempool_get_next_free_ptr(const struct mempool *mp);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment