Commit fc6a4a33 authored by Sergei Petrunia's avatar Sergei Petrunia

Cleanup histogram collection code

parent 02a67307
......@@ -1959,7 +1959,9 @@ class Stat_table_write_iter
/*
Histogram_builder is a helper class that is used to build histograms
for columns
for columns.
Do not create directly, call Histogram->get_builder(...);
*/
class Histogram_builder
......@@ -2027,8 +2029,17 @@ class Histogram_builder
}
return 0;
}
virtual void finalize(){}
};
Histogram_builder *Histogram_binary::create_builder(Field *col, uint col_len,
ha_rows rows)
{
return new Histogram_builder(col, col_len, rows);
}
class Histogram_builder_json : public Histogram_builder
{
std::vector<std::string> bucket_bounds;
......@@ -2036,13 +2047,14 @@ class Histogram_builder_json : public Histogram_builder
public:
Histogram_builder_json(Field *col, uint col_len, ha_rows rows)
: Histogram_builder(col, col_len, rows)
{
bucket_bounds = {};
}
: Histogram_builder(col, col_len, rows) {}
~Histogram_builder_json() override = default;
/*
Add data to the histogram. Adding Element elem which encountered elem_cnt
times.
*/
int next(void *elem, element_count elem_cnt) override
{
count_distinct++;
......@@ -2053,24 +2065,26 @@ class Histogram_builder_json : public Histogram_builder
return 0;
if (count > bucket_capacity * (curr_bucket + 1))
{
column->store_field_value((uchar *) elem, col_length);
column->store_field_value((uchar*) elem, col_length);
StringBuffer<MAX_FIELD_WIDTH> val;
column->val_str(&val);
auto it = bucket_bounds.begin();
bucket_bounds.insert(it+curr_bucket, std::string(val.ptr(), val.length()));
bucket_bounds.push_back(std::string(val.ptr(), val.length()));
curr_bucket++;
while (curr_bucket != hist_width &&
count > bucket_capacity * (curr_bucket + 1))
{
it = bucket_bounds.begin();
bucket_bounds.insert(it+curr_bucket, bucket_bounds[curr_bucket-1]);
bucket_bounds.push_back(std::string(val.ptr(), val.length()));
curr_bucket++;
}
}
return 0;
}
void build_json_from_histogram() {
/*
Finalize the creation of histogram
*/
void finalize() override
{
Json_writer writer;
writer.start_object();
writer.add_member(Histogram_json::JSON_NAME).start_array();
......@@ -2087,6 +2101,15 @@ class Histogram_builder_json : public Histogram_builder
};
Histogram_builder *Histogram_json::create_builder(Field *col, uint col_len,
ha_rows rows)
{
return new Histogram_builder_json(col, col_len, rows);
}
Histogram_base *create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type,
THD *owner)
{
......@@ -2111,19 +2134,12 @@ Histogram_base *create_histogram(MEM_ROOT *mem_root, Histogram_type hist_type,
C_MODE_START
int histogram_build_walk(void *elem, element_count elem_cnt, void *arg)
static int histogram_build_walk(void *elem, element_count elem_cnt, void *arg)
{
Histogram_builder *hist_builder= (Histogram_builder *) arg;
return hist_builder->next(elem, elem_cnt);
}
int json_histogram_build_walk(void *elem, element_count elem_cnt, void *arg)
{
Histogram_builder_json *hist_builder= (Histogram_builder_json *) arg;
return hist_builder->next(elem, elem_cnt);
}
static int count_distinct_single_occurence_walk(void *elem,
element_count count, void *arg)
......@@ -2228,24 +2244,16 @@ class Count_distinct_field: public Sql_alloc
*/
void walk_tree_with_histogram(ha_rows rows)
{
// GSOC-TODO: is below a meaningful difference:
if (table_field->collected_stats->histogram_->get_type() == JSON_HB)
{
Histogram_builder_json hist_builder(table_field, tree_key_length, rows);
tree->walk(table_field->table, json_histogram_build_walk,
(void *) &hist_builder);
hist_builder.build_json_from_histogram();
distincts= hist_builder.get_count_distinct();
distincts_single_occurence= hist_builder.get_count_single_occurence();
}
else
{
Histogram_builder hist_builder(table_field, tree_key_length, rows);
tree->walk(table_field->table, histogram_build_walk,
(void *) &hist_builder);
distincts= hist_builder.get_count_distinct();
distincts_single_occurence= hist_builder.get_count_single_occurence();
}
Histogram_base *hist= table_field->collected_stats->histogram_;
Histogram_builder *hist_builder=
hist->create_builder(table_field, tree_key_length, rows);
tree->walk(table_field->table, histogram_build_walk,
(void *) hist_builder);
hist_builder->finalize();
distincts= hist_builder->get_count_distinct();
distincts_single_occurence= hist_builder->get_count_single_occurence();
delete hist_builder;
}
ulonglong get_count_distinct()
......
......@@ -146,6 +146,8 @@ double get_column_range_cardinality(Field *field,
bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table);
bool is_eits_usable(Field* field);
class Histogram_builder;
/*
Common base for all histograms
*/
......@@ -160,6 +162,9 @@ class Histogram_base : public Sql_alloc
virtual uint get_width()=0;
virtual Histogram_builder *create_builder(Field *col, uint col_len,
ha_rows rows)=0;
virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
ulonglong size)=0;
......@@ -290,6 +295,8 @@ class Histogram_binary : public Histogram_base
void serialize(Field *to_field) override;
void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
ulonglong size) override;
Histogram_builder *create_builder(Field *col, uint col_len,
ha_rows rows) override;
bool is_available() override { return (values!=NULL); }
......@@ -373,6 +380,9 @@ class Histogram_json : public Histogram_base
void serialize(Field *field) override;
Histogram_builder *create_builder(Field *col, uint col_len,
ha_rows rows) override;
// returns number of buckets in the histogram
uint get_width() override
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment