Commit 237447de authored by Michael Okoko, committed by Sergei Petrunia

rough base for json histogram builder

Signed-off-by: Michael Okoko <okokomichaels@outlook.com>
parent 567552b4
...@@ -25,4 +25,10 @@ DESCRIBE mysql.column_stats; ...@@ -25,4 +25,10 @@ DESCRIBE mysql.column_stats;
SELECT * FROM mysql.column_stats; SELECT * FROM mysql.column_stats;
set histogram_type=@save_histogram_type; set histogram_type=@save_histogram_type;
## Remove against Milestone-2
ANALYZE TABLE t1 PERSISTENT FOR COLUMNS(b) INDEXES();
SELECT * FROM mysql.column_stats;
select table_name, hist_type, decode_histogram(hist_type, histogram ) from mysql.column_stats;
DROP TABLE t1; DROP TABLE t1;
\ No newline at end of file
...@@ -32,6 +32,9 @@ ...@@ -32,6 +32,9 @@
#include "uniques.h" #include "uniques.h"
#include "sql_show.h" #include "sql_show.h"
#include "sql_partition.h" #include "sql_partition.h"
#include "my_json_writer.h"
#include <string>
#include <vector>
/* /*
The system variable 'use_stat_tables' can take one of the The system variable 'use_stat_tables' can take one of the
...@@ -1070,13 +1073,8 @@ class Column_stat: public Stat_table ...@@ -1070,13 +1073,8 @@ class Column_stat: public Stat_table
stat_field->store(stats->histogram.get_type() + 1); stat_field->store(stats->histogram.get_type() + 1);
break; break;
case COLUMN_STAT_HISTOGRAM: case COLUMN_STAT_HISTOGRAM:
if (stats->histogram.get_type() == JSON) {
const char* val = "{'hello': 'world'}";
stat_field->store(val, strlen(val), &my_charset_bin);
} else {
stat_field->store((char *) stats->histogram.get_values(), stat_field->store((char *) stats->histogram.get_values(),
stats->histogram.get_size(), &my_charset_bin); stats->histogram.get_size(), &my_charset_bin);
}
break; break;
} }
} }
...@@ -1524,6 +1522,7 @@ class Stat_table_write_iter ...@@ -1524,6 +1522,7 @@ class Stat_table_write_iter
class Histogram_builder class Histogram_builder
{ {
protected:
Field *column; /* table field for which the histogram is built */ Field *column; /* table field for which the histogram is built */
uint col_length; /* size of this field */ uint col_length; /* size of this field */
ha_rows records; /* number of records the histogram is built for */ ha_rows records; /* number of records the histogram is built for */
...@@ -1554,13 +1553,15 @@ class Histogram_builder ...@@ -1554,13 +1553,15 @@ class Histogram_builder
count_distinct_single_occurence= 0; count_distinct_single_occurence= 0;
} }
virtual ~Histogram_builder() = default;
ulonglong get_count_distinct() const { return count_distinct; } ulonglong get_count_distinct() const { return count_distinct; }
ulonglong get_count_single_occurence() const ulonglong get_count_single_occurence() const
{ {
return count_distinct_single_occurence; return count_distinct_single_occurence;
} }
int next(void *elem, element_count elem_cnt) virtual int next(void *elem, element_count elem_cnt)
{ {
count_distinct++; count_distinct++;
if (elem_cnt == 1) if (elem_cnt == 1)
...@@ -1585,6 +1586,57 @@ class Histogram_builder ...@@ -1585,6 +1586,57 @@ class Histogram_builder
} }
}; };
class Histogram_builder_json : public Histogram_builder
{
std::vector<String> bucket_bounds;
public:
Histogram_builder_json(Field *col, uint col_len, ha_rows rows)
: Histogram_builder(col, col_len, rows)
{
Column_statistics *col_stats= col->collected_stats;
min_value= col_stats->min_value;
max_value= col_stats->max_value;
histogram= &col_stats->histogram;
hist_width= histogram->get_width();
bucket_capacity= (double) records / (hist_width + 1);
curr_bucket= 0;
count= 0;
count_distinct= 0;
count_distinct_single_occurence= 0;
bucket_bounds = {};
}
~Histogram_builder_json() override = default;
int next(void *elem, element_count elem_cnt) override
{
count_distinct++;
if (elem_cnt == 1)
count_distinct_single_occurence++;
count+= elem_cnt;
if (curr_bucket == hist_width)
return 0;
if (count > bucket_capacity * (curr_bucket + 1))
{
auto *val= new StringBuffer<MAX_FIELD_WIDTH>;
column->val_str(val);
bucket_bounds.emplace_back(String(val->ptr(), val->length(), &my_charset_bin));
curr_bucket++;
}
return 0;
}
void build() {
Json_writer *writer = new Json_writer();
writer->start_array();
for(auto& value: bucket_bounds) {
writer->add_str(value);
}
writer->end_array();
histogram->set_values((uchar *) writer->output.get_string()->ptr());
}
};
C_MODE_START C_MODE_START
...@@ -1594,6 +1646,12 @@ int histogram_build_walk(void *elem, element_count elem_cnt, void *arg) ...@@ -1594,6 +1646,12 @@ int histogram_build_walk(void *elem, element_count elem_cnt, void *arg)
return hist_builder->next(elem, elem_cnt); return hist_builder->next(elem, elem_cnt);
} }
int json_histogram_build_walk(void *elem, element_count elem_cnt, void *arg)
{
Histogram_builder_json *hist_builder= (Histogram_builder_json *) arg;
return hist_builder->next(elem, elem_cnt);
}
static int count_distinct_single_occurence_walk(void *elem, static int count_distinct_single_occurence_walk(void *elem,
...@@ -1698,12 +1756,24 @@ class Count_distinct_field: public Sql_alloc ...@@ -1698,12 +1756,24 @@ class Count_distinct_field: public Sql_alloc
Calculate a histogram of the tree Calculate a histogram of the tree
*/ */
void walk_tree_with_histogram(ha_rows rows)
{
  /* The histogram type of the column decides which builder walks the tree */
  const bool json_requested=
    table_field->collected_stats->histogram.get_type() == JSON;

  if (!json_requested)
  {
    Histogram_builder builder(table_field, tree_key_length, rows);
    tree->walk(table_field->table, histogram_build_walk, (void *) &builder);
    distincts= builder.get_count_distinct();
    distincts_single_occurence= builder.get_count_single_occurence();
    return;
  }

  Histogram_builder_json json_builder(table_field, tree_key_length, rows);
  tree->walk(table_field->table, json_histogram_build_walk,
             (void *) &json_builder);
  /* The JSON builder only emits its histogram when build() is called */
  json_builder.build();
  distincts= json_builder.get_count_distinct();
  distincts_single_occurence= json_builder.get_count_single_occurence();
}
ulonglong get_count_distinct() ulonglong get_count_distinct()
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment