Commit 1fa7af74 authored by Michael Okoko, committed by Sergei Petrunia

Split histogram classes into JSON and binary classes

Signed-off-by: Michael Okoko <okokomichaels@outlook.com>
parent 1998b787
...@@ -76,6 +76,8 @@ std::vector<std::string> parse_histogram_from_json(const char *json); ...@@ -76,6 +76,8 @@ std::vector<std::string> parse_histogram_from_json(const char *json);
void test_parse_histogram_from_json(); void test_parse_histogram_from_json();
Histogram_base *create_histogram(Histogram_type hist_type);
/* Currently there are only 3 persistent statistical tables */ /* Currently there are only 3 persistent statistical tables */
static const uint STATISTICS_TABLES= 3; static const uint STATISTICS_TABLES= 3;
...@@ -1217,9 +1219,9 @@ class Column_stat: public Stat_table ...@@ -1217,9 +1219,9 @@ class Column_stat: public Stat_table
The method assumes that the value of histogram size and the pointer to The method assumes that the value of histogram size and the pointer to
the histogram location has been already set in the fields size and values the histogram location has been already set in the fields size and values
of read_stats->histogram. of read_stats->histogram.
*/ */
Histogram * load_histogram(MEM_ROOT *mem_root) Histogram_binary * load_histogram(MEM_ROOT *mem_root)
{ {
if (find_stat()) if (find_stat())
{ {
...@@ -1230,7 +1232,7 @@ class Column_stat: public Stat_table ...@@ -1230,7 +1232,7 @@ class Column_stat: public Stat_table
table_field->read_stats->set_not_null(fldno); table_field->read_stats->set_not_null(fldno);
stat_field->val_str(&val); stat_field->val_str(&val);
// histogram-todo: here, create the histogram of appropriate type. // histogram-todo: here, create the histogram of appropriate type.
Histogram *hist= new (mem_root) Histogram(); Histogram_binary *hist= new (mem_root) Histogram_binary();
if (!hist->parse(mem_root, table_field->read_stats->histogram_type_on_disk, if (!hist->parse(mem_root, table_field->read_stats->histogram_type_on_disk,
(const uchar*)val.ptr(), val.length())) (const uchar*)val.ptr(), val.length()))
{ {
...@@ -1244,7 +1246,7 @@ class Column_stat: public Stat_table ...@@ -1244,7 +1246,7 @@ class Column_stat: public Stat_table
} }
}; };
bool Histogram::parse(MEM_ROOT *mem_root, Histogram_type type_arg, const uchar *ptr_arg, uint size_arg) bool Histogram_binary::parse(MEM_ROOT *mem_root, Histogram_type type_arg, const uchar *ptr_arg, uint size_arg)
{ {
// Just copy the data // Just copy the data
size = (uint8) size_arg; size = (uint8) size_arg;
...@@ -1258,7 +1260,7 @@ bool Histogram::parse(MEM_ROOT *mem_root, Histogram_type type_arg, const uchar * ...@@ -1258,7 +1260,7 @@ bool Histogram::parse(MEM_ROOT *mem_root, Histogram_type type_arg, const uchar *
/* /*
Save the histogram data into a table field. Save the histogram data into a table field.
*/ */
void Histogram::serialize(Field *field) void Histogram_binary::serialize(Field *field)
{ {
if (get_type() == JSON) if (get_type() == JSON)
{ {
...@@ -1269,7 +1271,7 @@ void Histogram::serialize(Field *field) ...@@ -1269,7 +1271,7 @@ void Histogram::serialize(Field *field)
field->store((char*)get_values(), get_size(), &my_charset_bin); field->store((char*)get_values(), get_size(), &my_charset_bin);
} }
void Histogram::init_for_collection(MEM_ROOT *mem_root, void Histogram_binary::init_for_collection(MEM_ROOT *mem_root,
Histogram_type htype_arg, Histogram_type htype_arg,
ulonglong size_arg) ulonglong size_arg)
{ {
...@@ -1278,6 +1280,13 @@ void Histogram::init_for_collection(MEM_ROOT *mem_root, ...@@ -1278,6 +1280,13 @@ void Histogram::init_for_collection(MEM_ROOT *mem_root,
size= (uint8) size_arg; size= (uint8) size_arg;
} }
/*
  Prepare a JSON histogram for statistics collection.

  Stores the requested histogram type, records the element count narrowed
  to uint8, and requests size_arg bytes from mem_root for the values buffer.
  The result of alloc_root() is stored as-is; it is not checked here.
*/
void Histogram_json::init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size_arg)
{
  type= htype_arg;
  size= static_cast<uint8>(size_arg);

  /* The buffer is requested with the full size_arg, not the narrowed size. */
  void *buffer= alloc_root(mem_root, size_arg);
  values= static_cast<uchar *>(buffer);
}
/* /*
An object of the class Index_stat is created to read statistical An object of the class Index_stat is created to read statistical
data on tables from the statistical table table_stat, to update data on tables from the statistical table table_stat, to update
...@@ -1595,13 +1604,13 @@ class Stat_table_write_iter ...@@ -1595,13 +1604,13 @@ class Stat_table_write_iter
class Histogram_builder class Histogram_builder
{ {
protected: private:
Field *column; /* table field for which the histogram is built */ Field *column; /* table field for which the histogram is built */
uint col_length; /* size of this field */ uint col_length; /* size of this field */
ha_rows records; /* number of records the histogram is built for */ ha_rows records; /* number of records the histogram is built for */
Field *min_value; /* pointer to the minimal value for the field */ Field *min_value; /* pointer to the minimal value for the field */
Field *max_value; /* pointer to the maximal value for the field */ Field *max_value; /* pointer to the maximal value for the field */
Histogram *histogram; /* the histogram location */ Histogram_binary *histogram; /* the histogram location */
uint hist_width; /* the number of points in the histogram */ uint hist_width; /* the number of points in the histogram */
double bucket_capacity; /* number of rows in a bucket of the histogram */ double bucket_capacity; /* number of rows in a bucket of the histogram */
uint curr_bucket; /* number of the current bucket to be built */ uint curr_bucket; /* number of the current bucket to be built */
...@@ -1617,7 +1626,7 @@ class Histogram_builder ...@@ -1617,7 +1626,7 @@ class Histogram_builder
Column_statistics *col_stats= col->collected_stats; Column_statistics *col_stats= col->collected_stats;
min_value= col_stats->min_value; min_value= col_stats->min_value;
max_value= col_stats->max_value; max_value= col_stats->max_value;
histogram= col_stats->histogram_; histogram= dynamic_cast<Histogram_binary *>(col_stats->histogram_);
hist_width= histogram->get_width(); hist_width= histogram->get_width();
bucket_capacity= (double) records / (hist_width + 1); bucket_capacity= (double) records / (hist_width + 1);
curr_bucket= 0; curr_bucket= 0;
...@@ -1626,6 +1635,8 @@ class Histogram_builder ...@@ -1626,6 +1635,8 @@ class Histogram_builder
count_distinct_single_occurence= 0; count_distinct_single_occurence= 0;
} }
Histogram_builder() = default;
virtual ~Histogram_builder() = default; virtual ~Histogram_builder() = default;
ulonglong get_count_distinct() const { return count_distinct; } ulonglong get_count_distinct() const { return count_distinct; }
...@@ -1661,16 +1672,29 @@ class Histogram_builder ...@@ -1661,16 +1672,29 @@ class Histogram_builder
class Histogram_builder_json : public Histogram_builder class Histogram_builder_json : public Histogram_builder
{ {
std::vector<std::string> bucket_bounds = {}; Field *column; /* table field for which the histogram is built */
uint col_length; /* size of this field */
ha_rows records; /* number of records the histogram is built for */
Field *min_value; /* pointer to the minimal value for the field */
Field *max_value; /* pointer to the maximal value for the field */
Histogram_json *histogram; /* the histogram location */
uint hist_width; /* the number of points in the histogram */
double bucket_capacity; /* number of rows in a bucket of the histogram */
uint curr_bucket; /* number of the current bucket to be built */
ulonglong count; /* number of values retrieved */
ulonglong count_distinct; /* number of distinct values retrieved */
/* number of distinct values that occurred only once */
ulonglong count_distinct_single_occurence;
std::vector<std::string> bucket_bounds = {};
public: public:
Histogram_builder_json(Field *col, uint col_len, ha_rows rows) Histogram_builder_json(Field *col, uint col_len, ha_rows rows)
: Histogram_builder(col, col_len, rows) : column(col), col_length(col_len), records(rows)
{ {
Column_statistics *col_stats= col->collected_stats; Column_statistics *col_stats= col->collected_stats;
min_value= col_stats->min_value; min_value= col_stats->min_value;
max_value= col_stats->max_value; max_value= col_stats->max_value;
histogram= col_stats->histogram_; histogram= dynamic_cast<Histogram_json *>(col_stats->histogram_);
hist_width= histogram->get_width(); hist_width= histogram->get_width();
bucket_capacity= (double) records / (hist_width + 1); bucket_capacity= (double) records / (hist_width + 1);
curr_bucket= 0; curr_bucket= 0;
...@@ -1718,6 +1742,15 @@ std::vector<std::string> bucket_bounds = {}; ...@@ -1718,6 +1742,15 @@ std::vector<std::string> bucket_bounds = {};
} }
}; };
/*
  Allocate an empty histogram object matching hist_type.

  JSON yields a Histogram_json; every other value yields a Histogram_binary.
  The caller is expected to have rejected invalid histogram types already,
  so no validation is done here.
*/
Histogram_base *create_histogram(Histogram_type hist_type)
{
  Histogram_base *hist;
  switch (hist_type) {
  case JSON:
    hist= new Histogram_json;
    break;
  default:
    hist= new Histogram_binary;
    break;
  }
  return hist;
}
void test_parse_histogram_from_json() void test_parse_histogram_from_json()
{ {
std::vector<std::string> bucket = {}; std::vector<std::string> bucket = {};
...@@ -1954,9 +1987,9 @@ class Count_distinct_field: public Sql_alloc ...@@ -1954,9 +1987,9 @@ class Count_distinct_field: public Sql_alloc
@brief @brief
Get the pointer to the histogram built for table_field Get the pointer to the histogram built for table_field
*/ */
Histogram *get_histogram() Histogram_binary *get_histogram()
{ {
return table_field->collected_stats->histogram_; return dynamic_cast<Histogram_binary *>(table_field->collected_stats->histogram_);
} }
}; };
...@@ -2608,18 +2641,18 @@ bool Column_statistics_collected::add() ...@@ -2608,18 +2641,18 @@ bool Column_statistics_collected::add()
/* /*
Create an empty Histogram object from histogram_type. Create an empty Histogram_binary object from histogram_type.
Note: it is not yet clear whether collection-time histogram should be the same Note: it is not yet clear whether collection-time histogram should be the same
as lookup-time histogram. At the moment, they are. as lookup-time histogram. At the moment, they are.
*/ */
Histogram* get_histogram_by_type(MEM_ROOT *mem_root, Histogram_type hist_type) { Histogram_binary * get_histogram_by_type(MEM_ROOT *mem_root, Histogram_type hist_type) {
switch (hist_type) { switch (hist_type) {
case SINGLE_PREC_HB: case SINGLE_PREC_HB:
case DOUBLE_PREC_HB: case DOUBLE_PREC_HB:
case JSON: case JSON:
return new Histogram(); return new Histogram_binary();
default: default:
DBUG_ASSERT(0); DBUG_ASSERT(0);
} }
...@@ -2660,7 +2693,7 @@ void Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, doubl ...@@ -2660,7 +2693,7 @@ void Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, doubl
if (hist_size != 0 && hist_type != INVALID_HISTOGRAM) if (hist_size != 0 && hist_type != INVALID_HISTOGRAM)
{ {
have_histogram= true; have_histogram= true;
histogram_= new Histogram; histogram_= create_histogram(hist_type);
histogram_->init_for_collection(mem_root, hist_type, hist_size); histogram_->init_for_collection(mem_root, hist_type, hist_size);
} }
...@@ -4048,7 +4081,8 @@ double get_column_range_cardinality(Field *field, ...@@ -4048,7 +4081,8 @@ double get_column_range_cardinality(Field *field,
if (avg_frequency > 1.0 + 0.000001 && if (avg_frequency > 1.0 + 0.000001 &&
col_stats->min_max_values_are_provided()) col_stats->min_max_values_are_provided())
{ {
Histogram *hist= col_stats->histogram_; Histogram_binary *hist=
dynamic_cast<Histogram_binary *>(col_stats->histogram_);
if (hist && hist->is_usable(thd)) if (hist && hist->is_usable(thd))
{ {
store_key_image_to_rec(field, (uchar *) min_endp->key, store_key_image_to_rec(field, (uchar *) min_endp->key,
...@@ -4092,7 +4126,8 @@ double get_column_range_cardinality(Field *field, ...@@ -4092,7 +4126,8 @@ double get_column_range_cardinality(Field *field,
else else
max_mp_pos= 1.0; max_mp_pos= 1.0;
Histogram *hist= col_stats->histogram_; Histogram_binary *hist=
dynamic_cast<Histogram_binary *>(col_stats->histogram_);
if (hist && hist->is_usable(thd)) if (hist && hist->is_usable(thd))
sel= hist->range_selectivity(min_mp_pos, max_mp_pos); sel= hist->range_selectivity(min_mp_pos, max_mp_pos);
else else
...@@ -4143,7 +4178,7 @@ double get_column_range_cardinality(Field *field, ...@@ -4143,7 +4178,7 @@ double get_column_range_cardinality(Field *field,
value. value.
*/ */
double Histogram::point_selectivity(double pos, double avg_sel) double Histogram_binary::point_selectivity(double pos, double avg_sel)
{ {
double sel; double sel;
/* Find the bucket that contains the value 'pos'. */ /* Find the bucket that contains the value 'pos'. */
...@@ -4179,7 +4214,7 @@ double Histogram::point_selectivity(double pos, double avg_sel) ...@@ -4179,7 +4214,7 @@ double Histogram::point_selectivity(double pos, double avg_sel)
/* /*
The value 'pos' fits within one single histogram bucket. The value 'pos' fits within one single histogram bucket.
Histogram buckets have the same numbers of rows, but they cover Histogram_binary buckets have the same numbers of rows, but they cover
different ranges of values. different ranges of values.
We assume that values are uniformly distributed across the [0..1] value We assume that values are uniformly distributed across the [0..1] value
......
...@@ -153,6 +153,24 @@ class Histogram_base : public Sql_alloc ...@@ -153,6 +153,24 @@ class Histogram_base : public Sql_alloc
virtual void serialize(Field *to_field)= 0; virtual void serialize(Field *to_field)= 0;
virtual Histogram_type get_type()=0; virtual Histogram_type get_type()=0;
virtual uint get_width()=0;
virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size)=0;
virtual bool is_available()=0;
virtual bool is_usable(THD *thd)=0;
virtual void set_values(uchar * values)=0;
virtual uchar *get_values()=0;
virtual void set_size(ulonglong sz)=0;
virtual double range_selectivity(double min_pos, double max_pos)=0;
virtual double point_selectivity(double pos, double avg_selection)=0;
// Legacy: return the size of the histogram on disk. // Legacy: return the size of the histogram on disk.
// This will be stored in mysql.column_stats.hist_size column. // This will be stored in mysql.column_stats.hist_size column.
...@@ -162,22 +180,21 @@ class Histogram_base : public Sql_alloc ...@@ -162,22 +180,21 @@ class Histogram_base : public Sql_alloc
virtual ~Histogram_base(){} virtual ~Histogram_base(){}
}; };
class Histogram : public Histogram_base class Histogram_binary : public Histogram_base
{ {
public: public:
bool parse(MEM_ROOT *mem_root, Histogram_type type_arg, bool parse(MEM_ROOT *mem_root, Histogram_type type_arg,
const uchar *ptr_arg, uint size_arg) override; const uchar *ptr_arg, uint size_arg) override;
void serialize(Field *to_field) override; void serialize(Field *to_field) override;
Histogram_type get_type() override { return type; } Histogram_type get_type() override { return type; }
uint get_size() override { return (uint) size; } uint get_size() override { return (uint) size; }
// returns number of buckets in the histogram uint get_width() override
uint get_width()
{ {
switch (type) { switch (type) {
case SINGLE_PREC_HB: case SINGLE_PREC_HB:
case JSON:
return size; return size;
case DOUBLE_PREC_HB: case DOUBLE_PREC_HB:
return size / 2; return size / 2;
...@@ -196,7 +213,6 @@ class Histogram : public Histogram_base ...@@ -196,7 +213,6 @@ class Histogram : public Histogram_base
{ {
switch (type) { switch (type) {
case SINGLE_PREC_HB: case SINGLE_PREC_HB:
case JSON:
return ((uint) (1 << 8) - 1); return ((uint) (1 << 8) - 1);
case DOUBLE_PREC_HB: case DOUBLE_PREC_HB:
return ((uint) (1 << 16) - 1); return ((uint) (1 << 16) - 1);
...@@ -211,7 +227,6 @@ class Histogram : public Histogram_base ...@@ -211,7 +227,6 @@ class Histogram : public Histogram_base
DBUG_ASSERT(i < get_width()); DBUG_ASSERT(i < get_width());
switch (type) { switch (type) {
case SINGLE_PREC_HB: case SINGLE_PREC_HB:
case JSON:
return (uint) (((uint8 *) values)[i]); return (uint) (((uint8 *) values)[i]);
case DOUBLE_PREC_HB: case DOUBLE_PREC_HB:
return (uint) uint2korr(values + i * 2); return (uint) uint2korr(values + i * 2);
...@@ -260,22 +275,22 @@ class Histogram : public Histogram_base ...@@ -260,22 +275,22 @@ class Histogram : public Histogram_base
return i; return i;
} }
uchar *get_values() { return (uchar *) values; } uchar *get_values() override { return (uchar *) values; }
public: public:
void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size); void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size) override;
// Note: these two are used only for saving the JSON text: // Note: these two are used only for saving the JSON text:
void set_values (uchar *vals) { values= (uchar *) vals; } void set_values (uchar *vals) override { values= (uchar *) vals; }
void set_size (ulonglong sz) { size= (uint8) sz; } void set_size (ulonglong sz) override { size= (uint8) sz; }
bool is_available() { return get_size() > 0 && get_values(); } bool is_available() override { return get_size() > 0 && get_values(); }
/* /*
This function checks that histograms should be usable only when This function checks that histograms should be usable only when
1) the level of optimizer_use_condition_selectivity > 3 1) the level of optimizer_use_condition_selectivity > 3
2) histograms have been collected 2) histograms have been collected
*/ */
bool is_usable(THD *thd) bool is_usable(THD *thd) override
{ {
return thd->variables.optimizer_use_condition_selectivity > 3 && return thd->variables.optimizer_use_condition_selectivity > 3 &&
is_available(); is_available();
...@@ -285,7 +300,6 @@ class Histogram : public Histogram_base ...@@ -285,7 +300,6 @@ class Histogram : public Histogram_base
{ {
switch (type) { switch (type) {
case SINGLE_PREC_HB: case SINGLE_PREC_HB:
case JSON:
((uint8 *) values)[i]= (uint8) (val * prec_factor()); ((uint8 *) values)[i]= (uint8) (val * prec_factor());
return; return;
case DOUBLE_PREC_HB: case DOUBLE_PREC_HB:
...@@ -301,7 +315,6 @@ class Histogram : public Histogram_base ...@@ -301,7 +315,6 @@ class Histogram : public Histogram_base
{ {
switch (type) { switch (type) {
case SINGLE_PREC_HB: case SINGLE_PREC_HB:
case JSON:
((uint8 *) values)[i]= ((uint8 *) values)[i-1]; ((uint8 *) values)[i]= ((uint8 *) values)[i-1];
return; return;
case DOUBLE_PREC_HB: case DOUBLE_PREC_HB:
...@@ -313,7 +326,7 @@ class Histogram : public Histogram_base ...@@ -313,7 +326,7 @@ class Histogram : public Histogram_base
} }
} }
double range_selectivity(double min_pos, double max_pos) double range_selectivity(double min_pos, double max_pos) override
{ {
double sel; double sel;
double bucket_sel= 1.0/(get_width() + 1); double bucket_sel= 1.0/(get_width() + 1);
...@@ -326,9 +339,54 @@ class Histogram : public Histogram_base ...@@ -326,9 +339,54 @@ class Histogram : public Histogram_base
/* /*
Estimate selectivity of "col=const" using a histogram Estimate selectivity of "col=const" using a histogram
*/ */
double point_selectivity(double pos, double avg_sel); double point_selectivity(double pos, double avg_sel) override;
}; };
/*
  JSON-flavoured histogram.

  At this stage the class is largely a placeholder: parse() and serialize()
  do no work and the selectivity estimators return fixed constants. It keeps
  the same (type, size, values) triple as the binary histogram so that the
  generic Histogram_base accessors work on it.

  All members carry default initializers so that a freshly constructed object
  (e.g. one obtained via "new Histogram_json") reports itself as empty and
  unavailable instead of exposing indeterminate values.
*/
class Histogram_json : public Histogram_base
{
private:
  Histogram_type type= JSON;
  uint8 size= 0;            /* Number of elements in the histogram */
  uchar *values= nullptr;   /* Histogram data; nullptr until provided */

public:
  /* Placeholder: ignores its arguments and always returns false. */
  bool parse(MEM_ROOT *mem_root, Histogram_type type_arg, const uchar *ptr,
             uint size) override
  {
    return false;
  }

  /* Placeholder: nothing is written to to_field. */
  void serialize(Field *to_field) override {}

  uint get_size() override { return (uint) size; }

  /* Returns the number of buckets in the histogram. */
  uint get_width() override { return size; }

  /* Always JSON, regardless of the type passed to init_for_collection(). */
  Histogram_type get_type() override { return JSON; }

  /* The value is narrowed to uint8. */
  void set_size(ulonglong sz) override { size= (uint8) sz; }

  void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
                           ulonglong size) override;

  /* A histogram is available once it has a non-zero size and a buffer. */
  bool is_available() override { return get_size() > 0 && get_values(); }

  /*
    The histogram is usable only when
    1) optimizer_use_condition_selectivity > 3
    2) the histogram is available
  */
  bool is_usable(THD *thd) override
  {
    return thd->variables.optimizer_use_condition_selectivity > 3 &&
           is_available();
  }

  void set_values(uchar *vals) override { values= vals; }
  uchar *get_values() override { return values; }

  /* Placeholder estimate: fixed constant, the arguments are not consulted. */
  double range_selectivity(double min_pos, double max_pos) override
  {
    return 0.1;
  }

  /* Placeholder estimate: fixed constant, the arguments are not consulted. */
  double point_selectivity(double pos, double avg_selection) override
  {
    return 0.5;
  }
};
class Columns_statistics; class Columns_statistics;
class Index_statistics; class Index_statistics;
...@@ -411,7 +469,7 @@ class Column_statistics ...@@ -411,7 +469,7 @@ class Column_statistics
public: public:
Histogram_type histogram_type_on_disk; Histogram_type histogram_type_on_disk;
Histogram *histogram_; Histogram_base *histogram_;
uint32 no_values_provided_bitmap() uint32 no_values_provided_bitmap()
{ {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment