Commit 547f8053 authored by Michael Okoko, committed by Sergei Petrunia

Refactor histogram point selectivity

Signed-off-by: Michael Okoko <okokomichaels@outlook.com>
parent e10d99ce
......@@ -1452,6 +1452,11 @@ double pos_in_interval_through_strxfrm(Field *field,
}
/*
  Estimate selectivity of "col=const" for a JSON histogram.

  None of the arguments are consulted at the moment: every call yields the
  same fixed estimate of 0.5.
  NOTE(review): presumably a stand-in until a real estimate is implemented -
  confirm.
*/
double Histogram_json::point_selectivity(Field *field, key_range *min_endp,
                                         key_range *max_endp, double avg_sel)
{
  const double fixed_estimate= 0.5;
  return fixed_estimate;
}
/*
@param field The table field histogram is for. We don't care about the
field's current value, we only need its virtual functions to
......@@ -4320,12 +4325,8 @@ double get_column_range_cardinality(Field *field,
Histogram_base *hist = col_stats->histogram_;
if (hist && hist->is_usable(thd))
{
store_key_image_to_rec(field, (uchar *) min_endp->key,
field->key_length());
double pos= field->pos_in_interval(col_stats->min_value,
col_stats->max_value);
res= col_non_nulls *
hist->point_selectivity(pos,
hist->point_selectivity(field, min_endp, max_endp,
avg_frequency / col_non_nulls);
}
}
......@@ -4356,8 +4357,11 @@ double get_column_range_cardinality(Field *field,
/*
Estimate selectivity of "col=const" using a histogram
@param pos Position of the "const" between column's min_value and
max_value. This is a number in [0..1] range.
@param field The field whose selectivity is being estimated.
@param min_endp, max_endp The left and right bounds of the range. For point
selectivity the two bounds are equal.
@param avg_sel Average selectivity of condition "col=const" in this table.
It is calculated as (#non_null_values / #distinct_values).
......@@ -4386,9 +4390,15 @@ double get_column_range_cardinality(Field *field,
value.
*/
double Histogram_binary::point_selectivity(double pos, double avg_sel)
double Histogram_binary::point_selectivity(Field *field, key_range *min_endp,
key_range *max_endp, double avg_sel)
{
double sel;
Column_statistics *col_stats= field->read_stats;
store_key_image_to_rec(field, (uchar *) min_endp->key,
field->key_length());
double pos= field->pos_in_interval(col_stats->min_value,
col_stats->max_value);
/* Find the bucket that contains the value 'pos'. */
uint min= find_bucket(pos, TRUE);
uint pos_value= (uint) (pos * prec_factor());
......
......@@ -169,7 +169,8 @@ class Histogram_base : public Sql_alloc
virtual void set_size(ulonglong sz)=0;
virtual double point_selectivity(double pos, double avg_selection)=0;
virtual double point_selectivity(Field *field, key_range *min_endp,
key_range *max_endp, double avg_selection)=0;
virtual double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp)=0;
......@@ -333,7 +334,8 @@ class Histogram_binary : public Histogram_base
/*
Estimate selectivity of "col=const" using a histogram
*/
double point_selectivity(double pos, double avg_sel) override;
double point_selectivity(Field *field, key_range *min_endp,
key_range *max_endp, double avg_sel) override;
};
class Histogram_json : public Histogram_base
......@@ -385,7 +387,7 @@ class Histogram_json : public Histogram_base
void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size) override;
bool is_available() override {return get_width() > 0 /*&& get_values()*/; }
bool is_available() override {return true; }
bool is_usable(THD *thd) override
{
......@@ -397,11 +399,9 @@ class Histogram_json : public Histogram_base
uchar *get_values() override { return (uchar *) values; }
double point_selectivity(double pos, double avg_selection) override {return 0.5;}
double point_selectivity(Field *field, key_range *min_endp,
key_range *max_endp, double avg_selection) override;
/*
GSOC-TODO: This function should eventually replace point_selectivity(). See its code for more details.
*/
double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp) override;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment