Commit 72c0ba43 authored by Sergei Petrunia

Code cleanup part #1

parent f76e310a
...@@ -1179,14 +1179,17 @@ class Column_stat: public Stat_table ...@@ -1179,14 +1179,17 @@ class Column_stat: public Stat_table
table_field->read_stats->set_avg_frequency(stat_field->val_real()); table_field->read_stats->set_avg_frequency(stat_field->val_real());
break; break;
case COLUMN_STAT_HIST_SIZE: case COLUMN_STAT_HIST_SIZE:
//TODO: ignore this. The size is a part of histogram! /*
//table_field->read_stats->histogram.set_size(stat_field->val_int()); Ignore the contents of mysql.column_stats.hist_size. We take the
size from the mysql.column_stats.histogram column, itself.
*/
break; break;
case COLUMN_STAT_HIST_TYPE: case COLUMN_STAT_HIST_TYPE:
// TODO: save this next to histogram.
// For some reason, the histogram itself is read in
// read_histograms_for_table
{ {
/*
Save the histogram type. The histogram itself will be read in
read_histograms_for_table().
*/
Histogram_type hist_type= (Histogram_type) (stat_field->val_int() - Histogram_type hist_type= (Histogram_type) (stat_field->val_int() -
1); 1);
table_field->read_stats->histogram_type_on_disk= hist_type; table_field->read_stats->histogram_type_on_disk= hist_type;
...@@ -1247,21 +1250,24 @@ class Column_stat: public Stat_table ...@@ -1247,21 +1250,24 @@ class Column_stat: public Stat_table
table_field->read_stats->histogram_= hist; table_field->read_stats->histogram_= hist;
return hist; return hist;
} }
//memcpy(table_field->read_stats->histogram_.get_values(),
// val.ptr(), table_field->read_stats->histogram.get_size());
} }
return NULL; return NULL;
} }
}; };
bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *, Histogram_type type_arg, const uchar *ptr_arg, uint size_arg) bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *,
Histogram_type type_arg,
const uchar *ptr_arg, uint size_arg)
{ {
// Just copy the data // Just copy the data
size = (uint8) size_arg; size = (uint8) size_arg;
type = type_arg; type = type_arg;
values = (uchar*)alloc_root(mem_root, size_arg); if ((values = (uchar*)alloc_root(mem_root, size_arg)))
{
memcpy(values, ptr_arg, size_arg); memcpy(values, ptr_arg, size_arg);
return false; return false;
}
return true;
} }
/* /*
...@@ -1269,7 +1275,7 @@ bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *, Histogram_type type_ar ...@@ -1269,7 +1275,7 @@ bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *, Histogram_type type_ar
*/ */
void Histogram_binary::serialize(Field *field) void Histogram_binary::serialize(Field *field)
{ {
field->store((char*)get_values(), get_size(), &my_charset_bin); field->store((char*)values, size, &my_charset_bin);
} }
void Histogram_binary::init_for_collection(MEM_ROOT *mem_root, void Histogram_binary::init_for_collection(MEM_ROOT *mem_root,
...@@ -1282,20 +1288,32 @@ void Histogram_binary::init_for_collection(MEM_ROOT *mem_root, ...@@ -1282,20 +1288,32 @@ void Histogram_binary::init_for_collection(MEM_ROOT *mem_root,
} }
void Histogram_json::init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size_arg) void Histogram_json::init_for_collection(MEM_ROOT *mem_root,
Histogram_type htype_arg,
ulonglong size_arg)
{ {
type= htype_arg; type= htype_arg;
values = (uchar*)alloc_root(mem_root, size_arg); //values_ = (uchar*)alloc_root(mem_root, size_arg);
size = (uint8) size_arg; size= (uint8) size_arg;
} }
bool Histogram_json::parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg, const uchar *ptr, uint size_arg)
/*
@brief
Parse the histogram from its on-disk representation
*/
bool Histogram_json::parse(MEM_ROOT *mem_root, Field *field,
Histogram_type type_arg, const uchar *ptr,
uint size_arg)
{ {
DBUG_ENTER("Histogram_json::parse"); DBUG_ENTER("Histogram_json::parse");
size = (uint8) size_arg; size = (uint8) size_arg;
type = type_arg; type = type_arg;
const char *json = (char *)ptr; const char *json = (char *)ptr;
int vt; int vt;
std::vector<std::string> hist_buckets_text;
bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets_text); bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets_text);
if (!result) if (!result)
{ {
...@@ -1482,6 +1500,8 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub ...@@ -1482,6 +1500,8 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub
} }
return sel; return sel;
} }
/* /*
@param field The table field histogram is for. We don't care about the @param field The table field histogram is for. We don't care about the
field's current value, we only need its virtual functions to field's current value, we only need its virtual functions to
...@@ -1492,14 +1512,13 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub ...@@ -1492,14 +1512,13 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub
double Histogram_json::range_selectivity(Field *field, key_range *min_endp, double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp) key_range *max_endp)
{ {
//fprintf(stderr, "Histogram_json::range_selectivity\n");
double min = 0.0, max = 1.0; double min = 0.0, max = 1.0;
double width = 1.0/(int)histogram_bounds.size(); double width = 1.0/(int)histogram_bounds.size();
if (min_endp) if (min_endp)
{ {
double min_sel = 0.0; double min_sel = 0.0;
const uchar *min_key= min_endp->key; const uchar *min_key= min_endp->key;
// TODO: also, properly handle SQL NULLs. // GSOC-TODO: properly handle SQL NULLs.
// in this test patch, we just assume the values are not SQL NULLs. // in this test patch, we just assume the values are not SQL NULLs.
if (field->real_maybe_null()) if (field->real_maybe_null())
min_key++; min_key++;
...@@ -1573,8 +1592,7 @@ double Histogram_json::range_selectivity(Field *field, key_range *min_endp, ...@@ -1573,8 +1592,7 @@ double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
void Histogram_json::serialize(Field *field) void Histogram_json::serialize(Field *field)
{ {
field->store((char*)get_values(), strlen((char*)get_values()), field->store((char*)json_text, strlen((char*)json_text), &my_charset_bin);
&my_charset_bin);
} }
int Histogram_json::find_bucket(Field *field, const uchar *endpoint) int Histogram_json::find_bucket(Field *field, const uchar *endpoint)
...@@ -1583,7 +1601,7 @@ int Histogram_json::find_bucket(Field *field, const uchar *endpoint) ...@@ -1583,7 +1601,7 @@ int Histogram_json::find_bucket(Field *field, const uchar *endpoint)
int high = (int)histogram_bounds.size()-1; int high = (int)histogram_bounds.size()-1;
int mid; int mid;
int min_bucket_index = -1; int min_bucket_index = -1;
std::string mid_val; std::string mid_val; // GSOC-todo: don't copy strings
while(low <= high) { while(low <= high) {
// c++ gives us the floor of integer divisions by default, below we get the ceiling (round-up). // c++ gives us the floor of integer divisions by default, below we get the ceiling (round-up).
...@@ -2037,9 +2055,9 @@ class Histogram_builder_json : public Histogram_builder ...@@ -2037,9 +2055,9 @@ class Histogram_builder_json : public Histogram_builder
writer->add_str(value.c_str()); writer->add_str(value.c_str());
} }
writer->end_array(); writer->end_array();
histogram->set_size(bucket_bounds.size());
Binary_string *json_string = (Binary_string *) writer->output.get_string(); Binary_string *json_string = (Binary_string *) writer->output.get_string();
((Histogram_json *)histogram)->set_values((uchar *) json_string->c_ptr()); Histogram_json *hist= (Histogram_json*)histogram;
hist->set_json_text(bucket_bounds.size(), (uchar *) json_string->c_ptr());
} }
}; };
...@@ -2207,6 +2225,7 @@ class Count_distinct_field: public Sql_alloc ...@@ -2207,6 +2225,7 @@ class Count_distinct_field: public Sql_alloc
*/ */
void walk_tree_with_histogram(ha_rows rows) void walk_tree_with_histogram(ha_rows rows)
{ {
// GSOC-TODO: is below a meaningful difference:
if (table_field->collected_stats->histogram_->get_type() == JSON_HB) if (table_field->collected_stats->histogram_->get_type() == JSON_HB)
{ {
Histogram_builder_json hist_builder(table_field, tree_key_length, rows); Histogram_builder_json hist_builder(table_field, tree_key_length, rows);
...@@ -2680,11 +2699,6 @@ int alloc_statistics_for_table(THD* thd, TABLE *table) ...@@ -2680,11 +2699,6 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
if (bitmap_is_set(table->read_set, (*field_ptr)->field_index)) if (bitmap_is_set(table->read_set, (*field_ptr)->field_index))
{ {
column_stats->histogram_ = NULL; column_stats->histogram_ = NULL;
/*
column_stats->histogram.set_size(hist_size);
column_stats->histogram.set_type(hist_type);
column_stats->histogram.set_values(histogram);
histogram+= hist_size;*/
(*field_ptr)->collected_stats= column_stats++; (*field_ptr)->collected_stats= column_stats++;
} }
} }
...@@ -2950,9 +2964,9 @@ void Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, doubl ...@@ -2950,9 +2964,9 @@ void Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, doubl
} }
if (count_distinct) if (count_distinct)
{ {
//uint hist_size= count_distinct->get_hist_size();
uint hist_size= current_thd->variables.histogram_size; uint hist_size= current_thd->variables.histogram_size;
Histogram_type hist_type= (Histogram_type) (current_thd->variables.histogram_type); Histogram_type hist_type=
(Histogram_type) (current_thd->variables.histogram_type);
bool have_histogram= false; bool have_histogram= false;
if (hist_size != 0 && hist_type != INVALID_HISTOGRAM) if (hist_size != 0 && hist_type != INVALID_HISTOGRAM)
{ {
...@@ -3001,12 +3015,11 @@ void Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, doubl ...@@ -3001,12 +3015,11 @@ void Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, doubl
} }
else else
have_histogram= false ; // TODO: need this? have_histogram= false ; // TODO: need this?
//histogram.set_size(hist_size);
set_not_null(COLUMN_STAT_HIST_SIZE); set_not_null(COLUMN_STAT_HIST_SIZE);
if (have_histogram && distincts) if (have_histogram && distincts)
{ {
set_not_null(COLUMN_STAT_HIST_TYPE); set_not_null(COLUMN_STAT_HIST_TYPE);
//histogram.set_values(count_distinct->get_histogram());
histogram_= count_distinct->get_histogram(); histogram_= count_distinct->get_histogram();
set_not_null(COLUMN_STAT_HISTOGRAM); set_not_null(COLUMN_STAT_HISTOGRAM);
} }
......
...@@ -157,20 +157,15 @@ class Histogram_base : public Sql_alloc ...@@ -157,20 +157,15 @@ class Histogram_base : public Sql_alloc
virtual uint get_width()=0; virtual uint get_width()=0;
virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size)=0; virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
ulonglong size)=0;
virtual bool is_available()=0; virtual bool is_available()=0;
virtual bool is_usable(THD *thd)=0; virtual bool is_usable(THD *thd)=0;
virtual void set_values(uchar * values)=0; virtual double point_selectivity(Field *field, key_range *endpoint,
double avg_selection)=0;
virtual uchar *get_values()=0;
virtual void set_size(ulonglong sz)=0;
virtual double point_selectivity(Field *field, key_range *endpoint, double avg_selection)=0;
virtual double range_selectivity(Field *field, key_range *min_endp, virtual double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp)=0; key_range *max_endp)=0;
...@@ -181,6 +176,11 @@ class Histogram_base : public Sql_alloc ...@@ -181,6 +176,11 @@ class Histogram_base : public Sql_alloc
virtual ~Histogram_base()= default; virtual ~Histogram_base()= default;
}; };
/*
A Height-balanced histogram that stores numeric fractions
*/
class Histogram_binary : public Histogram_base class Histogram_binary : public Histogram_base
{ {
public: public:
...@@ -274,17 +274,12 @@ class Histogram_binary : public Histogram_base ...@@ -274,17 +274,12 @@ class Histogram_binary : public Histogram_base
return i; return i;
} }
uchar *get_values() override { return (uchar *) values; }
public: public:
void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size) override; void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size) override;
// Note: these two are used only for saving the JSON text:
void set_values (uchar *vals) override { values= (uchar *) vals; }
void set_size (ulonglong sz) override { size= (uint8) sz; }
uint get_size() override {return (uint)size;} uint get_size() override {return (uint)size;}
bool is_available() override { return get_size() > 0 && get_values(); } bool is_available() override { return get_size() > 0 && (values!=NULL); }
/* /*
This function checks that histograms should be usable only when This function checks that histograms should be usable only when
...@@ -333,29 +328,23 @@ class Histogram_binary : public Histogram_base ...@@ -333,29 +328,23 @@ class Histogram_binary : public Histogram_base
/* /*
Estimate selectivity of "col=const" using a histogram Estimate selectivity of "col=const" using a histogram
*/ */
double point_selectivity(Field *field, key_range *endpoint, double avg_sel) override; double point_selectivity(Field *field, key_range *endpoint,
double avg_sel) override;
}; };
/*
An equi-height histogram which stores real values for bucket bounds.
*/
class Histogram_json : public Histogram_base class Histogram_json : public Histogram_base
{ {
private: private:
Histogram_type type; Histogram_type type;
uint8 size; /* Number of elements in the histogram*/ uint8 size; /* Number of elements in the histogram*/
/* /* Collection-time only: collected histogram in the JSON form. */
GSOC-TODO: This is used for storing collected JSON text. Rename it uchar *json_text;
accordingly.
*/
uchar *values;
// List of values in string form.
/*
GSOC-TODO: We don't need to save this. It can be a local variable in
parse().
Eventually we should get rid of this at all, as we can convert the
endpoints and add them to histogram_bounds as soon as we've read them.
*/
std::vector<std::string> hist_buckets_text;
// Array of histogram bucket endpoints in KeyTupleFormat. // Array of histogram bucket endpoints in KeyTupleFormat.
std::vector<std::string> histogram_bounds; std::vector<std::string> histogram_bounds;
...@@ -370,16 +359,21 @@ class Histogram_json : public Histogram_base ...@@ -370,16 +359,21 @@ class Histogram_json : public Histogram_base
uint get_width() override uint get_width() override
{ {
return size; return size;
}; }
Histogram_type get_type() override Histogram_type get_type() override
{ {
return JSON_HB; return JSON_HB;
} }
void set_size (ulonglong sz) override {size = (uint8) sz; } void set_json_text(ulonglong sz, uchar *json_text_arg)
{
size = (uint8) sz;
json_text= json_text_arg;
}
uint get_size() override { uint get_size() override
{
return size; return size;
} }
...@@ -393,15 +387,10 @@ class Histogram_json : public Histogram_base ...@@ -393,15 +387,10 @@ class Histogram_json : public Histogram_base
is_available(); is_available();
} }
void set_values (uchar *vals) override { values= (uchar *) vals; } double point_selectivity(Field *field, key_range *endpoint,
double avg_selection) override;
uchar *get_values() override { return (uchar *) values; }
double point_selectivity(Field *field, key_range *endpoint, double avg_selection) override;
double range_selectivity(Field *field, key_range *min_endp, double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp) override; key_range *max_endp) override;
/* /*
* Returns the index of the biggest histogram value that is smaller than endpoint * Returns the index of the biggest histogram value that is smaller than endpoint
*/ */
......
...@@ -1952,6 +1952,7 @@ enum json_types json_get_array_item(const char *js, const char *js_end, ...@@ -1952,6 +1952,7 @@ enum json_types json_get_array_item(const char *js, const char *js_end,
return JSV_BAD_JSON; return JSV_BAD_JSON;
} }
/** Simple json lookup for a value by the key. /** Simple json lookup for a value by the key.
Expects JSON object. Expects JSON object.
...@@ -2027,8 +2028,6 @@ enum json_types json_get_object_nkey(const char *js __attribute__((unused)), ...@@ -2027,8 +2028,6 @@ enum json_types json_get_object_nkey(const char *js __attribute__((unused)),
return JSV_NOTHING; return JSV_NOTHING;
} }
/** Check if json is valid (well-formed) /** Check if json is valid (well-formed)
@retval 0 - success, json is well-formed @retval 0 - success, json is well-formed
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment