Commit 72c0ba43 authored by Sergei Petrunia's avatar Sergei Petrunia

Code cleanup part #1

parent f76e310a
......@@ -1179,14 +1179,17 @@ class Column_stat: public Stat_table
table_field->read_stats->set_avg_frequency(stat_field->val_real());
break;
case COLUMN_STAT_HIST_SIZE:
//TODO: ignore this. The size is a part of histogram!
//table_field->read_stats->histogram.set_size(stat_field->val_int());
/*
Ignore the contents of mysql.column_stats.hist_size. We take the
size from the mysql.column_stats.histogram column, itself.
*/
break;
case COLUMN_STAT_HIST_TYPE:
// TODO: save this next to histogram.
// For some reason, the histogram itself is read in
// read_histograms_for_table
{
/*
Save the histogram type. The histogram itself will be read in
read_histograms_for_table().
*/
Histogram_type hist_type= (Histogram_type) (stat_field->val_int() -
1);
table_field->read_stats->histogram_type_on_disk= hist_type;
......@@ -1247,21 +1250,24 @@ class Column_stat: public Stat_table
table_field->read_stats->histogram_= hist;
return hist;
}
//memcpy(table_field->read_stats->histogram_.get_values(),
// val.ptr(), table_field->read_stats->histogram.get_size());
}
return NULL;
}
};
bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *, Histogram_type type_arg, const uchar *ptr_arg, uint size_arg)
bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *,
Histogram_type type_arg,
const uchar *ptr_arg, uint size_arg)
{
// Just copy the data
size = (uint8) size_arg;
type = type_arg;
values = (uchar*)alloc_root(mem_root, size_arg);
memcpy(values, ptr_arg, size_arg);
return false;
if ((values = (uchar*)alloc_root(mem_root, size_arg)))
{
memcpy(values, ptr_arg, size_arg);
return false;
}
return true;
}
/*
......@@ -1269,7 +1275,7 @@ bool Histogram_binary::parse(MEM_ROOT *mem_root, Field *, Histogram_type type_ar
*/
void Histogram_binary::serialize(Field *field)
{
field->store((char*)get_values(), get_size(), &my_charset_bin);
field->store((char*)values, size, &my_charset_bin);
}
void Histogram_binary::init_for_collection(MEM_ROOT *mem_root,
......@@ -1282,20 +1288,32 @@ void Histogram_binary::init_for_collection(MEM_ROOT *mem_root,
}
void Histogram_json::init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size_arg)
void Histogram_json::init_for_collection(MEM_ROOT *mem_root,
Histogram_type htype_arg,
ulonglong size_arg)
{
type= htype_arg;
values = (uchar*)alloc_root(mem_root, size_arg);
size = (uint8) size_arg;
//values_ = (uchar*)alloc_root(mem_root, size_arg);
size= (uint8) size_arg;
}
bool Histogram_json::parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg, const uchar *ptr, uint size_arg)
/*
@brief
Parse the histogram from its on-disk representation
*/
bool Histogram_json::parse(MEM_ROOT *mem_root, Field *field,
Histogram_type type_arg, const uchar *ptr,
uint size_arg)
{
DBUG_ENTER("Histogram_json::parse");
size = (uint8) size_arg;
type = type_arg;
const char *json = (char *)ptr;
int vt;
std::vector<std::string> hist_buckets_text;
bool result = json_get_array_items(json, json + strlen(json), &vt, hist_buckets_text);
if (!result)
{
......@@ -1482,6 +1500,8 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub
}
return sel;
}
/*
@param field The table field histogram is for. We don't care about the
field's current value, we only need its virtual functions to
......@@ -1492,14 +1512,13 @@ double Histogram_json::point_selectivity(Field *field, key_range *endpoint, doub
double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp)
{
//fprintf(stderr, "Histogram_json::range_selectivity\n");
double min = 0.0, max = 1.0;
double width = 1.0/(int)histogram_bounds.size();
if (min_endp)
{
double min_sel = 0.0;
const uchar *min_key= min_endp->key;
// TODO: also, properly handle SQL NULLs.
// GSOC-TODO: properly handle SQL NULLs.
// in this test patch, we just assume the values are not SQL NULLs.
if (field->real_maybe_null())
min_key++;
......@@ -1573,8 +1592,7 @@ double Histogram_json::range_selectivity(Field *field, key_range *min_endp,
void Histogram_json::serialize(Field *field)
{
field->store((char*)get_values(), strlen((char*)get_values()),
&my_charset_bin);
field->store((char*)json_text, strlen((char*)json_text), &my_charset_bin);
}
int Histogram_json::find_bucket(Field *field, const uchar *endpoint)
......@@ -1583,7 +1601,7 @@ int Histogram_json::find_bucket(Field *field, const uchar *endpoint)
int high = (int)histogram_bounds.size()-1;
int mid;
int min_bucket_index = -1;
std::string mid_val;
std::string mid_val; // GSOC-todo: don't copy strings
while(low <= high) {
// c++ gives us the floor of integer divisions by default, below we get the ceiling (round-up).
......@@ -2037,9 +2055,9 @@ class Histogram_builder_json : public Histogram_builder
writer->add_str(value.c_str());
}
writer->end_array();
histogram->set_size(bucket_bounds.size());
Binary_string *json_string = (Binary_string *) writer->output.get_string();
((Histogram_json *)histogram)->set_values((uchar *) json_string->c_ptr());
Histogram_json *hist= (Histogram_json*)histogram;
hist->set_json_text(bucket_bounds.size(), (uchar *) json_string->c_ptr());
}
};
......@@ -2207,6 +2225,7 @@ class Count_distinct_field: public Sql_alloc
*/
void walk_tree_with_histogram(ha_rows rows)
{
// GSOC-TODO: is below a meaningful difference:
if (table_field->collected_stats->histogram_->get_type() == JSON_HB)
{
Histogram_builder_json hist_builder(table_field, tree_key_length, rows);
......@@ -2680,11 +2699,6 @@ int alloc_statistics_for_table(THD* thd, TABLE *table)
if (bitmap_is_set(table->read_set, (*field_ptr)->field_index))
{
column_stats->histogram_ = NULL;
/*
column_stats->histogram.set_size(hist_size);
column_stats->histogram.set_type(hist_type);
column_stats->histogram.set_values(histogram);
histogram+= hist_size;*/
(*field_ptr)->collected_stats= column_stats++;
}
}
......@@ -2950,9 +2964,9 @@ void Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, doubl
}
if (count_distinct)
{
//uint hist_size= count_distinct->get_hist_size();
uint hist_size= current_thd->variables.histogram_size;
Histogram_type hist_type= (Histogram_type) (current_thd->variables.histogram_type);
Histogram_type hist_type=
(Histogram_type) (current_thd->variables.histogram_type);
bool have_histogram= false;
if (hist_size != 0 && hist_type != INVALID_HISTOGRAM)
{
......@@ -3001,12 +3015,11 @@ void Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, doubl
}
else
have_histogram= false ; // TODO: need this?
//histogram.set_size(hist_size);
set_not_null(COLUMN_STAT_HIST_SIZE);
if (have_histogram && distincts)
{
set_not_null(COLUMN_STAT_HIST_TYPE);
//histogram.set_values(count_distinct->get_histogram());
histogram_= count_distinct->get_histogram();
set_not_null(COLUMN_STAT_HISTOGRAM);
}
......
......@@ -157,22 +157,17 @@ class Histogram_base : public Sql_alloc
virtual uint get_width()=0;
virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size)=0;
virtual void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
ulonglong size)=0;
virtual bool is_available()=0;
virtual bool is_usable(THD *thd)=0;
virtual void set_values(uchar * values)=0;
virtual uchar *get_values()=0;
virtual void set_size(ulonglong sz)=0;
virtual double point_selectivity(Field *field, key_range *endpoint, double avg_selection)=0;
virtual double point_selectivity(Field *field, key_range *endpoint,
double avg_selection)=0;
virtual double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp)=0;
key_range *max_endp)=0;
// Legacy: return the size of the histogram on disk.
// This will be stored in mysql.column_stats.hist_size column.
......@@ -181,6 +176,11 @@ class Histogram_base : public Sql_alloc
virtual ~Histogram_base()= default;
};
/*
A Height-balanced histogram that stores numeric fractions
*/
class Histogram_binary : public Histogram_base
{
public:
......@@ -274,17 +274,12 @@ class Histogram_binary : public Histogram_base
return i;
}
uchar *get_values() override { return (uchar *) values; }
public:
void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size) override;
// Note: these two are used only for saving the JSON text:
void set_values (uchar *vals) override { values= (uchar *) vals; }
void set_size (ulonglong sz) override { size= (uint8) sz; }
uint get_size() override {return (uint)size;}
bool is_available() override { return get_size() > 0 && get_values(); }
bool is_available() override { return get_size() > 0 && (values!=NULL); }
/*
This function checks that histograms should be usable only when
......@@ -328,58 +323,57 @@ class Histogram_binary : public Histogram_base
}
double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp) override;
key_range *max_endp) override;
/*
Estimate selectivity of "col=const" using a histogram
*/
double point_selectivity(Field *field, key_range *endpoint, double avg_sel) override;
double point_selectivity(Field *field, key_range *endpoint,
double avg_sel) override;
};
/*
An equi-height histogram which stores real values for bucket bounds.
*/
class Histogram_json : public Histogram_base
{
private:
Histogram_type type;
uint8 size; /* Number of elements in the histogram*/
/*
GSOC-TODO: This is used for storing collected JSON text. Rename it
accordingly.
*/
uchar *values;
// List of values in string form.
/*
GSOC-TODO: We don't need to save this. It can be a local variable in
parse().
Eventually we should get rid of this at all, as we can convert the
endpoints and add them to histogram_bounds as soon as we've read them.
*/
std::vector<std::string> hist_buckets_text;
/* Collection-time only: collected histogram in the JSON form. */
uchar *json_text;
// Array of histogram bucket endpoints in KeyTupleFormat.
std::vector<std::string> histogram_bounds;
public:
bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg,
const uchar *ptr, uint size) override;
const uchar *ptr, uint size) override;
void serialize(Field *field) override;
// returns number of buckets in the histogram
uint get_width() override
{
return size;
};
return size;
}
Histogram_type get_type() override
{
return JSON_HB;
}
void set_size (ulonglong sz) override {size = (uint8) sz; }
void set_json_text(ulonglong sz, uchar *json_text_arg)
{
size = (uint8) sz;
json_text= json_text_arg;
}
uint get_size() override {
uint get_size() override
{
return size;
}
......@@ -393,15 +387,10 @@ class Histogram_json : public Histogram_base
is_available();
}
void set_values (uchar *vals) override { values= (uchar *) vals; }
uchar *get_values() override { return (uchar *) values; }
double point_selectivity(Field *field, key_range *endpoint, double avg_selection) override;
double point_selectivity(Field *field, key_range *endpoint,
double avg_selection) override;
double range_selectivity(Field *field, key_range *min_endp,
key_range *max_endp) override;
key_range *max_endp) override;
/*
* Returns the index of the biggest histogram value that is smaller than endpoint
*/
......
......@@ -1869,7 +1869,7 @@ int json_path_compare(const json_path_t *a, const json_path_t *b,
enum json_types json_smart_read_value(json_engine_t *je,
const char **value, int *value_len)
const char **value, int *value_len)
{
if (json_read_value(je))
goto err_return;
......@@ -1952,6 +1952,7 @@ enum json_types json_get_array_item(const char *js, const char *js_end,
return JSV_BAD_JSON;
}
/** Simple json lookup for a value by the key.
Expects JSON object.
......@@ -2027,8 +2028,6 @@ enum json_types json_get_object_nkey(const char *js __attribute__((unused)),
return JSV_NOTHING;
}
/** Check if json is valid (well-formed)
@retval 0 - success, json is well-formed
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment