Commit 1998b787 authored by Sergei Petrunia

MDEV-21130: Histograms: use JSON as on-disk format

Preparation for handling different kinds of histograms:

- In Column_statistics, change "Histogram histogram" into
  "Histogram *histogram_".  This allows for different kinds
  of Histogram classes with virtual functions.

- [Almost] remove the usage of Histogram->set_values and
  Histogram->set_size. The code outside the histogram should
  not make any assumptions about what/how is stored in the Histogram.

- Introduce drafts of methods to read/save histograms to/from disk.
parent fb2edab3
This diff is collapsed.
......@@ -43,7 +43,8 @@ enum enum_histogram_type
{
SINGLE_PREC_HB,
DOUBLE_PREC_HB,
JSON
JSON,
INVALID_HISTOGRAM
} Histogram_type;
enum enum_stat_tables
......@@ -141,40 +142,70 @@ double get_column_range_cardinality(Field *field,
bool is_stat_table(const LEX_CSTRING *db, LEX_CSTRING *table);
bool is_eits_usable(Field* field);
class Histogram
/*
Common base for all histograms
*/
class Histogram_base : public Sql_alloc
{
public:
virtual bool parse(MEM_ROOT *mem_root, Histogram_type type_arg,
const uchar *ptr, uint size)= 0;
virtual void serialize(Field *to_field)= 0;
private:
Histogram_type type;
uint8 size; /* Size of values array, in bytes */
uchar *values;
virtual Histogram_type get_type()=0;
// Legacy: return the size of the histogram on disk.
// This will be stored in mysql.column_stats.hist_size column.
// Newer, JSON-based histograms may return 0.
virtual uint get_size()=0;
uint prec_factor()
virtual ~Histogram_base(){}
};
class Histogram : public Histogram_base
{
public:
bool parse(MEM_ROOT *mem_root, Histogram_type type_arg,
const uchar *ptr_arg, uint size_arg) override;
void serialize(Field *to_field) override;
Histogram_type get_type() override { return type; }
uint get_size() override { return (uint) size; }
// returns number of buckets in the histogram
uint get_width()
{
switch (type) {
case SINGLE_PREC_HB:
case JSON:
return ((uint) (1 << 8) - 1);
return size;
case DOUBLE_PREC_HB:
return ((uint) (1 << 16) - 1);
return size / 2;
default:
DBUG_ASSERT(0);
}
return 1;
return 0;
}
public:
uint get_width()
private:
Histogram_type type;
uint8 size; /* Size of values array, in bytes */
uchar *values;
uint prec_factor()
{
switch (type) {
case SINGLE_PREC_HB:
case JSON:
return size;
return ((uint) (1 << 8) - 1);
case DOUBLE_PREC_HB:
return size / 2;
return ((uint) (1 << 16) - 1);
default:
DBUG_ASSERT(0);
}
return 0;
return 1;
}
private:
uint get_value(uint i)
{
DBUG_ASSERT(i < get_width());
......@@ -184,6 +215,8 @@ class Histogram
return (uint) (((uint8 *) values)[i]);
case DOUBLE_PREC_HB:
return (uint) uint2korr(values + i * 2);
default:
DBUG_ASSERT(0);
}
return 0;
}
......@@ -227,19 +260,13 @@ class Histogram
return i;
}
public:
uint get_size() { return (uint) size; }
Histogram_type get_type() { return type; }
uchar *get_values() { return (uchar *) values; }
public:
void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg, ulonglong size);
void set_size (ulonglong sz) { size= (uint8) sz; }
void set_type (Histogram_type t) { type= t; }
// Note: these two are used only for saving the JSON text:
void set_values (uchar *vals) { values= (uchar *) vals; }
void set_size (ulonglong sz) { size= (uint8) sz; }
bool is_available() { return get_size() > 0 && get_values(); }
......@@ -264,6 +291,9 @@ class Histogram
case DOUBLE_PREC_HB:
int2store(values + i * 2, val * prec_factor());
return;
default:
DBUG_ASSERT(0);
return;
}
}
......@@ -277,6 +307,9 @@ class Histogram
case DOUBLE_PREC_HB:
int2store(values + i * 2, uint2korr(values + i * 2 - 2));
return;
default:
DBUG_ASSERT(0);
return;
}
}
......@@ -314,7 +347,7 @@ class Table_statistics
/* Array of records per key for index prefixes */
ulonglong *idx_avg_frequency;
uchar *histograms; /* Sequence of histograms */
//uchar *histograms; /* Sequence of histograms */
};
......@@ -377,7 +410,8 @@ class Column_statistics
public:
Histogram histogram;
Histogram_type histogram_type_on_disk;
Histogram *histogram_;
uint32 no_values_provided_bitmap()
{
......
......@@ -679,7 +679,15 @@ class TABLE_STATISTICS_CB
public:
MEM_ROOT mem_root; /* MEM_ROOT to allocate statistical data for the table */
Table_statistics *table_stats; /* Structure to access the statistical data */
ulong total_hist_size; /* Total size of all histograms */
/*
Total size of all histograms. A value of 0 means histograms are not present,
and histograms_are_ready() can finish sooner.
Currently we just set it to 1 when we expect to load histograms.
histogram-todo: rename this or even remove?
*/
ulong total_hist_size;
bool histograms_are_ready() const
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment