Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
1fa7af74
Commit
1fa7af74
authored
Jul 28, 2021
by
Michael Okoko
Committed by
Sergei Petrunia
Jan 19, 2022
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Split histogram classes into JSON and binary classes
Signed-off-by:
Michael Okoko
<
okokomichaels@outlook.com
>
parent
1998b787
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
132 additions
and
39 deletions
+132
-39
sql/sql_statistics.cc
sql/sql_statistics.cc
+57
-22
sql/sql_statistics.h
sql/sql_statistics.h
+75
-17
No files found.
sql/sql_statistics.cc
View file @
1fa7af74
...
...
@@ -76,6 +76,8 @@ std::vector<std::string> parse_histogram_from_json(const char *json);
void
test_parse_histogram_from_json
();
Histogram_base
*
create_histogram
(
Histogram_type
hist_type
);
/* Currently there are only 3 persistent statistical tables */
static
const
uint
STATISTICS_TABLES
=
3
;
...
...
@@ -1217,9 +1219,9 @@ class Column_stat: public Stat_table
The method assumes that the value of histogram size and the pointer to
the histogram location has been already set in the fields size and values
of read_stats->histogram.
*/
*/
Histogram
*
load_histogram
(
MEM_ROOT
*
mem_root
)
Histogram
_binary
*
load_histogram
(
MEM_ROOT
*
mem_root
)
{
if
(
find_stat
())
{
...
...
@@ -1230,7 +1232,7 @@ class Column_stat: public Stat_table
table_field
->
read_stats
->
set_not_null
(
fldno
);
stat_field
->
val_str
(
&
val
);
// histogram-todo: here, create the histogram of appropriate type.
Histogram
*
hist
=
new
(
mem_root
)
Histogram
();
Histogram
_binary
*
hist
=
new
(
mem_root
)
Histogram_binary
();
if
(
!
hist
->
parse
(
mem_root
,
table_field
->
read_stats
->
histogram_type_on_disk
,
(
const
uchar
*
)
val
.
ptr
(),
val
.
length
()))
{
...
...
@@ -1244,7 +1246,7 @@ class Column_stat: public Stat_table
}
};
bool
Histogram
::
parse
(
MEM_ROOT
*
mem_root
,
Histogram_type
type_arg
,
const
uchar
*
ptr_arg
,
uint
size_arg
)
bool
Histogram
_binary
::
parse
(
MEM_ROOT
*
mem_root
,
Histogram_type
type_arg
,
const
uchar
*
ptr_arg
,
uint
size_arg
)
{
// Just copy the data
size
=
(
uint8
)
size_arg
;
...
...
@@ -1258,7 +1260,7 @@ bool Histogram::parse(MEM_ROOT *mem_root, Histogram_type type_arg, const uchar *
/*
Save the histogram data into a table field.
*/
void
Histogram
::
serialize
(
Field
*
field
)
void
Histogram
_binary
::
serialize
(
Field
*
field
)
{
if
(
get_type
()
==
JSON
)
{
...
...
@@ -1269,7 +1271,7 @@ void Histogram::serialize(Field *field)
field
->
store
((
char
*
)
get_values
(),
get_size
(),
&
my_charset_bin
);
}
void
Histogram
::
init_for_collection
(
MEM_ROOT
*
mem_root
,
void
Histogram
_binary
::
init_for_collection
(
MEM_ROOT
*
mem_root
,
Histogram_type
htype_arg
,
ulonglong
size_arg
)
{
...
...
@@ -1278,6 +1280,13 @@ void Histogram::init_for_collection(MEM_ROOT *mem_root,
size
=
(
uint8
)
size_arg
;
}
/*
  Prepare a JSON histogram for collection.

  Allocates size_arg bytes from mem_root for the values buffer, remembers
  the requested histogram type, and stores size_arg narrowed to uint8 as
  the histogram size. The pointer returned by alloc_root() is stored as-is,
  without a NULL check.
*/
void Histogram_json::init_for_collection(MEM_ROOT *mem_root,
                                         Histogram_type htype_arg,
                                         ulonglong size_arg)
{
  uchar *buffer= (uchar *) alloc_root(mem_root, size_arg);

  size= (uint8) size_arg;
  values= buffer;
  type= htype_arg;
}
/*
An object of the class Index_stat is created to read statistical
data on tables from the statistical table table_stat, to update
...
...
@@ -1595,13 +1604,13 @@ class Stat_table_write_iter
class
Histogram_builder
{
pr
otected
:
pr
ivate
:
Field
*
column
;
/* table field for which the histogram is built */
uint
col_length
;
/* size of this field */
ha_rows
records
;
/* number of records the histogram is built for */
Field
*
min_value
;
/* pointer to the minimal value for the field */
Field
*
max_value
;
/* pointer to the maximal value for the field */
Histogram
*
histogram
;
/* the histogram location */
Histogram
_binary
*
histogram
;
/* the histogram location */
uint
hist_width
;
/* the number of points in the histogram */
double
bucket_capacity
;
/* number of rows in a bucket of the histogram */
uint
curr_bucket
;
/* number of the current bucket to be built */
...
...
@@ -1617,7 +1626,7 @@ class Histogram_builder
Column_statistics
*
col_stats
=
col
->
collected_stats
;
min_value
=
col_stats
->
min_value
;
max_value
=
col_stats
->
max_value
;
histogram
=
col_stats
->
histogram_
;
histogram
=
dynamic_cast
<
Histogram_binary
*>
(
col_stats
->
histogram_
)
;
hist_width
=
histogram
->
get_width
();
bucket_capacity
=
(
double
)
records
/
(
hist_width
+
1
);
curr_bucket
=
0
;
...
...
@@ -1626,6 +1635,8 @@ class Histogram_builder
count_distinct_single_occurence
=
0
;
}
Histogram_builder
()
=
default
;
virtual
~
Histogram_builder
()
=
default
;
ulonglong
get_count_distinct
()
const
{
return
count_distinct
;
}
...
...
@@ -1661,16 +1672,29 @@ class Histogram_builder
class
Histogram_builder_json
:
public
Histogram_builder
{
std
::
vector
<
std
::
string
>
bucket_bounds
=
{};
Field
*
column
;
/* table field for which the histogram is built */
uint
col_length
;
/* size of this field */
ha_rows
records
;
/* number of records the histogram is built for */
Field
*
min_value
;
/* pointer to the minimal value for the field */
Field
*
max_value
;
/* pointer to the maximal value for the field */
Histogram_json
*
histogram
;
/* the histogram location */
uint
hist_width
;
/* the number of points in the histogram */
double
bucket_capacity
;
/* number of rows in a bucket of the histogram */
uint
curr_bucket
;
/* number of the current bucket to be built */
ulonglong
count
;
/* number of values retrieved */
ulonglong
count_distinct
;
/* number of distinct values retrieved */
/* number of distinct values that occurred only once */
ulonglong
count_distinct_single_occurence
;
std
::
vector
<
std
::
string
>
bucket_bounds
=
{};
public:
Histogram_builder_json
(
Field
*
col
,
uint
col_len
,
ha_rows
rows
)
:
Histogram_builder
(
col
,
col_len
,
rows
)
:
column
(
col
),
col_length
(
col_len
),
records
(
rows
)
{
Column_statistics
*
col_stats
=
col
->
collected_stats
;
min_value
=
col_stats
->
min_value
;
max_value
=
col_stats
->
max_value
;
histogram
=
col_stats
->
histogram_
;
histogram
=
dynamic_cast
<
Histogram_json
*>
(
col_stats
->
histogram_
)
;
hist_width
=
histogram
->
get_width
();
bucket_capacity
=
(
double
)
records
/
(
hist_width
+
1
);
curr_bucket
=
0
;
...
...
@@ -1718,6 +1742,15 @@ std::vector<std::string> bucket_bounds = {};
}
};
/*
  Create a histogram object matching hist_type.

  JSON yields a Histogram_json; every other type yields a Histogram_binary.
  The object is returned through the common Histogram_base interface.
*/
Histogram_base *create_histogram(Histogram_type hist_type)
{
  // Invalid histogram types are expected to have been rejected by the
  // caller, so anything that is not JSON is treated as a binary histogram.
  if (hist_type != JSON)
    return new Histogram_binary;

  return new Histogram_json;
}
void
test_parse_histogram_from_json
()
{
std
::
vector
<
std
::
string
>
bucket
=
{};
...
...
@@ -1954,9 +1987,9 @@ class Count_distinct_field: public Sql_alloc
@brief
Get the pointer to the histogram built for table_field
*/
Histogram
*
get_histogram
()
Histogram
_binary
*
get_histogram
()
{
return
table_field
->
collected_stats
->
histogram_
;
return
dynamic_cast
<
Histogram_binary
*>
(
table_field
->
collected_stats
->
histogram_
)
;
}
};
...
...
@@ -2608,18 +2641,18 @@ bool Column_statistics_collected::add()
/*
Create an empty Histogram object from histogram_type.
Create an empty Histogram
_binary
object from histogram_type.
Note: it is not yet clear whether collection-time histogram should be the same
as lookup-time histogram. At the moment, they are.
*/
Histogram
*
get_histogram_by_type
(
MEM_ROOT
*
mem_root
,
Histogram_type
hist_type
)
{
Histogram
_binary
*
get_histogram_by_type
(
MEM_ROOT
*
mem_root
,
Histogram_type
hist_type
)
{
switch
(
hist_type
)
{
case
SINGLE_PREC_HB
:
case
DOUBLE_PREC_HB
:
case
JSON
:
return
new
Histogram
();
return
new
Histogram
_binary
();
default:
DBUG_ASSERT
(
0
);
}
...
...
@@ -2660,7 +2693,7 @@ void Column_statistics_collected::finish(MEM_ROOT *mem_root, ha_rows rows, doubl
if
(
hist_size
!=
0
&&
hist_type
!=
INVALID_HISTOGRAM
)
{
have_histogram
=
true
;
histogram_
=
new
Histogram
;
histogram_
=
create_histogram
(
hist_type
)
;
histogram_
->
init_for_collection
(
mem_root
,
hist_type
,
hist_size
);
}
...
...
@@ -4048,7 +4081,8 @@ double get_column_range_cardinality(Field *field,
if
(
avg_frequency
>
1.0
+
0.000001
&&
col_stats
->
min_max_values_are_provided
())
{
Histogram
*
hist
=
col_stats
->
histogram_
;
Histogram_binary
*
hist
=
dynamic_cast
<
Histogram_binary
*>
(
col_stats
->
histogram_
);
if
(
hist
&&
hist
->
is_usable
(
thd
))
{
store_key_image_to_rec
(
field
,
(
uchar
*
)
min_endp
->
key
,
...
...
@@ -4092,7 +4126,8 @@ double get_column_range_cardinality(Field *field,
else
max_mp_pos
=
1.0
;
Histogram
*
hist
=
col_stats
->
histogram_
;
Histogram_binary
*
hist
=
dynamic_cast
<
Histogram_binary
*>
(
col_stats
->
histogram_
);
if
(
hist
&&
hist
->
is_usable
(
thd
))
sel
=
hist
->
range_selectivity
(
min_mp_pos
,
max_mp_pos
);
else
...
...
@@ -4143,7 +4178,7 @@ double get_column_range_cardinality(Field *field,
value.
*/
double
Histogram
::
point_selectivity
(
double
pos
,
double
avg_sel
)
double
Histogram
_binary
::
point_selectivity
(
double
pos
,
double
avg_sel
)
{
double
sel
;
/* Find the bucket that contains the value 'pos'. */
...
...
@@ -4179,7 +4214,7 @@ double Histogram::point_selectivity(double pos, double avg_sel)
/*
The value 'pos' fits within one single histogram bucket.
Histogram buckets have the same numbers of rows, but they cover
Histogram
_binary
buckets have the same numbers of rows, but they cover
different ranges of values.
We assume that values are uniformly distributed across the [0..1] value
...
...
sql/sql_statistics.h
View file @
1fa7af74
...
...
@@ -153,6 +153,24 @@ class Histogram_base : public Sql_alloc
virtual
void
serialize
(
Field
*
to_field
)
=
0
;
virtual
Histogram_type
get_type
()
=
0
;
virtual
uint
get_width
()
=
0
;
virtual
void
init_for_collection
(
MEM_ROOT
*
mem_root
,
Histogram_type
htype_arg
,
ulonglong
size
)
=
0
;
virtual
bool
is_available
()
=
0
;
virtual
bool
is_usable
(
THD
*
thd
)
=
0
;
virtual
void
set_values
(
uchar
*
values
)
=
0
;
virtual
uchar
*
get_values
()
=
0
;
virtual
void
set_size
(
ulonglong
sz
)
=
0
;
virtual
double
range_selectivity
(
double
min_pos
,
double
max_pos
)
=
0
;
virtual
double
point_selectivity
(
double
pos
,
double
avg_selection
)
=
0
;
// Legacy: return the size of the histogram on disk.
// This will be stored in mysql.column_stats.hist_size column.
...
...
@@ -162,22 +180,21 @@ class Histogram_base : public Sql_alloc
virtual
~
Histogram_base
(){}
};
class
Histogram
:
public
Histogram_base
class
Histogram
_binary
:
public
Histogram_base
{
public:
bool
parse
(
MEM_ROOT
*
mem_root
,
Histogram_type
type_arg
,
const
uchar
*
ptr_arg
,
uint
size_arg
)
override
;
void
serialize
(
Field
*
to_field
)
override
;
Histogram_type
get_type
()
override
{
return
type
;
}
uint
get_size
()
override
{
return
(
uint
)
size
;
}
// returns number of buckets in the histogram
uint
get_width
()
uint
get_width
()
override
{
switch
(
type
)
{
case
SINGLE_PREC_HB
:
case
JSON
:
return
size
;
case
DOUBLE_PREC_HB
:
return
size
/
2
;
...
...
@@ -196,7 +213,6 @@ class Histogram : public Histogram_base
{
switch
(
type
)
{
case
SINGLE_PREC_HB
:
case
JSON
:
return
((
uint
)
(
1
<<
8
)
-
1
);
case
DOUBLE_PREC_HB
:
return
((
uint
)
(
1
<<
16
)
-
1
);
...
...
@@ -211,7 +227,6 @@ class Histogram : public Histogram_base
DBUG_ASSERT
(
i
<
get_width
());
switch
(
type
)
{
case
SINGLE_PREC_HB
:
case
JSON
:
return
(
uint
)
(((
uint8
*
)
values
)[
i
]);
case
DOUBLE_PREC_HB
:
return
(
uint
)
uint2korr
(
values
+
i
*
2
);
...
...
@@ -260,22 +275,22 @@ class Histogram : public Histogram_base
return
i
;
}
uchar
*
get_values
()
{
return
(
uchar
*
)
values
;
}
uchar
*
get_values
()
override
{
return
(
uchar
*
)
values
;
}
public:
void
init_for_collection
(
MEM_ROOT
*
mem_root
,
Histogram_type
htype_arg
,
ulonglong
size
);
void
init_for_collection
(
MEM_ROOT
*
mem_root
,
Histogram_type
htype_arg
,
ulonglong
size
)
override
;
// Note: these two are used only for saving the JSON text:
void
set_values
(
uchar
*
vals
)
{
values
=
(
uchar
*
)
vals
;
}
void
set_size
(
ulonglong
sz
)
{
size
=
(
uint8
)
sz
;
}
void
set_values
(
uchar
*
vals
)
override
{
values
=
(
uchar
*
)
vals
;
}
void
set_size
(
ulonglong
sz
)
override
{
size
=
(
uint8
)
sz
;
}
bool
is_available
()
{
return
get_size
()
>
0
&&
get_values
();
}
bool
is_available
()
override
{
return
get_size
()
>
0
&&
get_values
();
}
/*
This function checks that histograms should be usable only when
1) the level of optimizer_use_condition_selectivity > 3
2) histograms have been collected
*/
bool
is_usable
(
THD
*
thd
)
bool
is_usable
(
THD
*
thd
)
override
{
return
thd
->
variables
.
optimizer_use_condition_selectivity
>
3
&&
is_available
();
...
...
@@ -285,7 +300,6 @@ class Histogram : public Histogram_base
{
switch
(
type
)
{
case
SINGLE_PREC_HB
:
case
JSON
:
((
uint8
*
)
values
)[
i
]
=
(
uint8
)
(
val
*
prec_factor
());
return
;
case
DOUBLE_PREC_HB
:
...
...
@@ -301,7 +315,6 @@ class Histogram : public Histogram_base
{
switch
(
type
)
{
case
SINGLE_PREC_HB
:
case
JSON
:
((
uint8
*
)
values
)[
i
]
=
((
uint8
*
)
values
)[
i
-
1
];
return
;
case
DOUBLE_PREC_HB
:
...
...
@@ -313,7 +326,7 @@ class Histogram : public Histogram_base
}
}
double
range_selectivity
(
double
min_pos
,
double
max_pos
)
double
range_selectivity
(
double
min_pos
,
double
max_pos
)
override
{
double
sel
;
double
bucket_sel
=
1.0
/
(
get_width
()
+
1
);
...
...
@@ -326,9 +339,54 @@ class Histogram : public Histogram_base
/*
Estimate selectivity of "col=const" using a histogram
*/
double
point_selectivity
(
double
pos
,
double
avg_sel
);
double
point_selectivity
(
double
pos
,
double
avg_sel
)
override
;
};
/*
  Histogram stored in JSON form.

  This class currently carries only the bookkeeping (type, size, values
  pointer). parse() and serialize() are stubs, and the selectivity
  estimators return fixed constants.
*/
class Histogram_json : public Histogram_base
{
private:
  /*
    The members have default values so that get_size(), get_values() and
    is_available() read defined values on an object that has not yet been
    through init_for_collection() or the setters.
  */
  Histogram_type type= JSON;
  uint8 size= 0;               /* Number of elements in the histogram */
  uchar *values= nullptr;

public:
  /* Stub: does not read its arguments and always returns false. */
  bool parse(MEM_ROOT *mem_root, Histogram_type type_arg, const uchar *ptr,
             uint size) override
  {
    return false;
  }

  /* Stub: writes nothing to to_field. */
  void serialize(Field *to_field) override {}

  uint get_size() override { return (uint) size; }

  // returns number of buckets in the histogram
  uint get_width() override { return size; }

  /* Always reports JSON, independently of the stored 'type' member. */
  Histogram_type get_type() override { return JSON; }

  /* The size is kept in a uint8, so sz is narrowed on assignment. */
  void set_size(ulonglong sz) override { size= (uint8) sz; }

  void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
                           ulonglong size) override;

  /* True when the size is non-zero and a values buffer is attached. */
  bool is_available() override { return get_size() > 0 && get_values(); }

  /*
    The histogram is usable only when
    1) optimizer_use_condition_selectivity > 3
    2) the histogram is available (see is_available())
  */
  bool is_usable(THD *thd) override
  {
    return thd->variables.optimizer_use_condition_selectivity > 3 &&
           is_available();
  }

  void set_values(uchar *vals) override { values= (uchar *) vals; }

  uchar *get_values() override { return (uchar *) values; }

  /* Placeholder estimate: returns the constant 0.1 for any range. */
  double range_selectivity(double min_pos, double max_pos) override
  {
    return 0.1;
  }

  /* Placeholder estimate: returns the constant 0.5 for any point. */
  double point_selectivity(double pos, double avg_selection) override
  {
    return 0.5;
  }
};
class
Columns_statistics
;
class
Index_statistics
;
...
...
@@ -411,7 +469,7 @@ class Column_statistics
public:
Histogram_type
histogram_type_on_disk
;
Histogram
*
histogram_
;
Histogram
_base
*
histogram_
;
uint32
no_values_provided_bitmap
()
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment