Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
1d981685
Commit
1d981685
authored
Sep 04, 2021
by
Sergei Petrunia
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Move JSON histograms code into its own files
parent
4ab2b78b
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
559 additions
and
490 deletions
+559
-490
sql/CMakeLists.txt
sql/CMakeLists.txt
+1
-0
sql/opt_histogram_json.cc
sql/opt_histogram_json.cc
+391
-0
sql/opt_histogram_json.h
sql/opt_histogram_json.h
+95
-0
sql/sql_statistics.cc
sql/sql_statistics.cc
+3
-432
sql/sql_statistics.h
sql/sql_statistics.h
+69
-58
No files found.
sql/CMakeLists.txt
View file @
1d981685
...
...
@@ -151,6 +151,7 @@ SET (SQL_SOURCE
sql_analyze_stmt.cc
sql_join_cache.cc
create_options.cc multi_range_read.cc
opt_histogram_json.cc
opt_index_cond_pushdown.cc opt_subselect.cc
opt_table_elimination.cc sql_expression_cache.cc
gcalc_slicescan.cc gcalc_tools.cc
...
...
sql/opt_histogram_json.cc
0 → 100644
View file @
1d981685
/*
Copyright (c) 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
#include "mariadb.h"
#include "sql_base.h"
#include "my_json_writer.h"
#include "sql_statistics.h"
#include "opt_histogram_json.h"
class
Histogram_json_builder
:
public
Histogram_builder
{
Histogram_json_hb
*
histogram
;
uint
hist_width
;
/* the number of points in the histogram */
double
bucket_capacity
;
/* number of rows in a bucket of the histogram */
uint
curr_bucket
;
/* number of the current bucket to be built */
std
::
vector
<
std
::
string
>
bucket_bounds
;
bool
first_value
=
true
;
public:
Histogram_json_builder
(
Histogram_json_hb
*
hist
,
Field
*
col
,
uint
col_len
,
ha_rows
rows
)
:
Histogram_builder
(
col
,
col_len
,
rows
),
histogram
(
hist
)
{
bucket_capacity
=
(
double
)
records
/
histogram
->
get_width
();
hist_width
=
histogram
->
get_width
();
curr_bucket
=
0
;
}
~
Histogram_json_builder
()
override
=
default
;
/*
@brief
Add data to the histogram. This call adds elem_cnt rows, each
of which has value of *elem.
@detail
Subsequent next() calls will add values that are greater than *elem.
*/
int
next
(
void
*
elem
,
element_count
elem_cnt
)
override
{
counters
.
next
(
elem
,
elem_cnt
);
ulonglong
count
=
counters
.
get_count
();
if
(
curr_bucket
==
hist_width
)
return
0
;
if
(
first_value
)
{
first_value
=
false
;
column
->
store_field_value
((
uchar
*
)
elem
,
col_length
);
StringBuffer
<
MAX_FIELD_WIDTH
>
val
;
column
->
val_str
(
&
val
);
bucket_bounds
.
push_back
(
std
::
string
(
val
.
ptr
(),
val
.
length
()));
}
if
(
count
>
bucket_capacity
*
(
curr_bucket
+
1
))
{
column
->
store_field_value
((
uchar
*
)
elem
,
col_length
);
StringBuffer
<
MAX_FIELD_WIDTH
>
val
;
column
->
val_str
(
&
val
);
bucket_bounds
.
emplace_back
(
val
.
ptr
(),
val
.
length
());
curr_bucket
++
;
while
(
curr_bucket
!=
hist_width
&&
count
>
bucket_capacity
*
(
curr_bucket
+
1
))
{
bucket_bounds
.
push_back
(
std
::
string
(
val
.
ptr
(),
val
.
length
()));
curr_bucket
++
;
}
}
if
(
records
==
count
&&
bucket_bounds
.
size
()
==
hist_width
)
{
column
->
store_field_value
((
uchar
*
)
elem
,
col_length
);
StringBuffer
<
MAX_FIELD_WIDTH
>
val
;
column
->
val_str
(
&
val
);
bucket_bounds
.
push_back
(
std
::
string
(
val
.
ptr
(),
val
.
length
()));
}
return
0
;
}
/*
@brief
Finalize the creation of histogram
*/
void
finalize
()
override
{
Json_writer
writer
;
writer
.
start_object
();
writer
.
add_member
(
Histogram_json_hb
::
JSON_NAME
).
start_array
();
for
(
auto
&
value
:
bucket_bounds
)
{
writer
.
add_str
(
value
.
c_str
());
}
writer
.
end_array
();
writer
.
end_object
();
Binary_string
*
json_string
=
(
Binary_string
*
)
writer
.
output
.
get_string
();
histogram
->
set_json_text
(
bucket_bounds
.
size
()
-
1
,
(
uchar
*
)
json_string
->
c_ptr
());
}
};
/*
  Create the builder object that will collect the data for this histogram.

  The builder is allocated with new and nothing in this function releases it;
  the pointer is handed to the caller.
*/
Histogram_builder *Histogram_json_hb::create_builder(Field *col, uint col_len,
                                                     ha_rows rows)
{
  Histogram_builder *builder=
    new Histogram_json_builder(this, col, col_len, rows);
  return builder;
}
/*
  Prepare the histogram object for collecting a new histogram.

  @param mem_root   Not used by this implementation
  @param htype_arg  Must be JSON_HB (checked by a debug assertion)
  @param size_arg   Requested histogram size; narrowed to 8 bits before it is
                    stored
*/
void Histogram_json_hb::init_for_collection(MEM_ROOT *mem_root,
                                            Histogram_type htype_arg,
                                            ulonglong size_arg)
{
  DBUG_ASSERT(htype_arg == JSON_HB);
  const uint8 narrowed_size= (uint8) size_arg;
  size= narrowed_size;
}
/*
  @brief
    Parse the histogram from its on-disk representation

  @detail
    The expected text is a JSON object whose first key is JSON_NAME and whose
    value is an array of scalars (strings, numbers, true/false). Every scalar
    is stored into *field (this overwrites the field's current value) and the
    resulting key image is appended to histogram_bounds.

    NOTE(review): the scan loop simply stops when json_scan_next() returns
    non-zero and the result of json_smart_read_value() is not checked, so a
    malformed tail of the document is presumably accepted silently - confirm
    against the JSON library.

  @param mem_root       Not used by this implementation
  @param field          Field used to convert bound text into key images
  @param type_arg       Must be JSON_HB (checked by a debug assertion)
  @param hist_data      The histogram text
  @param hist_data_len  Length of hist_data, in bytes

  @return
     false  OK
     True   Error
*/
bool Histogram_json_hb::parse(MEM_ROOT *mem_root, Field *field,
                              Histogram_type type_arg, const char *hist_data,
                              size_t hist_data_len)
{
  DBUG_ENTER("Histogram_json_hb::parse");
  DBUG_ASSERT(type_arg == JSON_HB);
  const char *err;
  json_engine_t je;
  json_string_t key_name;

  json_scan_start(&je, &my_charset_utf8mb4_bin,
                  (const uchar *) hist_data,
                  (const uchar *) hist_data + hist_data_len);

  if (json_read_value(&je) || je.value_type != JSON_VALUE_OBJECT)
  {
    err= "Root JSON element must be a JSON object";
    goto error;
  }

  json_string_set_str(&key_name, (const uchar *) JSON_NAME,
                      (const uchar *) JSON_NAME + strlen(JSON_NAME));
  json_string_set_cs(&key_name, system_charset_info);

  if (json_scan_next(&je) || je.state != JST_KEY ||
      !json_key_matches(&je, &key_name))
  {
    err= "The first key in the object must be histogram_hb_v1";
    goto error;
  }

  // The value must be a JSON array
  if (json_read_value(&je) || (je.value_type != JSON_VALUE_ARRAY))
  {
    err= "A JSON array expected";
    goto error;
  }

  // Read the array
  while (!json_scan_next(&je))
  {
    switch (je.state)
    {
      case JST_VALUE:
      {
        const char *val;
        int val_len;
        json_smart_read_value(&je, &val, &val_len);
        if (je.value_type != JSON_VALUE_STRING &&
            je.value_type != JSON_VALUE_NUMBER &&
            je.value_type != JSON_VALUE_TRUE &&
            je.value_type != JSON_VALUE_FALSE)
        {
          err= "Scalar value expected";
          goto error;
        }
        uchar buf[MAX_KEY_LENGTH];
        uint len_to_copy= field->key_length();
        // Convert the text to the field's value, then take its key image
        field->store_text(val, val_len, &my_charset_bin);
        uint bytes= field->get_key_image(buf, len_to_copy, Field::itRAW);
        histogram_bounds.push_back(std::string((char *) buf, bytes));
        // TODO: Should we also compare this endpoint with the previous
        // to verify that the ordering is right?
        break;
      }
      case JST_ARRAY_END:
        break;
    }
  }

  // n_buckets = n_bounds - 1. An array without elements has no buckets;
  // guard the unsigned subtraction so that it cannot wrap around.
  size= histogram_bounds.empty() ? 0 : histogram_bounds.size() - 1;
  DBUG_RETURN(false);

error:
  my_error(ER_JSON_HISTOGRAM_PARSE_FAILED, MYF(0), err,
           je.s.c_str - (const uchar *) hist_data);
  DBUG_RETURN(true);
}
/*
  Load a key image into the field.

  The image at ptr is passed to Field::set_key_image() with the field's
  key_length(); no NULL-indicator handling is done here. The table's write_set
  is switched with dbug_tmp_use_all_columns() for the duration of the call and
  restored afterwards.
*/
static void store_key_image_to_rec_no_null(Field *field, const uchar *ptr)
{
  MY_BITMAP *saved_write_set=
    dbug_tmp_use_all_columns(field->table, &field->table->write_set);

  const uint image_length= field->key_length();
  field->set_key_image(ptr, image_length);

  dbug_tmp_restore_column_map(&field->table->write_set, saved_write_set);
}
/*
  Compute the position of `key` inside the interval [left, right].

  @param field  Field that knows how to interpret the key images. For fields
                that are not compared through val_str, its current value is
                overwritten (it holds the value of `key` on return).
  @param key    Key image of the value to locate
  @param left   Key image of the left end of the interval
  @param right  Key image of the right end of the interval

  @return
    The result of pos_in_interval_for_string() or
    pos_in_interval_for_double(), depending on field->pos_through_val_str().
*/
static double position_in_interval(Field *field, const uchar *key,
                                   const std::string &left,
                                   const std::string &right)
{
  if (!field->pos_through_val_str())
  {
    // Load each image into the field in turn and read it back as a double
    store_key_image_to_rec_no_null(field, (const uchar *) left.data());
    const double left_real= field->val_real();

    store_key_image_to_rec_no_null(field, (const uchar *) right.data());
    const double right_real= field->val_real();

    store_key_image_to_rec_no_null(field, key);
    const double key_real= field->val_real();

    return pos_in_interval_for_double(key_real, left_real, right_real);
  }

  // Each image starts with a two-byte length, followed by the string bytes
  const uint32 left_len= uint2korr(left.data());
  const uint32 right_len= uint2korr(right.data());
  const uint32 key_len= uint2korr(key);

  const uchar *left_str= (const uchar *) left.data() + HA_KEY_BLOB_LENGTH;
  const uchar *right_str= (const uchar *) right.data() + HA_KEY_BLOB_LENGTH;

  return pos_in_interval_for_string(field->charset(),
                                    key + HA_KEY_BLOB_LENGTH, key_len,
                                    left_str, left_len,
                                    right_str, right_len);
}
/*
  Estimate the selectivity of "column = const".

  @param field     The table field the histogram is for. Its current value is
                   overwritten with the lookup key.
  @param endpoint  The lookup value, as a key image
  @param avg_sel   Average selectivity of a value; used as an upper limit when
                   the value falls within one bucket

  @return
    Estimated selectivity
*/
double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
                                            double avg_sel)
{
  store_key_image_to_rec(field, (uchar *) endpoint->key, field->key_length());

  // For a nullable field the image starts with a NULL-indicator byte: skip it
  const uchar *lookup_key= endpoint->key;
  if (field->real_maybe_null())
    lookup_key++;

  // Leftmost and rightmost bucket that the value can belong to
  const uint first_idx= find_bucket(field, lookup_key, false);
  const uint last_idx= find_bucket(field, lookup_key, true);
#if 0
  // find how many buckets this value occupies
  while ((last_idx + 1 < get_width() ) &&
         (field->key_cmp((uchar *)histogram_bounds[last_idx + 1].data(), lookup_key) == 0)) {
    last_idx++;
  }
#endif

  if (last_idx <= first_idx)
  {
    // the value fits within a single bucket
    return MY_MIN(avg_sel, 1.0 / get_width());
  }

  // value spans multiple buckets
  const double bucket_sel= 1.0 / (get_width() + 1);
  return bucket_sel * (last_idx - first_idx + 1);
}
/*
  Estimate the selectivity of a range "min_endp <(=) column <(=) max_endp".

  @param field     The table field histogram is for.  We don't care about the
                   field's current value, we only need its virtual functions to
                   perform various operations
  @param min_endp  Left endpoint, or NULL if there is none
  @param max_endp  Right endpoint, or NULL if there is none

  @return
    Estimated fraction of rows in the range. 1.0 is returned when the
    histogram has fewer than two bounds, as there is no bucket to look into.

  NOTE(review): `width` is 1/number-of-bounds while parse() documents
  n_buckets = n_bounds - 1, so an endpoint at the last bound maps to
  (n_bounds - 1) / n_bounds rather than to 1.0 - confirm this is intended.
*/
double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
                                            key_range *max_endp)
{
  // The code below reads histogram_bounds[idx] and histogram_bounds[idx + 1];
  // with fewer than two bounds that would be an out-of-range access.
  if (histogram_bounds.size() < 2)
    return 1.0;

  double min, max;
  double width= 1.0 / histogram_bounds.size();

  // A NULL left endpoint (first key byte set) is handled like "no endpoint"
  if (min_endp && !(field->null_ptr && min_endp->key[0]))
  {
    bool exclusive_endp= (min_endp->flag == HA_READ_AFTER_KEY);
    const uchar *min_key= min_endp->key;
    if (field->real_maybe_null())
      min_key++;

    // Find the leftmost bucket that contains the lookup value.
    // (If the lookup value is to the left of all buckets, find bucket #0)
    int idx= find_bucket(field, min_key, exclusive_endp);
    double min_sel= position_in_interval(field, (const uchar *) min_key,
                                         histogram_bounds[idx],
                                         histogram_bounds[idx + 1]);
    min= idx * width + min_sel * width;
  }
  else
    min= 0.0;

  if (max_endp)
  {
    // The right endpoint cannot be NULL
    DBUG_ASSERT(!(field->null_ptr && max_endp->key[0]));
    bool inclusive_endp= (max_endp->flag == HA_READ_AFTER_KEY);
    const uchar *max_key= max_endp->key;
    if (field->real_maybe_null())
      max_key++;

    int idx= find_bucket(field, max_key, inclusive_endp);
    double max_sel= position_in_interval(field, (const uchar *) max_key,
                                         histogram_bounds[idx],
                                         histogram_bounds[idx + 1]);
    max= idx * width + max_sel * width;
  }
  else
    max= 1.0;

  double sel= max - min;
  return sel;
}
/*
  Write the collected histogram (its JSON text) into the given field.
  The text is stored with the binary character set.
*/
void Histogram_json_hb::serialize(Field *field)
{
  const char *text= json_text.data();
  const size_t text_length= json_text.size();
  field->store(text, text_length, &my_charset_bin);
}
/*
  Find the histogram bucket that contains the value.

  This is a binary search over histogram_bounds; the first and the last bound
  are never compared against, so the result is 0 when lookup_val lies outside
  of the histogram on the left.

  @param field          Field used to compare key images (Field::key_cmp)
  @param lookup_val     Key image of the value to find
  @param equal_is_less  Controls what to do if a histogram bound is equal to
                        the lookup_val: if true, the bound is treated as being
                        less than lookup_val (the search continues to the
                        right of it), otherwise as being greater.

  @return
    Index of the left bound of the bucket
*/
int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
                                   bool equal_is_less)
{
  int lo= 0;
  int hi= histogram_bounds.size() - 1;

  // Narrow [lo, hi] down until the two are adjacent
  while (lo + 1 < hi)
  {
    const int mid= (lo + hi) / 2;
    const int cmp= field->key_cmp((uchar *) histogram_bounds[mid].data(),
                                  lookup_val);

    const bool bound_counts_as_less= (cmp < 0) || (cmp == 0 && equal_is_less);
    if (bound_counts_as_less)
      lo= mid;
    else
      hi= mid;
  }
  return lo;
}
sql/opt_histogram_json.h
0 → 100644
View file @
1d981685
/*
Copyright (c) 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA */
#include "sql_statistics.h"
/*
  An equi-height histogram which stores real values for bucket bounds.

  Handles @@histogram_type=JSON_HB
*/
class Histogram_json_hb : public Histogram_base
{
  /*
    Number of buckets in the histogram. Starts at 0 so that get_width() is
    well-defined before init_for_collection(), parse() or set_json_text()
    has been called.
  */
  size_t size= 0;

  /* Collection-time only: collected histogram in the JSON form. */
  std::string json_text;

  // Array of histogram bucket endpoints in KeyTupleFormat.
  std::vector<std::string> histogram_bounds;

public:
  /* Name of the key that holds the array of bounds in the JSON document */
  static constexpr const char *JSON_NAME= "histogram_hb_v1";

  bool parse(MEM_ROOT *mem_root, Field *field, Histogram_type type_arg,
             const char *hist_data, size_t hist_data_len) override;

  void serialize(Field *field) override;

  Histogram_builder *create_builder(Field *col, uint col_len,
                                    ha_rows rows) override;

  // returns number of buckets in the histogram
  uint get_width() override
  {
    return (uint) size;
  }

  Histogram_type get_type() override
  {
    return JSON_HB;
  }

  /*
    @brief
      Legacy: this returns the size of the histogram on disk.

    @detail
      This is only called at collection time when json_text is non-empty.
  */
  uint get_size() override
  {
    return json_text.size();
  }

  void init_for_collection(MEM_ROOT *mem_root, Histogram_type htype_arg,
                           ulonglong size) override;

  bool is_available() override { return true; }

  /* The histogram is used only if optimizer_use_condition_selectivity > 3 */
  bool is_usable(THD *thd) override
  {
    return thd->variables.optimizer_use_condition_selectivity > 3 &&
           is_available();
  }

  double point_selectivity(Field *field, key_range *endpoint,
                           double avg_selection) override;
  double range_selectivity(Field *field, key_range *min_endp,
                           key_range *max_endp) override;

  /*
    Set the collected histogram.

    @param sz             Number of buckets; narrowed to 8 bits before it is
                          stored
    @param json_text_arg  NUL-terminated JSON text; it is copied
  */
  void set_json_text(ulonglong sz, uchar *json_text_arg)
  {
    size= (uint8) sz;
    json_text.assign((const char *) json_text_arg,
                     strlen((const char *) json_text_arg));
  }

private:
  int find_bucket(Field *field, const uchar *lookup_val, bool equal_is_less);
};
sql/sql_statistics.cc
View file @
1d981685
...
...
@@ -28,11 +28,11 @@
#include "sql_base.h"
#include "key.h"
#include "sql_statistics.h"
#include "opt_histogram_json.h"
#include "opt_range.h"
#include "uniques.h"
#include "sql_show.h"
#include "sql_partition.h"
#include "my_json_writer.h"
#include <vector>
#include <string>
...
...
@@ -1267,8 +1267,8 @@ void Histogram_binary::serialize(Field *field)
}
void
Histogram_binary
::
init_for_collection
(
MEM_ROOT
*
mem_root
,
Histogram_type
htype_arg
,
ulonglong
size_arg
)
Histogram_type
htype_arg
,
ulonglong
size_arg
)
{
type
=
htype_arg
;
values
=
(
uchar
*
)
alloc_root
(
mem_root
,
size_arg
);
...
...
@@ -1276,273 +1276,6 @@ void Histogram_binary::init_for_collection(MEM_ROOT *mem_root,
}
void
Histogram_json_hb
::
init_for_collection
(
MEM_ROOT
*
mem_root
,
Histogram_type
htype_arg
,
ulonglong
size_arg
)
{
DBUG_ASSERT
(
htype_arg
==
JSON_HB
);
size
=
(
uint8
)
size_arg
;
}
/*
@brief
Parse the histogram from its on-disk representation
@return
false OK
True Error
*/
bool
Histogram_json_hb
::
parse
(
MEM_ROOT
*
mem_root
,
Field
*
field
,
Histogram_type
type_arg
,
const
char
*
hist_data
,
size_t
hist_data_len
)
{
DBUG_ENTER
(
"Histogram_json_hb::parse"
);
DBUG_ASSERT
(
type_arg
==
JSON_HB
);
const
char
*
err
;
json_engine_t
je
;
json_string_t
key_name
;
json_scan_start
(
&
je
,
&
my_charset_utf8mb4_bin
,
(
const
uchar
*
)
hist_data
,
(
const
uchar
*
)
hist_data
+
hist_data_len
);
if
(
json_read_value
(
&
je
)
||
je
.
value_type
!=
JSON_VALUE_OBJECT
)
{
err
=
"Root JSON element must be a JSON object"
;
goto
error
;
}
json_string_set_str
(
&
key_name
,
(
const
uchar
*
)
JSON_NAME
,
(
const
uchar
*
)
JSON_NAME
+
strlen
(
JSON_NAME
));
json_string_set_cs
(
&
key_name
,
system_charset_info
);
if
(
json_scan_next
(
&
je
)
||
je
.
state
!=
JST_KEY
||
!
json_key_matches
(
&
je
,
&
key_name
))
{
err
=
"The first key in the object must be histogram_hb_v1"
;
goto
error
;
}
// The value must be a JSON array
if
(
json_read_value
(
&
je
)
||
(
je
.
value_type
!=
JSON_VALUE_ARRAY
))
{
err
=
"A JSON array expected"
;
goto
error
;
}
// Read the array
while
(
!
json_scan_next
(
&
je
))
{
switch
(
je
.
state
)
{
case
JST_VALUE
:
{
const
char
*
val
;
int
val_len
;
json_smart_read_value
(
&
je
,
&
val
,
&
val_len
);
if
(
je
.
value_type
!=
JSON_VALUE_STRING
&&
je
.
value_type
!=
JSON_VALUE_NUMBER
&&
je
.
value_type
!=
JSON_VALUE_TRUE
&&
je
.
value_type
!=
JSON_VALUE_FALSE
)
{
err
=
"Scalar value expected"
;
goto
error
;
}
uchar
buf
[
MAX_KEY_LENGTH
];
uint
len_to_copy
=
field
->
key_length
();
field
->
store_text
(
val
,
val_len
,
&
my_charset_bin
);
uint
bytes
=
field
->
get_key_image
(
buf
,
len_to_copy
,
Field
::
itRAW
);
histogram_bounds
.
push_back
(
std
::
string
((
char
*
)
buf
,
bytes
));
// TODO: Should we also compare this endpoint with the previous
// to verify that the ordering is right?
break
;
}
case
JST_ARRAY_END
:
break
;
}
}
// n_buckets = n_bounds - 1 :
size
=
histogram_bounds
.
size
()
-
1
;
DBUG_RETURN
(
false
);
error:
my_error
(
ER_JSON_HISTOGRAM_PARSE_FAILED
,
MYF
(
0
),
err
,
je
.
s
.
c_str
-
(
const
uchar
*
)
hist_data
);
DBUG_RETURN
(
true
);
}
double
Histogram_json_hb
::
point_selectivity
(
Field
*
field
,
key_range
*
endpoint
,
double
avg_sel
)
{
double
sel
;
store_key_image_to_rec
(
field
,
(
uchar
*
)
endpoint
->
key
,
field
->
key_length
());
const
uchar
*
min_key
=
endpoint
->
key
;
if
(
field
->
real_maybe_null
())
min_key
++
;
uint
min_idx
=
find_bucket
(
field
,
min_key
,
false
);
uint
max_idx
=
find_bucket
(
field
,
min_key
,
true
);
#if 0
// find how many buckets this value occupies
while ((max_idx + 1 < get_width() ) &&
(field->key_cmp((uchar *)histogram_bounds[max_idx + 1].data(), min_key) == 0)) {
max_idx++;
}
#endif
if
(
max_idx
>
min_idx
)
{
// value spans multiple buckets
double
bucket_sel
=
1.0
/
(
get_width
()
+
1
);
sel
=
bucket_sel
*
(
max_idx
-
min_idx
+
1
);
}
else
{
// the value fits within a single bucket
sel
=
MY_MIN
(
avg_sel
,
1.0
/
get_width
());
}
return
sel
;
}
static
void
store_key_image_to_rec_no_null
(
Field
*
field
,
const
uchar
*
ptr
)
{
MY_BITMAP
*
old_map
=
dbug_tmp_use_all_columns
(
field
->
table
,
&
field
->
table
->
write_set
);
field
->
set_key_image
(
ptr
,
field
->
key_length
());
dbug_tmp_restore_column_map
(
&
field
->
table
->
write_set
,
old_map
);
}
static
double
position_in_interval
(
Field
*
field
,
const
uchar
*
key
,
const
std
::
string
&
left
,
const
std
::
string
&
right
)
{
double
res
;
if
(
field
->
pos_through_val_str
())
{
uint32
min_len
=
uint2korr
(
left
.
data
());
uint32
max_len
=
uint2korr
(
right
.
data
());
uint32
midp_len
=
uint2korr
(
key
);
res
=
pos_in_interval_for_string
(
field
->
charset
(),
key
+
HA_KEY_BLOB_LENGTH
,
midp_len
,
(
const
uchar
*
)
left
.
data
()
+
HA_KEY_BLOB_LENGTH
,
min_len
,
(
const
uchar
*
)
right
.
data
()
+
HA_KEY_BLOB_LENGTH
,
max_len
);
}
else
{
store_key_image_to_rec_no_null
(
field
,
(
const
uchar
*
)
left
.
data
());
double
min_val_real
=
field
->
val_real
();
store_key_image_to_rec_no_null
(
field
,
(
const
uchar
*
)
right
.
data
());
double
max_val_real
=
field
->
val_real
();
store_key_image_to_rec_no_null
(
field
,
key
);
double
midp_val_real
=
field
->
val_real
();
res
=
pos_in_interval_for_double
(
midp_val_real
,
min_val_real
,
max_val_real
);
}
return
res
;
}
/*
@param field The table field histogram is for. We don't care about the
field's current value, we only need its virtual functions to
perform various operations
@param min_endp Left endpoint, or NULL if there is none
@param max_endp Right endpoint, or NULL if there is none
*/
double
Histogram_json_hb
::
range_selectivity
(
Field
*
field
,
key_range
*
min_endp
,
key_range
*
max_endp
)
{
double
min
,
max
;
double
width
=
1.0
/
histogram_bounds
.
size
();
if
(
min_endp
&&
!
(
field
->
null_ptr
&&
min_endp
->
key
[
0
]))
{
bool
exclusive_endp
=
(
min_endp
->
flag
==
HA_READ_AFTER_KEY
)
?
true
:
false
;
const
uchar
*
min_key
=
min_endp
->
key
;
if
(
field
->
real_maybe_null
())
min_key
++
;
// Find the leftmost bucket that contains the lookup value.
// (If the lookup value is to the left of all buckets, find bucket #0)
int
idx
=
find_bucket
(
field
,
min_key
,
exclusive_endp
);
double
min_sel
=
position_in_interval
(
field
,
(
const
uchar
*
)
min_key
,
histogram_bounds
[
idx
],
histogram_bounds
[
idx
+
1
]);
min
=
idx
*
width
+
min_sel
*
width
;
}
else
min
=
0.0
;
if
(
max_endp
)
{
// The right endpoint cannot be NULL
DBUG_ASSERT
(
!
(
field
->
null_ptr
&&
max_endp
->
key
[
0
]));
bool
inclusive_endp
=
(
max_endp
->
flag
==
HA_READ_AFTER_KEY
)
?
true
:
false
;
const
uchar
*
max_key
=
max_endp
->
key
;
if
(
field
->
real_maybe_null
())
max_key
++
;
int
idx
=
find_bucket
(
field
,
max_key
,
inclusive_endp
);
double
max_sel
=
position_in_interval
(
field
,
(
const
uchar
*
)
max_key
,
histogram_bounds
[
idx
],
histogram_bounds
[
idx
+
1
]);
max
=
idx
*
width
+
max_sel
*
width
;
}
else
max
=
1.0
;
double
sel
=
max
-
min
;
return
sel
;
}
void
Histogram_json_hb
::
serialize
(
Field
*
field
)
{
field
->
store
(
json_text
.
data
(),
json_text
.
size
(),
&
my_charset_bin
);
}
/*
Find the histogram bucket that contains the value.
@param equal_is_less Controls what to do if a histogram bound is equal to the
lookup_val.
*/
int
Histogram_json_hb
::
find_bucket
(
Field
*
field
,
const
uchar
*
lookup_val
,
bool
equal_is_less
)
{
int
low
=
0
;
int
high
=
histogram_bounds
.
size
()
-
1
;
int
middle
;
while
(
low
+
1
<
high
)
{
middle
=
(
low
+
high
)
/
2
;
int
res
=
field
->
key_cmp
((
uchar
*
)
histogram_bounds
[
middle
].
data
(),
lookup_val
);
if
(
!
res
)
res
=
equal_is_less
?
-
1
:
1
;
if
(
res
<
0
)
low
=
middle
;
else
//res > 0
high
=
middle
;
}
return
low
;
}
/*
An object of the class Index_stat is created to read statistical
data on tables from the statistical table table_stat, to update
...
...
@@ -1853,73 +1586,6 @@ class Stat_table_write_iter
}
};
/*
This is used to collect the the basic statistics from a Unique object:
- count of values
- count of distinct values
- count of distinct values that have occurred only once
*/
class
Basic_stats_collector
{
ulonglong
count
;
/* number of values retrieved */
ulonglong
count_distinct
;
/* number of distinct values retrieved */
/* number of distinct values that occured only once */
ulonglong
count_distinct_single_occurence
;
public:
Basic_stats_collector
()
{
count
=
0
;
count_distinct
=
0
;
count_distinct_single_occurence
=
0
;
}
ulonglong
get_count_distinct
()
const
{
return
count_distinct
;
}
ulonglong
get_count_single_occurence
()
const
{
return
count_distinct_single_occurence
;
}
ulonglong
get_count
()
const
{
return
count
;
}
void
next
(
void
*
elem
,
element_count
elem_cnt
)
{
count_distinct
++
;
if
(
elem_cnt
==
1
)
count_distinct_single_occurence
++
;
count
+=
elem_cnt
;
}
};
/*
  Histogram_builder is the common base of the helper classes that build
  histograms for columns.

  The constructor is protected: builder objects are obtained through the
  histogram's create_builder(...) rather than created directly.
*/
class Histogram_builder
{
protected:
  Field *column;    /* table field for which the histogram is built */
  uint col_length;  /* size of this field */
  ha_rows records;  /* number of records the histogram is built for */

  Histogram_builder(Field *col, uint col_len, ha_rows rows)
    : column(col), col_length(col_len), records(rows)
  {}

public:
  // A histogram builder will also collect the counters
  Basic_stats_collector counters;

  /* Feed the builder one value, *elem, that occurred elem_cnt times */
  virtual int next(void *elem, element_count elem_cnt)= 0;

  /* Called after the data has been fed in, to complete the histogram */
  virtual void finalize()= 0;

  virtual ~Histogram_builder() = default;
};
class
Histogram_binary_builder
:
public
Histogram_builder
{
Field
*
min_value
;
/* pointer to the minimal value for the field */
...
...
@@ -1974,101 +1640,6 @@ Histogram_builder *Histogram_binary::create_builder(Field *col, uint col_len,
}
class
Histogram_json_builder
:
public
Histogram_builder
{
Histogram_json_hb
*
histogram
;
uint
hist_width
;
/* the number of points in the histogram */
double
bucket_capacity
;
/* number of rows in a bucket of the histogram */
uint
curr_bucket
;
/* number of the current bucket to be built */
std
::
vector
<
std
::
string
>
bucket_bounds
;
bool
first_value
=
true
;
public:
Histogram_json_builder
(
Field
*
col
,
uint
col_len
,
ha_rows
rows
)
:
Histogram_builder
(
col
,
col_len
,
rows
)
{
histogram
=
(
Histogram_json_hb
*
)
col
->
collected_stats
->
histogram
;
bucket_capacity
=
(
double
)
records
/
histogram
->
get_width
();
hist_width
=
histogram
->
get_width
();
curr_bucket
=
0
;
}
~
Histogram_json_builder
()
override
=
default
;
/*
Add data to the histogram. Adding Element elem which encountered elem_cnt
times.
*/
int
next
(
void
*
elem
,
element_count
elem_cnt
)
override
{
counters
.
next
(
elem
,
elem_cnt
);
ulonglong
count
=
counters
.
get_count
();
if
(
curr_bucket
==
hist_width
)
return
0
;
if
(
first_value
)
{
first_value
=
false
;
column
->
store_field_value
((
uchar
*
)
elem
,
col_length
);
StringBuffer
<
MAX_FIELD_WIDTH
>
val
;
column
->
val_str
(
&
val
);
bucket_bounds
.
push_back
(
std
::
string
(
val
.
ptr
(),
val
.
length
()));
}
if
(
count
>
bucket_capacity
*
(
curr_bucket
+
1
))
{
column
->
store_field_value
((
uchar
*
)
elem
,
col_length
);
StringBuffer
<
MAX_FIELD_WIDTH
>
val
;
column
->
val_str
(
&
val
);
bucket_bounds
.
emplace_back
(
val
.
ptr
(),
val
.
length
());
curr_bucket
++
;
while
(
curr_bucket
!=
hist_width
&&
count
>
bucket_capacity
*
(
curr_bucket
+
1
))
{
bucket_bounds
.
push_back
(
std
::
string
(
val
.
ptr
(),
val
.
length
()));
curr_bucket
++
;
}
}
if
(
records
==
count
&&
bucket_bounds
.
size
()
==
hist_width
)
{
column
->
store_field_value
((
uchar
*
)
elem
,
col_length
);
StringBuffer
<
MAX_FIELD_WIDTH
>
val
;
column
->
val_str
(
&
val
);
bucket_bounds
.
push_back
(
std
::
string
(
val
.
ptr
(),
val
.
length
()));
}
return
0
;
}
/*
Finalize the creation of histogram
*/
void
finalize
()
override
{
Json_writer
writer
;
writer
.
start_object
();
writer
.
add_member
(
Histogram_json_hb
::
JSON_NAME
).
start_array
();
for
(
auto
&
value
:
bucket_bounds
)
{
writer
.
add_str
(
value
.
c_str
());
}
writer
.
end_array
();
writer
.
end_object
();
Binary_string
*
json_string
=
(
Binary_string
*
)
writer
.
output
.
get_string
();
histogram
->
set_json_text
(
bucket_bounds
.
size
()
-
1
,
(
uchar
*
)
json_string
->
c_ptr
());
}
};
Histogram_builder
*
Histogram_json_hb
::
create_builder
(
Field
*
col
,
uint
col_len
,
ha_rows
rows
)
{
return
new
Histogram_json_builder
(
col
,
col_len
,
rows
);
}
Histogram_base
*
create_histogram
(
MEM_ROOT
*
mem_root
,
Histogram_type
hist_type
,
THD
*
owner
)
{
...
...
sql/sql_statistics.h
View file @
1d981685
...
...
@@ -162,11 +162,18 @@ class Histogram_base : public Sql_alloc
virtual
uint
get_width
()
=
0
;
virtual
Histogram_builder
*
create_builder
(
Field
*
col
,
uint
col_len
,
ha_rows
rows
)
=
0
;
/*
The creation-time workflow is:
* create a histogram
* init_for_collection()
* create_builder()
* feed the data to the builder
* serialize();
*/
virtual
void
init_for_collection
(
MEM_ROOT
*
mem_root
,
Histogram_type
htype_arg
,
ulonglong
size
)
=
0
;
virtual
Histogram_builder
*
create_builder
(
Field
*
col
,
uint
col_len
,
ha_rows
rows
)
=
0
;
virtual
bool
is_available
()
=
0
;
...
...
@@ -177,19 +184,26 @@ class Histogram_base : public Sql_alloc
virtual
double
range_selectivity
(
Field
*
field
,
key_range
*
min_endp
,
key_range
*
max_endp
)
=
0
;
// Legacy: return the size of the histogram on disk.
// This will be stored in mysql.column_stats.hist_size column.
// Newer, JSON-based histograms may return 0.
/*
Legacy: return the size of the histogram on disk.
This will be stored in mysql.column_stats.hist_size column.
The value is not really needed as one can look at
LENGTH(mysql.column_stats.histogram) directly.
*/
virtual
uint
get_size
()
=
0
;
virtual
~
Histogram_base
()
=
default
;
Histogram_base
()
:
owner
(
NULL
)
{}
/*
Memory management: a histogram may be (exclusively) "owned" by a particular
thread (done for histograms that are being collected). By default, a
histogram has owner==NULL and is not owned by any particular thread.
*/
THD
*
get_owner
()
{
return
owner
;
}
void
set_owner
(
THD
*
thd
)
{
owner
=
thd
;
}
private:
// Owner is a thread that *exclusively* owns this histogram (and so can
// delete it at any time)
THD
*
owner
;
};
...
...
@@ -353,75 +367,72 @@ class Histogram_binary : public Histogram_base
/*
An equi-height histogram which stores real values for bucket bounds.
Handles @@histogram_type=JSON_HB
This is used to collect the basic statistics from a Unique object:
- count of values
- count of distinct values
- count of distinct values that have occurred only once
*/
class
Histogram_json_hb
:
public
Histogram_base
class
Basic_stats_collector
{
private:
size_t
size
;
/* Number of elements in the histogram */
/* Collection-time only: collected histogram in the JSON form. */
std
::
string
json_text
;
// Array of histogram bucket endpoints in KeyTupleFormat.
std
::
vector
<
std
::
string
>
histogram_bounds
;
ulonglong
count
;
/* number of values retrieved */
ulonglong
count_distinct
;
/* number of distinct values retrieved */
/* number of distinct values that occurred only once */
ulonglong
count_distinct_single_occurence
;
public:
static
constexpr
const
char
*
JSON_NAME
=
"histogram_hb_v1"
;
bool
parse
(
MEM_ROOT
*
mem_root
,
Field
*
field
,
Histogram_type
type_arg
,
const
char
*
hist_data
,
size_t
hist_data_len
)
override
;
void
serialize
(
Field
*
field
)
override
;
Histogram_builder
*
create_builder
(
Field
*
col
,
uint
col_len
,
ha_rows
rows
)
override
;
// returns number of buckets in the histogram
uint
get_width
()
override
Basic_stats_collector
()
{
return
(
uint
)
size
;
count
=
0
;
count_distinct
=
0
;
count_distinct_single_occurence
=
0
;
}
Histogram_type
get_type
()
override
ulonglong
get_count_distinct
()
const
{
return
count_distinct
;
}
ulonglong
get_count_single_occurence
()
const
{
return
JSON_HB
;
return
count_distinct_single_occurence
;
}
ulonglong
get_count
()
const
{
return
count
;
}
void
set_json_text
(
ulonglong
sz
,
uchar
*
json_text_arg
)
void
next
(
void
*
elem
,
element_count
elem_cnt
)
{
size
=
(
uint8
)
sz
;
json_text
.
assign
((
const
char
*
)
json_text_arg
,
strlen
((
const
char
*
)
json_text_arg
));
count_distinct
++
;
if
(
elem_cnt
==
1
)
count_distinct_single_occurence
++
;
count
+=
elem_cnt
;
}
};
uint
get_size
()
override
{
return
size
;
}
void
init_for_collection
(
MEM_ROOT
*
mem_root
,
Histogram_type
htype_arg
,
ulonglong
size
)
override
;
/*
Histogram_builder is a helper class that is used to build histograms
for columns.
bool
is_available
()
override
{
return
true
;
}
Do not create directly, call Histogram_base::create_builder(...);
*/
bool
is_usable
(
THD
*
thd
)
override
{
return
thd
->
variables
.
optimizer_use_condition_selectivity
>
3
&&
is_available
();
}
class
Histogram_builder
{
protected:
Field
*
column
;
/* table field for which the histogram is built */
uint
col_length
;
/* size of this field */
ha_rows
records
;
/* number of records the histogram is built for */
double
point_selectivity
(
Field
*
field
,
key_range
*
endpoint
,
double
avg_selection
)
override
;
double
range_selectivity
(
Field
*
field
,
key_range
*
min_endp
,
key_range
*
max_endp
)
override
;
private:
int
find_bucket
(
Field
*
field
,
const
uchar
*
lookup_val
,
bool
equal_is_less
);
Histogram_builder
(
Field
*
col
,
uint
col_len
,
ha_rows
rows
)
:
column
(
col
),
col_length
(
col_len
),
records
(
rows
)
{}
public:
// A histogram builder will also collect the counters
Basic_stats_collector
counters
;
virtual
int
next
(
void
*
elem
,
element_count
elem_cnt
)
=
0
;
virtual
void
finalize
()
=
0
;
virtual
~
Histogram_builder
(){}
};
class
Columns_statistics
;
class
Index_statistics
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment