Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
05877df4
Commit
05877df4
authored
Oct 22, 2021
by
Sergei Petrunia
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
MDEV-26849: JSON Histograms: point selectivity estimates are off
.. for non-existent values. Handle this special case.
parent
f3f78bed
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
142 additions
and
10 deletions
+142
-10
mysql-test/main/statistics.result
mysql-test/main/statistics.result
+1
-0
mysql-test/main/statistics.test
mysql-test/main/statistics.test
+1
-1
mysql-test/main/statistics_json.result
mysql-test/main/statistics_json.result
+102
-0
mysql-test/main/statistics_json.test
mysql-test/main/statistics_json.test
+22
-0
sql/opt_histogram_json.cc
sql/opt_histogram_json.cc
+6
-4
sql/opt_histogram_json.h
sql/opt_histogram_json.h
+2
-1
sql/sql_statistics.cc
sql/sql_statistics.cc
+4
-2
sql/sql_statistics.h
sql/sql_statistics.h
+4
-2
No files found.
mysql-test/main/statistics.result
View file @
05877df4
...
...
@@ -1885,6 +1885,7 @@ t1 id 1 17384 0.0000 4.0000 14.0000 0.15705,0.15711,0.21463,0.15705,0.15711,0.15
explain select * from t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 229376
drop table t0;
drop table t1;
set analyze_sample_percentage=@save_analyze_sample_percentage;
set histogram_size=@save_histogram_size;
...
...
mysql-test/main/statistics.test
View file @
05877df4
...
...
@@ -1019,7 +1019,7 @@ select table_name, column_name, min_value, max_value, nulls_ratio, avg_length, a
from
mysql
.
column_stats
;
explain
select
*
from
t1
;
drop
table
t0
;
drop
table
t1
;
set
analyze_sample_percentage
=@
save_analyze_sample_percentage
;
set
histogram_size
=@
save_histogram_size
;
...
...
mysql-test/main/statistics_json.result
View file @
05877df4
...
...
@@ -4148,6 +4148,7 @@ t1 id 1 17384 0.0000 4.0000 14.0000 {
explain select * from t1;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 229376
drop table t0;
drop table t1;
set analyze_sample_percentage=@save_analyze_sample_percentage;
set histogram_size=@save_histogram_size;
...
...
@@ -7530,3 +7531,104 @@ select c from t1 where c > '1';
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 16 16.00 75.00 75.00 Using where
drop table t1;
#
# MDEV-26849: JSON Histograms: point selectivity estimates are off for non-existent values
#
#
create table t0(a int);
insert into t0 (a) values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
create table t1(a int);
insert into t1 select 100*A.a from t0 A, t0 B, t0 C;
select a, count(*) from t1 group by a order by a;
a count(*)
0 100
100 100
200 100
300 100
400 100
500 100
600 100
700 100
800 100
900 100
set histogram_type=json_hb, histogram_size=default;
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
select * from mysql.column_stats where table_name='t1';
db_name table_name column_name min_value max_value nulls_ratio avg_length avg_frequency hist_size hist_type histogram
test t1 a 0 900 0.0000 4.0000 100.0000 10 JSON_HB {
"histogram_hb_v2": [
{
"start": "0",
"size": 0.1,
"ndv": 1
},
{
"start": "100",
"size": 0.1,
"ndv": 1
},
{
"start": "200",
"size": 0.1,
"ndv": 1
},
{
"start": "300",
"size": 0.1,
"ndv": 1
},
{
"start": "400",
"size": 0.1,
"ndv": 1
},
{
"start": "500",
"size": 0.1,
"ndv": 1
},
{
"start": "600",
"size": 0.1,
"ndv": 1
},
{
"start": "700",
"size": 0.1,
"ndv": 1
},
{
"start": "800",
"size": 0.1,
"ndv": 1
},
{
"start": "900",
"end": "900",
"size": 0.1,
"ndv": 1
}
]
}
analyze select * from t1 where a=0;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
analyze select * from t1 where a=50;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
analyze select * from t1 where a=70;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
analyze select * from t1 where a=100;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
analyze select * from t1 where a=150;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 0.10 0.00 Using where
analyze select * from t1 where a=200;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 1000 1000.00 10.00 10.00 Using where
drop table t0,t1;
mysql-test/main/statistics_json.test
View file @
05877df4
...
...
@@ -295,3 +295,25 @@ select c from t1 where c > '1';
drop
table
t1
;
--
echo
#
--
echo
# MDEV-26849: JSON Histograms: point selectivity estimates are off for non-existent values
--
echo
#
--
echo
#
create
table
t0
(
a
int
);
insert
into
t0
(
a
)
values
(
0
),(
1
),(
2
),(
3
),(
4
),(
5
),(
6
),(
7
),(
8
),(
9
);
create
table
t1
(
a
int
);
insert
into
t1
select
100
*
A
.
a
from
t0
A
,
t0
B
,
t0
C
;
select
a
,
count
(
*
)
from
t1
group
by
a
order
by
a
;
set
histogram_type
=
json_hb
,
histogram_size
=
default
;
analyze
table
t1
persistent
for
all
;
select
*
from
mysql
.
column_stats
where
table_name
=
't1'
;
analyze
select
*
from
t1
where
a
=
0
;
analyze
select
*
from
t1
where
a
=
50
;
analyze
select
*
from
t1
where
a
=
70
;
analyze
select
*
from
t1
where
a
=
100
;
analyze
select
*
from
t1
where
a
=
150
;
analyze
select
*
from
t1
where
a
=
200
;
drop
table
t0
,
t1
;
sql/opt_histogram_json.cc
View file @
05877df4
...
...
@@ -616,7 +616,7 @@ double position_in_interval(Field *field, const uchar *key, uint key_len,
double
Histogram_json_hb
::
point_selectivity
(
Field
*
field
,
key_range
*
endpoint
,
double
avg_sel
)
double
avg_sel
,
double
total_rows
)
{
const
uchar
*
key
=
endpoint
->
key
;
if
(
field
->
real_maybe_null
())
...
...
@@ -631,9 +631,11 @@ double Histogram_json_hb::point_selectivity(Field *field, key_range *endpoint,
if
(
buckets
[
idx
].
ndv
==
1
&&
!
equal
)
{
// The bucket has a single value and it doesn't match! Use the global
// average.
sel
=
avg_sel
;
/*
The bucket has a single value and it doesn't match! Return a very
small value.
*/
sel
=
1.0
/
total_rows
;
}
else
{
...
...
sql/opt_histogram_json.h
View file @
05877df4
...
...
@@ -109,7 +109,8 @@ class Histogram_json_hb : public Histogram_base
ulonglong
size
)
override
;
double
point_selectivity
(
Field
*
field
,
key_range
*
endpoint
,
double
avg_selection
)
override
;
double
avg_selection
,
double
total_rows
)
override
;
double
range_selectivity
(
Field
*
field
,
key_range
*
min_endp
,
key_range
*
max_endp
)
override
;
...
...
sql/sql_statistics.cc
View file @
05877df4
...
...
@@ -3882,7 +3882,8 @@ double get_column_range_cardinality(Field *field,
{
res
=
col_non_nulls
*
hist
->
point_selectivity
(
field
,
min_endp
,
avg_frequency
/
col_non_nulls
);
avg_frequency
/
col_non_nulls
,
tab_records
);
}
}
else
if
(
avg_frequency
==
0.0
)
...
...
@@ -3973,7 +3974,8 @@ double get_column_range_cardinality(Field *field,
*/
double
Histogram_binary
::
point_selectivity
(
Field
*
field
,
key_range
*
endpoint
,
double
avg_sel
)
double
avg_sel
,
double
total_records
)
{
double
sel
;
Column_statistics
*
col_stats
=
field
->
read_stats
;
...
...
sql/sql_statistics.h
View file @
05877df4
...
...
@@ -186,7 +186,8 @@ class Histogram_base
virtual
double
point_selectivity
(
Field
*
field
,
key_range
*
endpoint
,
double
avg_selection
)
=
0
;
double
avg_selectivity
,
double
total_rows
)
=
0
;
virtual
double
range_selectivity
(
Field
*
field
,
key_range
*
min_endp
,
key_range
*
max_endp
)
=
0
;
...
...
@@ -355,7 +356,8 @@ class Histogram_binary : public Histogram_base
Estimate selectivity of "col=const" using a histogram
*/
double
point_selectivity
(
Field
*
field
,
key_range
*
endpoint
,
double
avg_sel
)
override
;
double
avg_sel
,
double
total_rows
)
override
;
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment