Commit 669ce694 authored by Jimmy Yang's avatar Jimmy Yang

Fix Bug#30423 "InnoDBs treatment of NULL in index stats causes bad

"rows examined" estimates". This change implements "innodb_stats_method"
with options of "nulls_equal", "nulls_unequal" and "null_ignored".
      
rb://553 approved by Marko
parent f6acea69
set global innodb_stats_method = default;
select @@innodb_stats_method;
@@innodb_stats_method
nulls_equal
select count(*) from bug30243_3 where org_id is not NULL;
count(*)
20
select count(*) from bug30243_3 where org_id is NULL;
count(*)
16384
select count(*) from bug30243_2 where org_id is not NULL;
count(*)
224
select count(*) from bug30243_2 where org_id is NULL;
count(*)
65536
select @@innodb_stats_method;
@@innodb_stats_method
nulls_equal
analyze table bug30243_1;
Table Op Msg_type Msg_text
test.bug30243_1 analyze status OK
analyze table bug30243_2;
Table Op Msg_type Msg_text
test.bug30243_2 analyze status OK
analyze table bug30243_3;
Table Op Msg_type Msg_text
test.bug30243_3 analyze status OK
set global innodb_stats_method = "NULL";
ERROR 42000: Variable 'stats_method' can't be set to the value of 'NULL'
set global innodb_stats_method = "nulls_ignored";
select @@innodb_stats_method;
@@innodb_stats_method
nulls_ignored
analyze table bug30243_1;
Table Op Msg_type Msg_text
test.bug30243_1 analyze status OK
analyze table bug30243_2;
Table Op Msg_type Msg_text
test.bug30243_2 analyze status OK
analyze table bug30243_3;
Table Op Msg_type Msg_text
test.bug30243_3 analyze status OK
explain SELECT COUNT(*), 0
FROM bug30243_1 orgs
LEFT JOIN bug30243_3 sa_opportunities
ON orgs.org_id=sa_opportunities.org_id
LEFT JOIN bug30243_2 contacts
ON orgs.org_id=contacts.org_id ;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE orgs index NULL org_id 4 NULL 128 Using index
1 SIMPLE sa_opportunities ref org_id org_id 5 test.orgs.org_id 1 Using index
1 SIMPLE contacts ref contacts$org_id contacts$org_id 5 test.orgs.org_id 1 Using index
select @@innodb_stats_method;
@@innodb_stats_method
nulls_ignored
set global innodb_stats_method = "nulls_unequal";
select @@innodb_stats_method;
@@innodb_stats_method
nulls_unequal
analyze table bug30243_1;
Table Op Msg_type Msg_text
test.bug30243_1 analyze status OK
analyze table bug30243_2;
Table Op Msg_type Msg_text
test.bug30243_2 analyze status OK
analyze table bug30243_3;
Table Op Msg_type Msg_text
test.bug30243_3 analyze status OK
explain SELECT COUNT(*), 0
FROM bug30243_1 orgs
LEFT JOIN bug30243_3 sa_opportunities
ON orgs.org_id=sa_opportunities.org_id
LEFT JOIN bug30243_2 contacts
ON orgs.org_id=contacts.org_id;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE orgs index NULL org_id 4 NULL 128 Using index
1 SIMPLE sa_opportunities ref org_id org_id 5 test.orgs.org_id 1 Using index
1 SIMPLE contacts ref contacts$org_id contacts$org_id 5 test.orgs.org_id 1 Using index
SELECT COUNT(*) FROM table_bug30423 WHERE org_id IS NULL;
COUNT(*)
1024
set global innodb_stats_method = "nulls_unequal";
analyze table table_bug30423;
Table Op Msg_type Msg_text
test.table_bug30423 analyze status OK
set global innodb_stats_method = "nulls_ignored";
analyze table table_bug30423;
Table Op Msg_type Msg_text
test.table_bug30423 analyze status OK
set global innodb_stats_method = nulls_equal;
drop table bug30243_2;
drop table bug30243_1;
drop table bug30243_3;
drop table table_bug30423;
# Test for Bug #30423, InnoDBs treatment of NULL in index stats causes
# bad "rows examined" estimates.
# Implemented InnoDB system variable "innodb_stats_method" with
# "nulls_equal" (default), "nulls_unequal", and "nulls_ignored" options.
-- source include/have_innodb.inc
let $innodb_stats_method_orig = `select @@innodb_stats_method`;
# default setting for innodb_stats_method is "nulls_equal"
set global innodb_stats_method = default;
select @@innodb_stats_method;
# create three tables, bug30243_1, bug30243_2 and bug30243_3.
# The test scenario is adopted from original bug #30423 report.
# table bug30243_1 and bug30243_3 have many NULL values
-- disable_result_log
-- disable_query_log
DROP TABLE IF EXISTS bug30243_1;
CREATE TABLE bug30243_1 (
org_id int(11) NOT NULL default '0',
UNIQUE KEY (org_id)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
LOCK TABLES bug30243_1 WRITE;
INSERT INTO bug30243_1 VALUES (11),(15),(16),(17),(19),(20),(21),(23),(24),
(25),(26),(27),(28),(29),(30),(31),(32),(33),(34),(35),(37),(38),(40),(41),
(42),(43),(44),(45),(46),(47),(48),(49),(50),(51),(52),(53),(54),(55),(56),
(57),(58),(59),(60),(61),(62),(63),(64),(65),(66),(67),(68),(69),(70),(71),
(72),(73),(74),(75),(76),(77),(78),(79),(80),(81),(82),(83),(84),(85),(86),
(87),(88),(89),(90),(91),(92),(93),(94),(95),(96),(97),(98),(99),(100),(101),
(102),(103),(104),(105),(106),(107),(108),(109),(110),(111),(112),(113),(114),
(115),(116),(117),(118),(119),(120),(121),(122),(123),(124),(125),(126),(127),
(128),(129),(130),(131),(132),(133),(134),(135),(136),(137),(138),(139),(140),
(141),(142),(143),(144),(145);
UNLOCK TABLES;
DROP TABLE IF EXISTS bug30243_3;
CREATE TABLE bug30243_3 (
org_id int(11) default NULL,
KEY (org_id)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
INSERT INTO bug30243_3 VALUES (NULL);
begin;
let $i=14;
while ($i)
{
INSERT INTO bug30243_3 SELECT NULL FROM bug30243_3;
dec $i;
}
INSERT INTO bug30243_3 VALUES (34),(34),(35),(56),(58),(62),(62),(64),(65),(66),(80),(135),(137),(138),(139),(140),(142),(143),(144),(145);
commit;
DROP TABLE IF EXISTS bug30243_2;
CREATE TABLE bug30243_2 (
org_id int(11) default NULL,
KEY `contacts$org_id` (org_id)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
INSERT INTO bug30243_2 VALUES (NULL);
begin;
let $i=16;
while ($i)
{
INSERT INTO bug30243_2 SELECT NULL FROM bug30243_2;
dec $i;
}
INSERT INTO bug30243_2 VALUES (11),(15),(16),(17),(20),(21),(23),(24),(25),
(26),(27),(28),(29),(30),(31),(32),(33),(34),(37),(38),(40),(41),(42),(43),
(44),(45),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),
(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),
(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),
(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),
(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),
(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(48),
(48),(50),(51),(52),(52),(53),(54),(55),(57),(60),(61),(62),(62),(62),(62),
(62),(63),(64),(64),(65),(66),(66),(67),(68),(69),(70),(71),(72),(73),(74),
(75),(76),(77),(78),(79),(80),(80),(81),(82),(83),(84),(85),(86),(87),(88),
(89),(90),(91),(92),(93),(94),(95),(96),(97),(98),(99),(100),(101),(102),
(103),(104),(105),(106),(107),(108),(109),(110),(111),(112),(113),(114),
(115),(116),(117),(118),(119),(120),(121),(122),(123),(124),(125),(126),
(127),(128),(129),(130),(131),(132),(133),(133),(135),(135),(135),(135),
(136),(136),(138),(138),(139),(139),(139),(140),(141),(141),(142),(143),
(143),(145),(145);
commit;
-- enable_result_log
-- enable_query_log
# check tables's value
select count(*) from bug30243_3 where org_id is not NULL;
select count(*) from bug30243_3 where org_id is NULL;
select count(*) from bug30243_2 where org_id is not NULL;
select count(*) from bug30243_2 where org_id is NULL;
select @@innodb_stats_method;
analyze table bug30243_1;
analyze table bug30243_2;
analyze table bug30243_3;
# Following query plan shows that we over estimate the rows per
# unique value (since there are many NULLs).
# Skip this query log since the stats estimate could vary from runs
-- disable_query_log
-- disable_result_log
explain SELECT COUNT(*), 0
FROM bug30243_1 orgs
LEFT JOIN bug30243_3 sa_opportunities
ON orgs.org_id=sa_opportunities.org_id
LEFT JOIN bug30243_2 contacts
ON orgs.org_id=contacts.org_id ;
-- enable_query_log
-- enable_result_log
# following set operation will fail
#--error ER_WRONG_VALUE_FOR_VAR
--error 1231
set global innodb_stats_method = "NULL";
set global innodb_stats_method = "nulls_ignored";
select @@innodb_stats_method;
# Regenerate the stats with "nulls_ignored" option
analyze table bug30243_1;
analyze table bug30243_2;
analyze table bug30243_3;
# Following query plan shows that we get the correct rows per
# unique value (should be approximately 1 row per value)
explain SELECT COUNT(*), 0
FROM bug30243_1 orgs
LEFT JOIN bug30243_3 sa_opportunities
ON orgs.org_id=sa_opportunities.org_id
LEFT JOIN bug30243_2 contacts
ON orgs.org_id=contacts.org_id ;
select @@innodb_stats_method;
# Try the "nulls_unequal" option
set global innodb_stats_method = "nulls_unequal";
select @@innodb_stats_method;
analyze table bug30243_1;
analyze table bug30243_2;
analyze table bug30243_3;
# Following query plan shows that we get the correct rows per
# unique value (~1)
explain SELECT COUNT(*), 0
FROM bug30243_1 orgs
LEFT JOIN bug30243_3 sa_opportunities
ON orgs.org_id=sa_opportunities.org_id
LEFT JOIN bug30243_2 contacts
ON orgs.org_id=contacts.org_id;
# Create a table with all NULL values, make sure the stats calculation
# does not crash with table of all NULL values
-- disable_query_log
CREATE TABLE table_bug30423 (
org_id int(11) default NULL,
KEY(org_id)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
INSERT INTO `table_bug30423` VALUES (NULL);
begin;
let $i=10;
while ($i)
{
INSERT INTO table_bug30423 SELECT NULL FROM table_bug30423;
dec $i;
}
commit;
-- enable_query_log
SELECT COUNT(*) FROM table_bug30423 WHERE org_id IS NULL;
# calculate the statistics for the table for "nulls_ignored" and
# "nulls_unequal" option
set global innodb_stats_method = "nulls_unequal";
analyze table table_bug30423;
set global innodb_stats_method = "nulls_ignored";
analyze table table_bug30423;
eval set global innodb_stats_method = $innodb_stats_method_orig;
drop table bug30243_2;
drop table bug30243_1;
drop table bug30243_3;
drop table table_bug30423;
set global innodb_stats_method = default;
select @@innodb_stats_method;
@@innodb_stats_method
nulls_equal
select count(*) from bug30243_3 where org_id is not NULL;
count(*)
20
select count(*) from bug30243_3 where org_id is NULL;
count(*)
16384
select count(*) from bug30243_2 where org_id is not NULL;
count(*)
224
select count(*) from bug30243_2 where org_id is NULL;
count(*)
65536
select @@innodb_stats_method;
@@innodb_stats_method
nulls_equal
analyze table bug30243_1;
Table Op Msg_type Msg_text
test.bug30243_1 analyze status OK
analyze table bug30243_2;
Table Op Msg_type Msg_text
test.bug30243_2 analyze status OK
analyze table bug30243_3;
Table Op Msg_type Msg_text
test.bug30243_3 analyze status OK
set global innodb_stats_method = "NULL";
ERROR 42000: Variable 'stats_method' can't be set to the value of 'NULL'
set global innodb_stats_method = "nulls_ignored";
select @@innodb_stats_method;
@@innodb_stats_method
nulls_ignored
analyze table bug30243_1;
Table Op Msg_type Msg_text
test.bug30243_1 analyze status OK
analyze table bug30243_2;
Table Op Msg_type Msg_text
test.bug30243_2 analyze status OK
analyze table bug30243_3;
Table Op Msg_type Msg_text
test.bug30243_3 analyze status OK
explain SELECT COUNT(*), 0
FROM bug30243_1 orgs
LEFT JOIN bug30243_3 sa_opportunities
ON orgs.org_id=sa_opportunities.org_id
LEFT JOIN bug30243_2 contacts
ON orgs.org_id=contacts.org_id ;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE orgs index NULL org_id 4 NULL 128 Using index
1 SIMPLE sa_opportunities ref org_id org_id 5 test.orgs.org_id 1 Using index
1 SIMPLE contacts ref contacts$org_id contacts$org_id 5 test.orgs.org_id 1 Using index
select @@innodb_stats_method;
@@innodb_stats_method
nulls_ignored
set global innodb_stats_method = "nulls_unequal";
select @@innodb_stats_method;
@@innodb_stats_method
nulls_unequal
analyze table bug30243_1;
Table Op Msg_type Msg_text
test.bug30243_1 analyze status OK
analyze table bug30243_2;
Table Op Msg_type Msg_text
test.bug30243_2 analyze status OK
analyze table bug30243_3;
Table Op Msg_type Msg_text
test.bug30243_3 analyze status OK
explain SELECT COUNT(*), 0
FROM bug30243_1 orgs
LEFT JOIN bug30243_3 sa_opportunities
ON orgs.org_id=sa_opportunities.org_id
LEFT JOIN bug30243_2 contacts
ON orgs.org_id=contacts.org_id;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE orgs index NULL org_id 4 NULL 128 Using index
1 SIMPLE sa_opportunities ref org_id org_id 5 test.orgs.org_id 1 Using index
1 SIMPLE contacts ref contacts$org_id contacts$org_id 5 test.orgs.org_id 1 Using index
SELECT COUNT(*) FROM table_bug30423 WHERE org_id IS NULL;
COUNT(*)
1024
set global innodb_stats_method = "nulls_unequal";
analyze table table_bug30423;
Table Op Msg_type Msg_text
test.table_bug30423 analyze status OK
set global innodb_stats_method = "nulls_ignored";
analyze table table_bug30423;
Table Op Msg_type Msg_text
test.table_bug30423 analyze status OK
set global innodb_stats_method = nulls_equal;
drop table bug30243_2;
drop table bug30243_1;
drop table bug30243_3;
drop table table_bug30423;
# Test for Bug #30423, InnoDBs treatment of NULL in index stats causes
# bad "rows examined" estimates.
# Implemented InnoDB system variable "innodb_stats_method" with
# "nulls_equal" (default), "nulls_unequal", and "nulls_ignored" options.
-- source include/have_innodb_plugin.inc
let $innodb_stats_method_orig = `select @@innodb_stats_method`;
# default setting for innodb_stats_method is "nulls_equal"
set global innodb_stats_method = default;
select @@innodb_stats_method;
# create three tables, bug30243_1, bug30243_2 and bug30243_3.
# The test scenario is adopted from original bug #30423 report.
# table bug30243_1 and bug30243_3 have many NULL values
-- disable_result_log
-- disable_query_log
DROP TABLE IF EXISTS bug30243_1;
CREATE TABLE bug30243_1 (
org_id int(11) NOT NULL default '0',
UNIQUE KEY (org_id)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
LOCK TABLES bug30243_1 WRITE;
INSERT INTO bug30243_1 VALUES (11),(15),(16),(17),(19),(20),(21),(23),(24),
(25),(26),(27),(28),(29),(30),(31),(32),(33),(34),(35),(37),(38),(40),(41),
(42),(43),(44),(45),(46),(47),(48),(49),(50),(51),(52),(53),(54),(55),(56),
(57),(58),(59),(60),(61),(62),(63),(64),(65),(66),(67),(68),(69),(70),(71),
(72),(73),(74),(75),(76),(77),(78),(79),(80),(81),(82),(83),(84),(85),(86),
(87),(88),(89),(90),(91),(92),(93),(94),(95),(96),(97),(98),(99),(100),(101),
(102),(103),(104),(105),(106),(107),(108),(109),(110),(111),(112),(113),(114),
(115),(116),(117),(118),(119),(120),(121),(122),(123),(124),(125),(126),(127),
(128),(129),(130),(131),(132),(133),(134),(135),(136),(137),(138),(139),(140),
(141),(142),(143),(144),(145);
UNLOCK TABLES;
DROP TABLE IF EXISTS bug30243_3;
CREATE TABLE bug30243_3 (
org_id int(11) default NULL,
KEY (org_id)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
INSERT INTO bug30243_3 VALUES (NULL);
begin;
let $i=14;
while ($i)
{
INSERT INTO bug30243_3 SELECT NULL FROM bug30243_3;
dec $i;
}
INSERT INTO bug30243_3 VALUES (34),(34),(35),(56),(58),(62),(62),(64),(65),(66),(80),(135),(137),(138),(139),(140),(142),(143),(144),(145);
commit;
DROP TABLE IF EXISTS bug30243_2;
CREATE TABLE bug30243_2 (
org_id int(11) default NULL,
KEY `contacts$org_id` (org_id)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
INSERT INTO bug30243_2 VALUES (NULL);
begin;
let $i=16;
while ($i)
{
INSERT INTO bug30243_2 SELECT NULL FROM bug30243_2;
dec $i;
}
INSERT INTO bug30243_2 VALUES (11),(15),(16),(17),(20),(21),(23),(24),(25),
(26),(27),(28),(29),(30),(31),(32),(33),(34),(37),(38),(40),(41),(42),(43),
(44),(45),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),
(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),
(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),
(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),
(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),
(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(46),(48),
(48),(50),(51),(52),(52),(53),(54),(55),(57),(60),(61),(62),(62),(62),(62),
(62),(63),(64),(64),(65),(66),(66),(67),(68),(69),(70),(71),(72),(73),(74),
(75),(76),(77),(78),(79),(80),(80),(81),(82),(83),(84),(85),(86),(87),(88),
(89),(90),(91),(92),(93),(94),(95),(96),(97),(98),(99),(100),(101),(102),
(103),(104),(105),(106),(107),(108),(109),(110),(111),(112),(113),(114),
(115),(116),(117),(118),(119),(120),(121),(122),(123),(124),(125),(126),
(127),(128),(129),(130),(131),(132),(133),(133),(135),(135),(135),(135),
(136),(136),(138),(138),(139),(139),(139),(140),(141),(141),(142),(143),
(143),(145),(145);
commit;
-- enable_result_log
-- enable_query_log
# check tables's value
select count(*) from bug30243_3 where org_id is not NULL;
select count(*) from bug30243_3 where org_id is NULL;
select count(*) from bug30243_2 where org_id is not NULL;
select count(*) from bug30243_2 where org_id is NULL;
select @@innodb_stats_method;
analyze table bug30243_1;
analyze table bug30243_2;
analyze table bug30243_3;
# Following query plan shows that we over estimate the rows per
# unique value (since there are many NULLs).
# Skip this query log since the stats estimate could vary from runs
-- disable_query_log
-- disable_result_log
explain SELECT COUNT(*), 0
FROM bug30243_1 orgs
LEFT JOIN bug30243_3 sa_opportunities
ON orgs.org_id=sa_opportunities.org_id
LEFT JOIN bug30243_2 contacts
ON orgs.org_id=contacts.org_id ;
-- enable_query_log
-- enable_result_log
# following set operation will fail
#--error ER_WRONG_VALUE_FOR_VAR
--error 1231
set global innodb_stats_method = "NULL";
set global innodb_stats_method = "nulls_ignored";
select @@innodb_stats_method;
# Regenerate the stats with "nulls_ignored" option
analyze table bug30243_1;
analyze table bug30243_2;
analyze table bug30243_3;
# Following query plan shows that we get the correct rows per
# unique value (should be approximately 1 row per value)
explain SELECT COUNT(*), 0
FROM bug30243_1 orgs
LEFT JOIN bug30243_3 sa_opportunities
ON orgs.org_id=sa_opportunities.org_id
LEFT JOIN bug30243_2 contacts
ON orgs.org_id=contacts.org_id ;
select @@innodb_stats_method;
# Try the "nulls_unequal" option
set global innodb_stats_method = "nulls_unequal";
select @@innodb_stats_method;
analyze table bug30243_1;
analyze table bug30243_2;
analyze table bug30243_3;
# Following query plan shows that we get the correct rows per
# unique value (~1)
explain SELECT COUNT(*), 0
FROM bug30243_1 orgs
LEFT JOIN bug30243_3 sa_opportunities
ON orgs.org_id=sa_opportunities.org_id
LEFT JOIN bug30243_2 contacts
ON orgs.org_id=contacts.org_id;
# Create a table with all NULL values, make sure the stats calculation
# does not crash with table of all NULL values
-- disable_query_log
CREATE TABLE table_bug30423 (
org_id int(11) default NULL,
KEY(org_id)
) ENGINE=InnoDB DEFAULT CHARSET=latin1;
INSERT INTO `table_bug30423` VALUES (NULL);
begin;
let $i=10;
while ($i)
{
INSERT INTO table_bug30423 SELECT NULL FROM table_bug30423;
dec $i;
}
commit;
-- enable_query_log
SELECT COUNT(*) FROM table_bug30423 WHERE org_id IS NULL;
# calculate the statistics for the table for "nulls_ignored" and
# "nulls_unequal" option
set global innodb_stats_method = "nulls_unequal";
analyze table table_bug30423;
set global innodb_stats_method = "nulls_ignored";
analyze table table_bug30423;
eval set global innodb_stats_method = $innodb_stats_method_orig;
drop table bug30243_2;
drop table bug30243_1;
drop table bug30243_3;
drop table table_bug30423;
......@@ -66,6 +66,13 @@ this many index pages */
/*--------------------------------------*/
#define BTR_BLOB_HDR_SIZE 8
/* Estimated table level stats from sampled value. */
#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, ext_size, not_empty) \
((value * (ib_longlong) index->stat_n_leaf_pages \
+ BTR_KEY_VAL_ESTIMATE_N_PAGES - 1 + ext_size \
+ not_empty) \
/ (BTR_KEY_VAL_ESTIMATE_N_PAGES + ext_size))
/***********************************************************************
Marks all extern fields in a record as owned by the record. This function
should be called if the delete mark of a record is removed: a not delete
......@@ -2834,10 +2841,55 @@ btr_estimate_n_rows_in_range(
}
}
/***********************************************************************
Record the number of non_null key values in a given index for
each n-column prefix of the index where n < dict_index_get_n_unique(index).
The estimates are eventually stored in the array:
index->stat_n_non_null_key_vals. */
static
void
btr_record_not_null_field_in_rec(
/*=============================*/
rec_t* rec, /* in: physical record */
ulint n_unique, /* in: dict_index_get_n_unique(index),
number of columns uniquely determine
an index entry */
const ulint* offsets, /* in: rec_get_offsets(rec, index),
its size could be for all fields or
that of "n_unique" */
ib_longlong* n_not_null) /* in/out: array to record number of
not null rows for n-column prefix */
{
ulint i;
ut_ad(rec_offs_n_fields(offsets) >= n_unique);
if (n_not_null == NULL) {
return;
}
for (i = 0; i < n_unique; i++) {
ulint rec_len;
byte* field;
field = rec_get_nth_field(rec, offsets, i, &rec_len);
if (rec_len != UNIV_SQL_NULL) {
n_not_null[i]++;
} else {
/* Break if we hit the first NULL value */
break;
}
}
}
/***********************************************************************
Estimates the number of different key values in a given index, for
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
The estimates are stored in the array index->stat_n_diff_key_vals. */
The estimates are stored in the array index->stat_n_diff_key_vals.
If innodb_stats_method is "nulls_ignored", we also record the number of
non-null values for each prefix and store the estimates in
array index->stat_n_non_null_key_vals. */
void
btr_estimate_number_of_different_key_vals(
......@@ -2851,6 +2903,8 @@ btr_estimate_number_of_different_key_vals(
ulint matched_fields;
ulint matched_bytes;
ib_longlong* n_diff;
ib_longlong* n_not_null;
ibool stats_null_not_equal;
ulint not_empty_flag = 0;
ulint total_external_size = 0;
ulint i;
......@@ -2858,24 +2912,47 @@ btr_estimate_number_of_different_key_vals(
ulint add_on;
mtr_t mtr;
mem_heap_t* heap = NULL;
ulint offsets_rec_[REC_OFFS_NORMAL_SIZE];
ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
ulint* offsets_rec = offsets_rec_;
ulint* offsets_next_rec= offsets_next_rec_;
*offsets_rec_ = (sizeof offsets_rec_) / sizeof *offsets_rec_;
*offsets_next_rec_
= (sizeof offsets_next_rec_) / sizeof *offsets_next_rec_;
ulint* offsets_rec = NULL;
ulint* offsets_next_rec = NULL;
n_cols = dict_index_get_n_unique(index);
n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong));
heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null)
* (n_cols + 1)
+ dict_index_get_n_fields(index)
* (sizeof *offsets_rec
+ sizeof *offsets_next_rec));
n_diff = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_longlong));
n_not_null = NULL;
/* Check srv_innodb_stats_method setting, and decide whether we
need to record non-null value and also decide if NULL is
considered equal (by setting stats_null_not_equal value) */
switch (srv_innodb_stats_method) {
case SRV_STATS_NULLS_IGNORED:
n_not_null = mem_heap_zalloc(heap, (n_cols + 1)
* sizeof *n_not_null);
/* fall through */
case SRV_STATS_NULLS_UNEQUAL:
/* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL
case, we will treat NULLs as unequal value */
stats_null_not_equal = TRUE;
break;
memset(n_diff, 0, (n_cols + 1) * sizeof(ib_longlong));
case SRV_STATS_NULLS_EQUAL:
stats_null_not_equal = FALSE;
break;
default:
ut_error;
}
/* We sample some pages in the index to get an estimate */
for (i = 0; i < BTR_KEY_VAL_ESTIMATE_N_PAGES; i++) {
rec_t* supremum;
mtr_start(&mtr);
btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
......@@ -2888,18 +2965,22 @@ btr_estimate_number_of_different_key_vals(
page = btr_cur_get_page(&cursor);
supremum = page_get_supremum_rec(page);
rec = page_rec_get_next(page_get_infimum_rec(page));
if (rec != supremum) {
if (!page_rec_is_supremum(rec)) {
not_empty_flag = 1;
offsets_rec = rec_get_offsets(rec, index, offsets_rec,
ULINT_UNDEFINED, &heap);
if (n_not_null) {
btr_record_not_null_field_in_rec(
rec, n_cols, offsets_rec, n_not_null);
}
}
while (rec != supremum) {
while (!page_rec_is_supremum(rec)) {
rec_t* next_rec = page_rec_get_next(rec);
if (next_rec == supremum) {
if (page_rec_is_supremum(next_rec)) {
break;
}
......@@ -2911,7 +2992,8 @@ btr_estimate_number_of_different_key_vals(
cmp_rec_rec_with_match(rec, next_rec,
offsets_rec, offsets_next_rec,
index, &matched_fields,
index, stats_null_not_equal,
&matched_fields,
&matched_bytes);
for (j = matched_fields + 1; j <= n_cols; j++) {
......@@ -2921,6 +3003,12 @@ btr_estimate_number_of_different_key_vals(
n_diff[j]++;
}
if (n_not_null) {
btr_record_not_null_field_in_rec(
next_rec, n_cols, offsets_next_rec,
n_not_null);
}
total_external_size
+= btr_rec_get_externally_stored_len(
rec, offsets_rec);
......@@ -2971,14 +3059,8 @@ btr_estimate_number_of_different_key_vals(
included in index->stat_n_leaf_pages) */
for (j = 0; j <= n_cols; j++) {
index->stat_n_diff_key_vals[j]
= ((n_diff[j]
* (ib_longlong)index->stat_n_leaf_pages
+ BTR_KEY_VAL_ESTIMATE_N_PAGES - 1
+ total_external_size
+ not_empty_flag)
/ (BTR_KEY_VAL_ESTIMATE_N_PAGES
+ total_external_size));
index->stat_n_diff_key_vals[j] = BTR_TABLE_STATS_FROM_SAMPLE(
n_diff[j], index, total_external_size, not_empty_flag);
/* If the tree is small, smaller than
10 * BTR_KEY_VAL_ESTIMATE_N_PAGES + total_external_size, then
......@@ -2997,12 +3079,20 @@ btr_estimate_number_of_different_key_vals(
}
index->stat_n_diff_key_vals[j] += add_on;
}
mem_free(n_diff);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
/* Update the stat_n_non_null_key_vals[] with our
sampled result. stat_n_non_null_key_vals[] is created
and initialized to zero in dict_index_add_to_cache(),
along with stat_n_diff_key_vals[] array */
if (n_not_null != NULL && (j < n_cols)) {
index->stat_n_non_null_key_vals[j] =
BTR_TABLE_STATS_FROM_SAMPLE(
n_not_null[j], index,
total_external_size, not_empty_flag);
}
}
mem_heap_free(heap);
}
/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
......
......@@ -1358,6 +1358,12 @@ dict_index_add_to_cache(
new_index->heap,
(1 + dict_index_get_n_unique(new_index))
* sizeof(ib_longlong));
new_index->stat_n_non_null_key_vals = mem_heap_zalloc(
new_index->heap,
(1 + dict_index_get_n_unique(new_index))
* sizeof(*new_index->stat_n_non_null_key_vals));
/* Give some sensible values to stat_n_... in case we do
not calculate statistics quickly enough */
......@@ -3817,6 +3823,10 @@ dict_update_statistics_low(
for (i = dict_index_get_n_unique(index); i; ) {
index->stat_n_diff_key_vals[i--] = 1;
}
memset(index->stat_n_non_null_key_vals, 0,
(1 + dict_index_get_n_unique(index))
* sizeof(*index->stat_n_non_null_key_vals));
}
index = dict_table_get_next_index(index);
......
......@@ -130,6 +130,25 @@ static my_bool innobase_adaptive_hash_index = TRUE;
static char* internal_innobase_data_file_path = NULL;
/* Possible values for system variable "innodb_stats_method". The values
are defined the same as its corresponding MyISAM system variable
"myisam_stats_method"(see "myisam_stats_method_names"), for better usability */
static const char* innodb_stats_method_names[] = {
"nulls_equal",
"nulls_unequal",
"nulls_ignored",
NullS
};
/* Used to define an enumerate type of the system variable innodb_stats_method.
This is the same as "myisam_stats_method_typelib" */
static TYPELIB innodb_stats_method_typelib = {
array_elements(innodb_stats_method_names) - 1,
"innodb_stats_method_typelib",
innodb_stats_method_names,
NULL
};
/* The following counter is used to convey information to InnoDB
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
......@@ -6362,6 +6381,65 @@ ha_innobase::read_time(
return(ranges + (double) rows / (double) total_rows * time_for_scan);
}
/*************************************************************************
Calculate Record Per Key value. Need to exclude the NULL value if
innodb_stats_method is set to "nulls_ignored" */
static
ha_rows
innodb_rec_per_key(
/*===============*/
/* out: estimated record per key
value */
dict_index_t* index, /* in: dict_index_t structure */
ulint i, /* in: the column we are
calculating rec per key */
ha_rows records) /* in: estimated total records */
{
ha_rows rec_per_key;
ut_ad(i < dict_index_get_n_unique(index));
/* Note the stat_n_diff_key_vals[] stores the diff value with
n-prefix indexing, so it is always stat_n_diff_key_vals[i + 1] */
if (index->stat_n_diff_key_vals[i + 1] == 0) {
rec_per_key = records;
} else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) {
ib_longlong num_null;
/* Number of rows with NULL value in this
field */
num_null = records - index->stat_n_non_null_key_vals[i];
/* In theory, index->stat_n_non_null_key_vals[i]
should always be less than the number of records.
Since this is statistics value, the value could
have slight discrepancy. But we will make sure
the number of null values is not a negative number. */
num_null = (num_null < 0) ? 0 : num_null;
/* If the number of NULL values is the same as or
large than that of the distinct values, we could
consider that the table consists mostly of NULL value.
Set rec_per_key to 1. */
if (index->stat_n_diff_key_vals[i + 1] <= num_null) {
rec_per_key = 1;
} else {
/* Need to exclude rows with NULL values from
rec_per_key calculation */
rec_per_key = (ha_rows)(
(records - num_null)
/ (index->stat_n_diff_key_vals[i + 1]
- num_null));
}
} else {
rec_per_key = (ha_rows)
(records / index->stat_n_diff_key_vals[i + 1]);
}
return(rec_per_key);
}
/*************************************************************************
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object. */
......@@ -6568,13 +6646,8 @@ ha_innobase::info_low(
break;
}
if (index->stat_n_diff_key_vals[j + 1] == 0) {
rec_per_key = stats.records;
} else {
rec_per_key = (ha_rows)(stats.records /
index->stat_n_diff_key_vals[j + 1]);
}
rec_per_key = innodb_rec_per_key(
index, j, stats.records);
/* Since MySQL seems to favor table scans
too much over index searches, we pretend
......@@ -8990,6 +9063,13 @@ static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode,
AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */
AUTOINC_NO_LOCKING, 0); /* Maximum value */
static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
PLUGIN_VAR_RQCMDARG,
"Specifies how InnoDB index statistics collection code should "
"treat NULLs. Possible values are NULLS_EQUAL (default), "
"NULLS_UNEQUAL and NULLS_IGNORED",
NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
PLUGIN_VAR_RQCMDARG,
......@@ -9031,6 +9111,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(stats_on_metadata),
MYSQL_SYSVAR(use_legacy_cardinality_algorithm),
MYSQL_SYSVAR(adaptive_hash_index),
MYSQL_SYSVAR(stats_method),
MYSQL_SYSVAR(status_file),
MYSQL_SYSVAR(support_xa),
MYSQL_SYSVAR(sync_spin_loops),
......
......@@ -404,7 +404,10 @@ btr_estimate_n_rows_in_range(
/***********************************************************************
Estimates the number of different key values in a given index, for
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
The estimates are stored in the array index->stat_n_diff_key_vals. */
The estimates are stored in the array index->stat_n_diff_key_vals.
If innodb_stats_method is nulls_ignored, we also record the number of
non-null values for each prefix and stored the estimates in
array index->stat_n_non_null_key_vals. */
void
btr_estimate_number_of_different_key_vals(
......
......@@ -222,6 +222,12 @@ struct dict_index_struct{
for this index, for each n-column prefix
where n <= dict_get_n_unique(index); we
periodically calculate new estimates */
ib_longlong* stat_n_non_null_key_vals;
/* approximate number of non-null key values
for this index, for each column where
n < dict_get_n_unique(index); This
is used when innodb_stats_method is
"nulls_ignored". */
ulint stat_index_size;
/* approximate index size in database pages */
ulint stat_n_leaf_pages;
......
......@@ -141,6 +141,10 @@ cmp_rec_rec_with_match(
const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
dict_index_t* index, /* in: data dictionary index */
ibool nulls_unequal,
/* in: TRUE if this is for index statistics
cardinality estimation, and innodb_stats_method
is "nulls_unequal" or "nulls_ignored" */
ulint* matched_fields, /* in/out: number of already completely
matched fields; when the function returns,
contains the value the for current
......
......@@ -72,5 +72,5 @@ cmp_rec_rec(
ulint match_b = 0;
return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
&match_f, &match_b));
FALSE, &match_f, &match_b));
}
......@@ -91,6 +91,11 @@ extern ulint srv_lock_table_size;
extern ulint srv_n_file_io_threads;
/* The "innodb_stats_method" setting, decides how InnoDB is going
to treat NULL value when collecting statistics. It is not defined
as enum type because the configure option takes unsigned integer type. */
extern ulong srv_innodb_stats_method;
#ifdef UNIV_LOG_ARCHIVE
extern ibool srv_log_archive_on;
extern ibool srv_archive_recovery;
......@@ -286,6 +291,19 @@ of lower numbers are included. */
#define SRV_FORCE_NO_LOG_REDO 6 /* do not do the log roll-forward
in connection with recovery */
/* Alternatives for srv_innodb_stats_method, which could be changed by
setting innodb_stats_method */
enum srv_stats_method_name_enum {
SRV_STATS_NULLS_EQUAL, /* All NULL values are treated as
equal. This is the default setting
for innodb_stats_method */
SRV_STATS_NULLS_UNEQUAL, /* All NULL values are treated as
NOT equal. */
SRV_STATS_NULLS_IGNORED /* NULL values are ignored */
};
typedef enum srv_stats_method_name_enum srv_stats_method_name_t;
/*************************************************************************
Boots Innobase server. */
......
......@@ -720,6 +720,10 @@ cmp_rec_rec_with_match(
const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
dict_index_t* index, /* in: data dictionary index */
ibool nulls_unequal,
/* in: TRUE if this is for index statistics
cardinality estimation, and innodb_stats_method
is "nulls_unequal" or "nulls_ignored" */
ulint* matched_fields, /* in/out: number of already completely
matched fields; when the function returns,
contains the value the for current
......@@ -821,9 +825,13 @@ cmp_rec_rec_with_match(
|| rec2_f_len == UNIV_SQL_NULL) {
if (rec1_f_len == rec2_f_len) {
goto next_field;
/* This is limited to stats collection,
cannot use it for regular search */
if (nulls_unequal) {
ret = -1;
} else {
goto next_field;
}
} else if (rec2_f_len == UNIV_SQL_NULL) {
/* We define the SQL null to be the
......
......@@ -218,6 +218,11 @@ ulong srv_max_buf_pool_modified_pct = 90;
/* variable counts amount of data read in total (in bytes) */
ulint srv_data_read = 0;
/* Internal setting for "innodb_stats_method". Decides how InnoDB treats
NULL value when collecting statistics. By default, it is set to
SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */
ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
/* here we count the amount of data written in total (in bytes) */
ulint srv_data_written = 0;
......
2011-01-14 The InnoDB Team
* btr/btr0cur.c, dict/dict0dict.c, handler/ha_innodb.cc,
include/btr0cur.h, include/dict0mem.h, include/rem0cmp.h,
include/rem0cmp.ic, include/srv0srv.h, rem/rem0cmp.c,
srv/srv0srv.c, innodb_bug30423.test:
Fix Bug#30423 InnoDBs treatment of NULL in index stats causes
bad "rows examined" estimates
2011-01-06 The InnoDB Team
* handler/i_s.cc, include/trx0i_s.h, trx/trx0i_s.c:
Fix Bug#55397 cannot select from innodb_trx when trx_query contains
......
......@@ -100,6 +100,18 @@ can be released by page reorganize, then it is reorganized */
/*--------------------------------------*/
#define BTR_BLOB_HDR_SIZE 8 /*!< Size of a BLOB
part header, in bytes */
/** Estimated table level stats from sampled value.
@param value sampled stats
@param index index being sampled
@param sample number of sampled rows
@param ext_size external stored data size
@param not_empty table not empty
@return estimated table wide stats from sampled value */
#define BTR_TABLE_STATS_FROM_SAMPLE(value, index, sample, ext_size, not_empty)\
(((value) * (ib_int64_t) index->stat_n_leaf_pages \
+ (sample) - 1 + (ext_size) + (not_empty)) / ((sample) + (ext_size)))
/* @} */
#endif /* !UNIV_HOTBACKUP */
......@@ -3200,10 +3212,55 @@ btr_estimate_n_rows_in_range(
}
}
/*******************************************************************//**
Record the number of non_null key values in a given index for
each n-column prefix of the index where n < dict_index_get_n_unique(index).
The estimates are eventually stored in the array:
index->stat_n_non_null_key_vals. */
static
void
btr_record_not_null_field_in_rec(
/*=============================*/
rec_t* rec, /*!< in: physical record */
ulint n_unique, /*!< in: dict_index_get_n_unique(index),
number of columns uniquely determine
an index entry */
const ulint* offsets, /*!< in: rec_get_offsets(rec, index),
its size could be for all fields or
that of "n_unique" */
ib_int64_t* n_not_null) /*!< in/out: array to record number of
not null rows for n-column prefix */
{
ulint i;
ut_ad(rec_offs_n_fields(offsets) >= n_unique);
if (n_not_null == NULL) {
return;
}
for (i = 0; i < n_unique; i++) {
ulint rec_len;
byte* field;
field = rec_get_nth_field(rec, offsets, i, &rec_len);
if (rec_len != UNIV_SQL_NULL) {
n_not_null[i]++;
} else {
/* Break if we hit the first NULL value */
break;
}
}
}
/*******************************************************************//**
Estimates the number of different key values in a given index, for
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
The estimates are stored in the array index->stat_n_diff_key_vals. */
The estimates are stored in the array index->stat_n_diff_key_vals.
If innodb_stats_method is "nulls_ignored", we also record the number of
non-null values for each prefix and store the estimates in
array index->stat_n_non_null_key_vals. */
UNIV_INTERN
void
btr_estimate_number_of_different_key_vals(
......@@ -3217,6 +3274,8 @@ btr_estimate_number_of_different_key_vals(
ulint matched_fields;
ulint matched_bytes;
ib_int64_t* n_diff;
ib_int64_t* n_not_null;
ibool stats_null_not_equal;
ullint n_sample_pages; /* number of pages to sample */
ulint not_empty_flag = 0;
ulint total_external_size = 0;
......@@ -3225,16 +3284,43 @@ btr_estimate_number_of_different_key_vals(
ullint add_on;
mtr_t mtr;
mem_heap_t* heap = NULL;
ulint offsets_rec_[REC_OFFS_NORMAL_SIZE];
ulint offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
ulint* offsets_rec = offsets_rec_;
ulint* offsets_next_rec= offsets_next_rec_;
rec_offs_init(offsets_rec_);
rec_offs_init(offsets_next_rec_);
ulint* offsets_rec = NULL;
ulint* offsets_next_rec = NULL;
n_cols = dict_index_get_n_unique(index);
n_diff = mem_zalloc((n_cols + 1) * sizeof(ib_int64_t));
heap = mem_heap_create((sizeof *n_diff + sizeof *n_not_null)
* (n_cols + 1)
+ dict_index_get_n_fields(index)
* (sizeof *offsets_rec
+ sizeof *offsets_next_rec));
n_diff = mem_heap_zalloc(heap, (n_cols + 1) * sizeof(ib_int64_t));
n_not_null = NULL;
/* Check srv_innodb_stats_method setting, and decide whether we
need to record non-null value and also decide if NULL is
considered equal (by setting stats_null_not_equal value) */
switch (srv_innodb_stats_method) {
case SRV_STATS_NULLS_IGNORED:
n_not_null = mem_heap_zalloc(heap, (n_cols + 1)
* sizeof *n_not_null);
/* fall through */
case SRV_STATS_NULLS_UNEQUAL:
/* for both SRV_STATS_NULLS_IGNORED and SRV_STATS_NULLS_UNEQUAL
case, we will treat NULLs as unequal value */
stats_null_not_equal = TRUE;
break;
case SRV_STATS_NULLS_EQUAL:
stats_null_not_equal = FALSE;
break;
default:
ut_error;
}
/* It makes no sense to test more pages than are contained
in the index, thus we lower the number if it is too high */
......@@ -3251,7 +3337,6 @@ btr_estimate_number_of_different_key_vals(
/* We sample some pages in the index to get an estimate */
for (i = 0; i < n_sample_pages; i++) {
rec_t* supremum;
mtr_start(&mtr);
btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);
......@@ -3264,18 +3349,22 @@ btr_estimate_number_of_different_key_vals(
page = btr_cur_get_page(&cursor);
supremum = page_get_supremum_rec(page);
rec = page_rec_get_next(page_get_infimum_rec(page));
if (rec != supremum) {
if (!page_rec_is_supremum(rec)) {
not_empty_flag = 1;
offsets_rec = rec_get_offsets(rec, index, offsets_rec,
ULINT_UNDEFINED, &heap);
if (n_not_null) {
btr_record_not_null_field_in_rec(
rec, n_cols, offsets_rec, n_not_null);
}
}
while (rec != supremum) {
while (!page_rec_is_supremum(rec)) {
rec_t* next_rec = page_rec_get_next(rec);
if (next_rec == supremum) {
if (page_rec_is_supremum(next_rec)) {
break;
}
......@@ -3287,7 +3376,8 @@ btr_estimate_number_of_different_key_vals(
cmp_rec_rec_with_match(rec, next_rec,
offsets_rec, offsets_next_rec,
index, &matched_fields,
index, stats_null_not_equal,
&matched_fields,
&matched_bytes);
for (j = matched_fields + 1; j <= n_cols; j++) {
......@@ -3297,6 +3387,12 @@ btr_estimate_number_of_different_key_vals(
n_diff[j]++;
}
if (n_not_null) {
btr_record_not_null_field_in_rec(
next_rec, n_cols, offsets_next_rec,
n_not_null);
}
total_external_size
+= btr_rec_get_externally_stored_len(
rec, offsets_rec);
......@@ -3348,13 +3444,9 @@ btr_estimate_number_of_different_key_vals(
for (j = 0; j <= n_cols; j++) {
index->stat_n_diff_key_vals[j]
= ((n_diff[j]
* (ib_int64_t)index->stat_n_leaf_pages
+ n_sample_pages - 1
+ total_external_size
+ not_empty_flag)
/ (n_sample_pages
+ total_external_size));
= BTR_TABLE_STATS_FROM_SAMPLE(
n_diff[j], index, n_sample_pages,
total_external_size, not_empty_flag);
/* If the tree is small, smaller than
10 * n_sample_pages + total_external_size, then
......@@ -3373,12 +3465,20 @@ btr_estimate_number_of_different_key_vals(
}
index->stat_n_diff_key_vals[j] += add_on;
}
mem_free(n_diff);
if (UNIV_LIKELY_NULL(heap)) {
mem_heap_free(heap);
/* Update the stat_n_non_null_key_vals[] with our
sampled result. stat_n_non_null_key_vals[] is created
and initialized to zero in dict_index_add_to_cache(),
along with stat_n_diff_key_vals[] array */
if (n_not_null != NULL && (j < n_cols)) {
index->stat_n_non_null_key_vals[j] =
BTR_TABLE_STATS_FROM_SAMPLE(
n_not_null[j], index, n_sample_pages,
total_external_size, not_empty_flag);
}
}
mem_heap_free(heap);
}
/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/
......
......@@ -1669,6 +1669,12 @@ dict_index_add_to_cache(
new_index->heap,
(1 + dict_index_get_n_unique(new_index))
* sizeof(ib_int64_t));
new_index->stat_n_non_null_key_vals = mem_heap_zalloc(
new_index->heap,
(1 + dict_index_get_n_unique(new_index))
* sizeof(*new_index->stat_n_non_null_key_vals));
/* Give some sensible values to stat_n_... in case we do
not calculate statistics quickly enough */
......@@ -4291,6 +4297,10 @@ dict_update_statistics(
for (i = dict_index_get_n_unique(index); i; ) {
index->stat_n_diff_key_vals[i--] = 1;
}
memset(index->stat_n_non_null_key_vals, 0,
(1 + dict_index_get_n_unique(index))
* sizeof(*index->stat_n_non_null_key_vals));
}
index = dict_table_get_next_index(index);
......
......@@ -174,6 +174,25 @@ static char* internal_innobase_data_file_path = NULL;
static char* innodb_version_str = (char*) INNODB_VERSION_STR;
/** Possible values for system variable "innodb_stats_method". The values
are defined the same as its corresponding MyISAM system variable
"myisam_stats_method"(see "myisam_stats_method_names"), for better usability */
static const char* innodb_stats_method_names[] = {
"nulls_equal",
"nulls_unequal",
"nulls_ignored",
NullS
};
/** Used to define an enumerate type of the system variable innodb_stats_method.
This is the same as "myisam_stats_method_typelib" */
static TYPELIB innodb_stats_method_typelib = {
array_elements(innodb_stats_method_names) - 1,
"innodb_stats_method_typelib",
innodb_stats_method_names,
NULL
};
/* The following counter is used to convey information to InnoDB
about server activity: in selects it is not sensible to call
srv_active_wake_master_thread after each fetch or search, we only do
......@@ -7507,6 +7526,65 @@ innobase_get_mysql_key_number_for_index(
return(0);
}
/*********************************************************************//**
Calculate Record Per Key value. Need to exclude the NULL value if
innodb_stats_method is set to "nulls_ignored"
@return estimated record per key value */
static
ha_rows
innodb_rec_per_key(
/*===============*/
dict_index_t* index, /*!< in: dict_index_t structure */
ulint i, /*!< in: the column we are
calculating rec per key */
ha_rows records) /*!< in: estimated total records */
{
ha_rows rec_per_key;
ut_ad(i < dict_index_get_n_unique(index));
/* Note the stat_n_diff_key_vals[] stores the diff value with
n-prefix indexing, so it is always stat_n_diff_key_vals[i + 1] */
if (index->stat_n_diff_key_vals[i + 1] == 0) {
rec_per_key = records;
} else if (srv_innodb_stats_method == SRV_STATS_NULLS_IGNORED) {
ib_int64_t num_null;
/* Number of rows with NULL value in this
field */
num_null = records - index->stat_n_non_null_key_vals[i];
/* In theory, index->stat_n_non_null_key_vals[i]
should always be less than the number of records.
Since this is statistics value, the value could
have slight discrepancy. But we will make sure
the number of null values is not a negative number. */
num_null = (num_null < 0) ? 0 : num_null;
/* If the number of NULL values is the same as or
large than that of the distinct values, we could
consider that the table consists mostly of NULL value.
Set rec_per_key to 1. */
if (index->stat_n_diff_key_vals[i + 1] <= num_null) {
rec_per_key = 1;
} else {
/* Need to exclude rows with NULL values from
rec_per_key calculation */
rec_per_key = (ha_rows)(
(records - num_null)
/ (index->stat_n_diff_key_vals[i + 1]
- num_null));
}
} else {
rec_per_key = (ha_rows)
(records / index->stat_n_diff_key_vals[i + 1]);
}
return(rec_per_key);
}
/*********************************************************************//**
Returns statistics information of the table to the MySQL interpreter,
in various fields of the handle object. */
......@@ -7737,13 +7815,8 @@ ha_innobase::info_low(
break;
}
if (index->stat_n_diff_key_vals[j + 1] == 0) {
rec_per_key = stats.records;
} else {
rec_per_key = (ha_rows)(stats.records /
index->stat_n_diff_key_vals[j + 1]);
}
rec_per_key = innodb_rec_per_key(
index, j, stats.records);
/* Since MySQL seems to favor table scans
too much over index searches, we pretend
......@@ -10934,6 +11007,13 @@ static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering,
innodb_change_buffering_validate,
innodb_change_buffering_update, "inserts");
static MYSQL_SYSVAR_ENUM(stats_method, srv_innodb_stats_method,
PLUGIN_VAR_RQCMDARG,
"Specifies how InnoDB index statistics collection code should "
"treat NULLs. Possible values are NULLS_EQUAL (default), "
"NULLS_UNEQUAL and NULLS_IGNORED",
NULL, NULL, SRV_STATS_NULLS_EQUAL, &innodb_stats_method_typelib);
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
static MYSQL_SYSVAR_UINT(change_buffering_debug, ibuf_debug,
PLUGIN_VAR_RQCMDARG,
......@@ -10988,6 +11068,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(stats_on_metadata),
MYSQL_SYSVAR(stats_sample_pages),
MYSQL_SYSVAR(adaptive_hash_index),
MYSQL_SYSVAR(stats_method),
MYSQL_SYSVAR(replication_delay),
MYSQL_SYSVAR(status_file),
MYSQL_SYSVAR(strict_mode),
......
......@@ -478,7 +478,10 @@ btr_estimate_n_rows_in_range(
/*******************************************************************//**
Estimates the number of different key values in a given index, for
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
The estimates are stored in the array index->stat_n_diff_key_vals. */
The estimates are stored in the array index->stat_n_diff_key_vals.
If innodb_stats_method is nulls_ignored, we also record the number of
non-null values for each prefix and stored the estimates in
array index->stat_n_non_null_key_vals. */
UNIV_INTERN
void
btr_estimate_number_of_different_key_vals(
......
......@@ -321,6 +321,12 @@ struct dict_index_struct{
dict_get_n_unique(index); we
periodically calculate new
estimates */
ib_int64_t* stat_n_non_null_key_vals;
/* approximate number of non-null key values
for this index, for each column where
n < dict_get_n_unique(index); This
is used when innodb_stats_method is
"nulls_ignored". */
ulint stat_index_size;
/*!< approximate index size in
database pages */
......
......@@ -165,6 +165,10 @@ cmp_rec_rec_with_match(
const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
dict_index_t* index, /*!< in: data dictionary index */
ibool nulls_unequal,
/* in: TRUE if this is for index statistics
cardinality estimation, and innodb_stats_method
is "nulls_unequal" or "nulls_ignored" */
ulint* matched_fields, /*!< in/out: number of already completely
matched fields; when the function returns,
contains the value the for current
......
......@@ -87,5 +87,5 @@ cmp_rec_rec(
ulint match_b = 0;
return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
&match_f, &match_b));
FALSE, &match_f, &match_b));
}
......@@ -154,6 +154,11 @@ capacity. PCT_IO(5) -> returns the number of IO operations that
is 5% of the max where max is srv_io_capacity. */
#define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) p / 100.0)))
/* The "innodb_stats_method" setting, decides how InnoDB is going
to treat NULL value when collecting statistics. It is not defined
as enum type because the configure option takes unsigned integer type. */
extern ulong srv_innodb_stats_method;
#ifdef UNIV_LOG_ARCHIVE
extern ibool srv_log_archive_on;
extern ibool srv_archive_recovery;
......@@ -363,6 +368,19 @@ enum {
in connection with recovery */
};
/* Alternatives for srv_innodb_stats_method, which could be changed by
setting innodb_stats_method */
enum srv_stats_method_name_enum {
SRV_STATS_NULLS_EQUAL, /* All NULL values are treated as
equal. This is the default setting
for innodb_stats_method */
SRV_STATS_NULLS_UNEQUAL, /* All NULL values are treated as
NOT equal. */
SRV_STATS_NULLS_IGNORED /* NULL values are ignored */
};
typedef enum srv_stats_method_name_enum srv_stats_method_name_t;
#ifndef UNIV_HOTBACKUP
/** Types of threads existing in the system. */
enum srv_thread_type {
......
......@@ -862,6 +862,10 @@ cmp_rec_rec_with_match(
const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
dict_index_t* index, /*!< in: data dictionary index */
ibool nulls_unequal,
/* in: TRUE if this is for index statistics
cardinality estimation, and innodb_stats_method
is "nulls_unequal" or "nulls_ignored" */
ulint* matched_fields, /*!< in/out: number of already completely
matched fields; when the function returns,
contains the value the for current
......@@ -961,9 +965,13 @@ cmp_rec_rec_with_match(
|| rec2_f_len == UNIV_SQL_NULL) {
if (rec1_f_len == rec2_f_len) {
goto next_field;
/* This is limited to stats collection,
cannot use it for regular search */
if (nulls_unequal) {
ret = -1;
} else {
goto next_field;
}
} else if (rec2_f_len == UNIV_SQL_NULL) {
/* We define the SQL null to be the
......
......@@ -243,6 +243,11 @@ UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
/* variable counts amount of data read in total (in bytes) */
UNIV_INTERN ulint srv_data_read = 0;
/* Internal setting for "innodb_stats_method". Decides how InnoDB treats
NULL value when collecting statistics. By default, it is set to
SRV_STATS_NULLS_EQUAL(0), ie. all NULL value are treated equal */
ulong srv_innodb_stats_method = SRV_STATS_NULLS_EQUAL;
/* here we count the amount of data written in total (in bytes) */
UNIV_INTERN ulint srv_data_written = 0;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment