Commit 2cc16653 authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-30812: Improve output cardinality estimates for hash join

Introduce @@optimizer_switch flag: hash_join_cardinality

When it is on, use EITS statistics to produce tighter bounds for
hash join output cardinality.

Reviewed by: Monty <monty@mariadb.org>
parent cc348115
create table t1 (a int, b int, c int);
insert into t1 select seq,seq/2, seq/4 from seq_1_to_100;
create table t2 (a int, b int, c int);
insert into t2 select seq, seq/2, seq/4 from seq_1_to_200;
analyze table t1,t2 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
test.t2 analyze status Engine-independent statistics collected
test.t2 analyze status OK
set optimizer_trace=1;
set join_cache_level=6;
set optimizer_switch='hash_join_cardinality=on';
explain select *
from t1, t2
where t1.a=t2.a and t1.a=t2.b and t1.c=t2.c;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
1 SIMPLE t2 hash_ALL NULL #hash#$hj 15 test.t1.a,test.t1.a,test.t1.c 200 Using where; Using join buffer (flat, BNLH join)
set @json= (select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@json, '$**.hash_join_cardinality')) as JS;
JS
[
{
"hash_join_columns":
[
{
"field": "a",
"avg_frequency": 1
},
{
"field": "b",
"avg_frequency": 2
},
{
"field": "c",
"avg_frequency": 3.9216
}
],
"rows": 1
}
]
select json_detailed(json_extract(@json, '$**.rest_of_plan[*].rows_for_plan'))
as ROWS_FOR_PLAN;
ROWS_FOR_PLAN
[
100
]
explain select *
from t1, t2 where t1.c=t2.c;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
1 SIMPLE t2 hash_ALL NULL #hash#$hj 5 test.t1.c 200 Using where; Using join buffer (flat, BNLH join)
set @json= (select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@json, '$**.hash_join_cardinality')) as JS;
JS
[
{
"hash_join_columns":
[
{
"field": "c",
"avg_frequency": 3.9216
}
],
"rows": 3.9216
}
]
select json_detailed(json_extract(@json, '$**.rest_of_plan[*].rows_for_plan'))
as ROWS_FOR_PLAN;
ROWS_FOR_PLAN
[
392.16
]
explain select *
from t1 straight_join t2 where t1.c=t2.c and t2.a<30;
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 Using where
1 SIMPLE t2 hash_ALL NULL #hash#$hj 5 test.t1.c 200 Using where; Using join buffer (flat, BNLH join)
set @json= (select trace from information_schema.optimizer_trace);
# Note that rows is the same:
select json_detailed(json_extract(@json, '$**.hash_join_cardinality')) as JS;
JS
[
{
"hash_join_columns":
[
{
"field": "c",
"avg_frequency": 3.9216
}
],
"rows": 3.9216
}
]
# Despite available selectivity:
select json_detailed(json_extract(@json, '$**.selectivity_for_columns')) as JS;
JS
[
[
{
"column_name": "a",
"ranges":
[
"NULL < a < 30"
],
"selectivity_from_histogram": 0.1484375
}
]
]
drop table t1,t2;
--source include/have_sequence.inc
create table t1 (a int, b int, c int);
insert into t1 select seq,seq/2, seq/4 from seq_1_to_100;
create table t2 (a int, b int, c int);
insert into t2 select seq, seq/2, seq/4 from seq_1_to_200;
analyze table t1,t2 persistent for all;
set optimizer_trace=1;
set join_cache_level=6;
set optimizer_switch='hash_join_cardinality=on';
explain select *
from t1, t2
where t1.a=t2.a and t1.a=t2.b and t1.c=t2.c;
set @json= (select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@json, '$**.hash_join_cardinality')) as JS;
select json_detailed(json_extract(@json, '$**.rest_of_plan[*].rows_for_plan'))
as ROWS_FOR_PLAN;
explain select *
from t1, t2 where t1.c=t2.c;
set @json= (select trace from information_schema.optimizer_trace);
select json_detailed(json_extract(@json, '$**.hash_join_cardinality')) as JS;
select json_detailed(json_extract(@json, '$**.rest_of_plan[*].rows_for_plan'))
as ROWS_FOR_PLAN;
explain select *
from t1 straight_join t2 where t1.c=t2.c and t2.a<30;
set @json= (select trace from information_schema.optimizer_trace);
--echo # Note that rows is the same:
select json_detailed(json_extract(@json, '$**.hash_join_cardinality')) as JS;
--echo # Despite available selectivity:
select json_detailed(json_extract(@json, '$**.selectivity_for_columns')) as JS;
drop table t1,t2;
......@@ -734,7 +734,8 @@ The following specify which files/extra groups are read (specified before remain
extended_keys, exists_to_in, orderby_uses_equalities,
condition_pushdown_for_derived, split_materialized,
condition_pushdown_for_subquery, rowid_filter,
condition_pushdown_from_having, not_null_range_scan
condition_pushdown_from_having, not_null_range_scan,
hash_join_cardinality
--optimizer-trace=name
Controls tracing of the Optimizer:
optimizer_trace=option=val[,option=val...], where option
......
......@@ -38,7 +38,7 @@ SET @@session.session_track_system_variables='optimizer_switch';
set optimizer_switch='index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off';
-- Tracker : SESSION_TRACK_SYSTEM_VARIABLES
-- optimizer_switch
-- index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off
-- index_merge=off,index_merge_union=off,index_merge_sort_union=off,index_merge_intersection=off,index_merge_sort_intersection=on,engine_condition_pushdown=on,index_condition_pushdown=off,derived_merge=off,derived_with_keys=off,firstmatch=off,loosescan=off,materialization=on,in_to_exists=off,semijoin=off,partial_match_rowid_merge=off,partial_match_table_scan=off,subquery_cache=off,mrr=on,mrr_cost_based=on,mrr_sort_keys=on,outer_join_with_cache=off,semijoin_with_cache=off,join_cache_incremental=off,join_cache_hashed=off,join_cache_bka=off,optimize_join_buffer_size=on,table_elimination=off,extended_keys=off,exists_to_in=off,orderby_uses_equalities=off,condition_pushdown_for_derived=off,split_materialized=on,condition_pushdown_for_subquery=on,rowid_filter=on,condition_pushdown_from_having=on,not_null_range_scan=off,hash_join_cardinality=off
Warnings:
Warning 1681 'engine_condition_pushdown=on' is deprecated and will be removed in a future release
......
......@@ -2449,7 +2449,7 @@ VARIABLE_COMMENT Fine-tune the optimizer behavior
NUMERIC_MIN_VALUE NULL
NUMERIC_MAX_VALUE NULL
NUMERIC_BLOCK_SIZE NULL
ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,default
ENUM_VALUE_LIST index_merge,index_merge_union,index_merge_sort_union,index_merge_intersection,index_merge_sort_intersection,engine_condition_pushdown,index_condition_pushdown,derived_merge,derived_with_keys,firstmatch,loosescan,materialization,in_to_exists,semijoin,partial_match_rowid_merge,partial_match_table_scan,subquery_cache,mrr,mrr_cost_based,mrr_sort_keys,outer_join_with_cache,semijoin_with_cache,join_cache_incremental,join_cache_hashed,join_cache_bka,optimize_join_buffer_size,table_elimination,extended_keys,exists_to_in,orderby_uses_equalities,condition_pushdown_for_derived,split_materialized,condition_pushdown_for_subquery,rowid_filter,condition_pushdown_from_having,not_null_range_scan,hash_join_cardinality,default
READ_ONLY NO
COMMAND_LINE_ARGUMENT REQUIRED
VARIABLE_NAME OPTIMIZER_TRACE
......
......@@ -234,6 +234,7 @@
#define OPTIMIZER_SWITCH_USE_ROWID_FILTER (1ULL << 33)
#define OPTIMIZER_SWITCH_COND_PUSHDOWN_FROM_HAVING (1ULL << 34)
#define OPTIMIZER_SWITCH_NOT_NULL_RANGE_SCAN (1ULL << 35)
#define OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY (1ULL << 36)
#define OPTIMIZER_SWITCH_DEFAULT (OPTIMIZER_SWITCH_INDEX_MERGE | \
OPTIMIZER_SWITCH_INDEX_MERGE_UNION | \
......
......@@ -7677,6 +7677,90 @@ double adjust_quick_cost(double quick_cost, ha_rows records)
}
/*
@brief
Compute the fanout of hash join operation using EITS data
*/
double hash_join_fanout(JOIN *join, JOIN_TAB *s, table_map remaining_tables,
double rnd_records, KEYUSE *hj_start_key)
{
THD *thd= join->thd;
/*
Before doing the hash join, we will scan the table and apply the local part
of the WHERE condition. This will produce rnd_records.
The EITS statistics describes the entire table. Calling
table->field[N]->get_avg_frequency()
produces average #rows in the table with some value.
What happens if we filter out rows so that rnd_records rows are left?
Something between the two outcomes:
A. filtering removes a fraction of rows for each value:
avg_frequency=avg_frequency * condition_selectivity
B. filtering removes entire groups of rows with the same value, but
the remaining groups remain of the same size.
We make pessimistic assumption and assume B.
We also handle an edge case: if rnd_records is less than avg_frequency,
assume we'll get rnd_records rows with the same value, and return
rnd_records as the fanout estimate.
*/
double min_freq= rnd_records;
Json_writer_object trace_obj(thd, "hash_join_cardinality");
/*
There can be multiple KEYUSE referring to same or different columns
KEYUSE(tbl.col1 = ...)
KEYUSE(tbl.col1 = ...)
KEYUSE(tbl.col2 = ...)
Hash join code can use multiple columns: (col1, col2) for joining.
We need n_distinct({col1, col2}).
EITS only has statistics on individual columns: n_distinct(col1),
n_distinct(col2).
Our current solution is to be very conservative and use selectivity
of one column with the lowest avg_frequency.
In the future, we should an approach that cautiosly takes into account
multiple KEYUSEs either multiply by number of equalities or by sqrt
of the second most selective equality.
*/
Json_writer_array trace_arr(thd, "hash_join_columns");
for (KEYUSE *keyuse= hj_start_key;
keyuse->table == s->table && is_hash_join_key_no(keyuse->key);
keyuse++)
{
if (!(remaining_tables & keyuse->used_tables) &&
(!keyuse->validity_ref || *keyuse->validity_ref) &&
s->access_from_tables_is_allowed(keyuse->used_tables,
join->sjm_lookup_tables))
{
Field *field= s->table->field[keyuse->keypart];
if (is_eits_usable(field))
{
double freq= field->read_stats->get_avg_frequency();
Json_writer_object trace_field(thd);
trace_field.add("field",field->field_name.str).
add("avg_frequency", freq);
if (freq < min_freq)
min_freq= freq;
}
}
}
trace_arr.end();
trace_obj.add("rows", min_freq);
return min_freq;
}
/**
Find the best access path for an extension of a partial execution
plan and add this path to the plan.
......@@ -8292,7 +8376,10 @@ best_access_path(JOIN *join,
(!(s->table->map & join->outer_join) ||
join->allowed_outer_join_with_cache)) // (2)
{
double fanout;
Json_writer_object trace_access_hash(thd);
trace_access_hash.add("type", "hash");
trace_access_hash.add("index", "hj-key");
double join_sel= 0.1;
/* Estimate the cost of the hash join access to the table */
double rnd_records= matching_candidates_in_table(s, found_constraint,
......@@ -8302,25 +8389,55 @@ best_access_path(JOIN *join,
double cmp_time= (s->records - rnd_records)/TIME_FOR_COMPARE;
tmp= COST_ADD(tmp, cmp_time);
DBUG_ASSERT(hj_start_key);
fanout= rnd_records;
if (optimizer_flag(thd, OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY))
{
/*
Starting from this point, rnd_records should not be used anymore.
Use "fanout" for an estimate of # matching records.
*/
fanout= hash_join_fanout(join, s, remaining_tables, rnd_records,
hj_start_key);
join_sel= 1.0; // Don't do the "10% heuristic"
}
/* We read the table as many times as join buffer becomes full. */
double refills= (1.0 + floor((double) cache_record_length(join,idx) *
record_count /
(double) thd->variables.join_buff_size));
tmp= COST_MULT(tmp, refills);
best_time= COST_ADD(tmp,
COST_MULT((record_count*join_sel) / TIME_FOR_COMPARE,
rnd_records));
// Add cost of reading/writing the join buffer
if (optimizer_flag(thd, OPTIMIZER_SWITCH_HASH_JOIN_CARDINALITY))
{
/* Set it to be 1/10th of TIME_FOR_COMPARE */
double row_copy_cost= 1.0 / (10*TIME_FOR_COMPARE);
double join_buffer_operations=
COST_ADD(
COST_MULT(record_count, row_copy_cost),
COST_MULT(record_count, fanout * (idx - join->const_tables))
);
double jbuf_use_cost= row_copy_cost * join_buffer_operations;
trace_access_hash.add("jbuf_use_cost", jbuf_use_cost);
tmp= COST_ADD(tmp, jbuf_use_cost);
}
double where_cost= COST_MULT((fanout*join_sel) / TIME_FOR_COMPARE,
record_count);
trace_access_hash.add("extra_cond_check_cost", where_cost);
best_time= COST_ADD(tmp, where_cost);
best= tmp;
records= rnd_records;
records= fanout;
best_key= hj_start_key;
best_ref_depends_map= 0;
best_uses_jbuf= TRUE;
best_filter= 0;
best_type= JT_HASH;
trace_access_hash.add("type", "hash");
trace_access_hash.add("index", "hj-key");
trace_access_hash.add("cost", rnd_records);
trace_access_hash.add("records", records);
trace_access_hash.add("cost", best);
trace_access_hash.add("chosen", true);
}
......
......@@ -2754,6 +2754,7 @@ export const char *optimizer_switch_names[]=
"rowid_filter",
"condition_pushdown_from_having",
"not_null_range_scan",
"hash_join_cardinality",
"default",
NullS
};
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment