Commit eb6a9ad7 authored by Sergei Petrunia's avatar Sergei Petrunia

MDEV-26886: Estimation for filtered rows less precise with JSON histogram

- Make Histogram_json_hb::range_selectivity handle singleton buckets
  specially when computing selectivity of the max. endpoint bound.
  (for min. endpoint, we already do that).

- Also, fixed comments for Histogram_json_hb::find_bucket
parent 106c785e
......@@ -7658,3 +7658,28 @@ test.t1 analyze status OK
ALTER TABLE t1 MODIFY f TEXT, ORDER BY pk;
INSERT INTO t1 (f) VALUES ('bar');
DROP TABLE t1;
#
# MDEV-26886: Estimation for filtered rows less precise with JSON histogram
#
create table t1 (a tinyint) as select if(seq%3,seq,0) as a from seq_1_to_100;
select count(*) from t1 where a <= 0;
count(*)
33
set histogram_type = JSON_HB, histogram_size=default;
analyze table t1 persistent for all;
Table Op Msg_type Msg_text
test.t1 analyze status Engine-independent statistics collected
test.t1 analyze status OK
analyze select * from t1 where a <= 0;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 33.00 33.00 Using where
analyze select * from t1 where a < 0;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 1.47 0.00 Using where
analyze select * from t1 where a > 0;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 67.00 67.00 Using where
analyze select * from t1 where a >= 0;
id select_type table type possible_keys key key_len ref rows r_rows filtered r_filtered Extra
1 SIMPLE t1 ALL NULL NULL NULL NULL 100 100.00 100.00 100.00 Using where
drop table t1;
......@@ -340,3 +340,17 @@ ANALYZE TABLE t1 PERSISTENT FOR ALL;
ALTER TABLE t1 MODIFY f TEXT, ORDER BY pk;
INSERT INTO t1 (f) VALUES ('bar');
DROP TABLE t1;
--echo #
--echo # MDEV-26886: Estimation for filtered rows less precise with JSON histogram
--echo #
create table t1 (a tinyint) as select if(seq%3,seq,0) as a from seq_1_to_100;
select count(*) from t1 where a <= 0;
set histogram_type = JSON_HB, histogram_size=default;
analyze table t1 persistent for all;
analyze select * from t1 where a <= 0;
analyze select * from t1 where a < 0;
analyze select * from t1 where a > 0;
analyze select * from t1 where a >= 0;
drop table t1;
......@@ -743,9 +743,22 @@ double Histogram_json_hb::range_selectivity(Field *field, key_range *min_endp,
idx--;
}
double left_fract= get_left_fract(idx);
double sel= position_in_interval(field, max_key, max_key_len,
buckets[idx].start_value,
get_end_value(idx));
double sel;
/* Special handling for singleton buckets */
if (buckets[idx].ndv == 1 && equal)
{
if (inclusive_endp)
sel= 1.0;
else
sel= 0.0;
}
else
{
sel= position_in_interval(field, max_key, max_key_len,
buckets[idx].start_value,
get_end_value(idx));
}
max= left_fract + sel * (buckets[idx].cum_fract - left_fract);
}
else
......@@ -763,26 +776,18 @@ void Histogram_json_hb::serialize(Field *field)
/*
Find the rightmost histogram bucket such that "lookup_val $GT start_value".
$GT is either '>' or '>=' depending on equal_is_less parameter.
@param equal_is_less Controls what to do if a histogram bound is equal to the
lookup_val.
@detail
Possible cases:
1. The regular case: the value falls into some bucket.
@brief
Find the leftmost histogram bucket such that "lookup_val >= start_value".
2. The value is less than the minimum of the first bucket
3. The value is greater than the maximum of the last bucket
In these cases we "clip" to the first/last bucket.
@param field Field object (used to do value comparisons)
@param lookup_val The lookup value in KeyTupleFormat.
@param equal OUT TRUE<=> the found bucket has left_bound=lookup_val
4. The value hits the bucket boundary. Then, we need to know whether the
point of interest is to the left the constant, or to the right of it.
@return
The bucket index
*/
int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
int Histogram_json_hb::find_bucket(const Field *field, const uchar *lookup_val,
bool *equal)
{
int res;
......@@ -797,7 +802,8 @@ int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
if (!res)
{
*equal= true;
return middle;
low= middle;
goto end;
}
else if (res < 0)
low= middle;
......@@ -806,25 +812,25 @@ int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
}
/*
If low and high were assigned a value in the above loop, then they are not
equal to the lookup value:
If low and high were assigned a value in the above loop and we got here,
then the following holds:
bucket[low] < lookup_val < bucket[high]
bucket[low].start_value < lookup_val < bucket[high].start_value
But there are two special cases: low=0 and high=last_bucket. Handle them
below.
Besides that, there are two special cases: low=0 and high=last_bucket.
Handle them below.
*/
if (low == 0)
{
res= field->key_cmp((uchar*)buckets[0].start_value.data(), lookup_val);
if (!res)
*equal= true;
else if (res < 0)
else if (res < 0) // buckets[0] < lookup_val
{
res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
if (!res)
*equal= true;
if (res >= 0)
if (res <= 0) // buckets[high] <= lookup_val
low= high;
}
}
......@@ -833,9 +839,19 @@ int Histogram_json_hb::find_bucket(Field *field, const uchar *lookup_val,
res= field->key_cmp((uchar*)buckets[high].start_value.data(), lookup_val);
if (!res)
*equal= true;
if (res >= 0)
if (res <= 0)
low= high;
}
end:
// Verification: *equal==TRUE <=> lookup value is equal to the found bucket.
DBUG_ASSERT(*equal == !(field->key_cmp((uchar*)buckets[low].start_value.data(),
lookup_val)));
// buckets[low] <= lookup_val, with one exception of the first bucket.
DBUG_ASSERT(low == 0 ||
field->key_cmp((uchar*)buckets[low].start_value.data(), lookup_val)<= 0);
// buckets[low+1] > lookup_val, with one exception of the last bucket
DBUG_ASSERT(low == (int)buckets.size()-1 ||
field->key_cmp((uchar*)buckets[low+1].start_value.data(), lookup_val)> 0);
return low;
}
......@@ -124,6 +124,6 @@ class Histogram_json_hb : public Histogram_base
private:
double get_left_fract(int idx);
std::string& get_end_value(int idx);
int find_bucket(Field *field, const uchar *lookup_val, bool *equal);
int find_bucket(const Field *field, const uchar *lookup_val, bool *equal);
};
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment