Commit 10f0530b authored by Igor Babaev's avatar Igor Babaev

Fixed bug mdev-4369.

The function merge_walk() was adjusted to be able to aggregate
the counters of the merged elements.
Before this change it was not possible to guarantee the correctness
of the counters passed to the call-back parameter walk_action.
As a result, when some elements of a Unique object were flushed to
disk, the function passed to merge_walk() as the call-back parameter
could receive wrong counters of elements. This could lead to building
wrong histograms.
parent 1c30fb2a
...@@ -1511,4 +1511,43 @@ test t1 a 1 5 0.0000 1.0000 10 DOUBLE_PREC_HB 0000FF3FFF7FFFBFFFFF ...@@ -1511,4 +1511,43 @@ test t1 a 1 5 0.0000 1.0000 10 DOUBLE_PREC_HB 0000FF3FFF7FFFBFFFFF
set histogram_size=default; set histogram_size=default;
set histogram_type=default; set histogram_type=default;
drop table t1; drop table t1;
#
# Bug mdev-4369: histogram for a column with many distinct values
#
CREATE TABLE t1 (id int);
CREATE TABLE t2 (id int);
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
INSERT INTO t1 (id) SELECT id FROM t1;
INSERT INTO t1 SELECT id+1 FROM t1;
INSERT INTO t1 SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id+8 FROM t1;
INSERT INTO t1 SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id+512 FROM t1;
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
SELECT COUNT(*) FROM t2;
COUNT(*)
8192
SELECT COUNT(DISTINCT id) FROM t2;
COUNT(DISTINCT id)
1024
set @@tmp_table_size=1024*16;
set @@max_heap_table_size=1024*16;
set histogram_size=63;
analyze table t2 persistent for all;
Table Op Msg_type Msg_text
test.t2 analyze status OK
select db_name, table_name, column_name,
min_value, max_value,
nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
db_name table_name column_name min_value max_value nulls_ratio avg_frequency hist_size hist_type HEX(histogram)
test t2 id 1 1024 0.0000 8.0000 63 SINGLE_PREC_HB 03070B0F13171B1F23272B2F33373B3F43474B4F53575B5F63676B6F73777B7F83878B8F93979B9FA3A7ABAFB3B7BBBFC3C7CBCFD3D7DBDFE3E7EBEFF3F7FB
set histogram_size=default;
drop table t1, t2;
set use_stat_tables=@save_use_stat_tables; set use_stat_tables=@save_use_stat_tables;
...@@ -638,5 +638,48 @@ set histogram_type=default; ...@@ -638,5 +638,48 @@ set histogram_type=default;
drop table t1; drop table t1;
--echo #
--echo # Bug mdev-4369: histogram for a column with many distinct values
--echo #
# Regression test: collect a histogram for a column whose distinct values
# do not fit into the memory limits set further below.
CREATE TABLE t1 (id int);
CREATE TABLE t2 (id int);
# Seed t1 with 4 rows of id=1, then double them to 8 rows.
INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
INSERT INTO t1 (id) SELECT id FROM t1;
# Each statement below appends a copy of the current rows shifted by the next
# power of two, doubling both the row count and the number of distinct ids.
INSERT INTO t1 SELECT id+1 FROM t1;
INSERT INTO t1 SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id+8 FROM t1;
INSERT INTO t1 SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id+512 FROM t1;
# Copy the rows into t2 in an order perturbed by rand(), so equal ids are
# not inserted next to each other.
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
# Sanity checks on the generated data (row count and number of distinct ids).
SELECT COUNT(*) FROM t2;
SELECT COUNT(DISTINCT id) FROM t2;
# Shrink the in-memory limits to 16K before collecting statistics.
# NOTE(review): presumably this forces the Unique object used for distinct
# counting to flush part of its elements to disk, which is the code path
# the bug fix touches - confirm against the server code.
set @@tmp_table_size=1024*16;
set @@max_heap_table_size=1024*16;
set histogram_size=63;
# Collect persistent statistics, then dump them together with the histogram
# bytes in hexadecimal form.
analyze table t2 persistent for all;
select db_name, table_name, column_name,
min_value, max_value,
nulls_ratio, avg_frequency,
hist_size, hist_type, HEX(histogram)
FROM mysql.column_stats;
# Cleanup: restore the default histogram size and drop the test tables.
set histogram_size=default;
drop table t1, t2;
set use_stat_tables=@save_use_stat_tables; set use_stat_tables=@save_use_stat_tables;
...@@ -3997,6 +3997,7 @@ class Unique :public Sql_alloc ...@@ -3997,6 +3997,7 @@ class Unique :public Sql_alloc
uint size; uint size;
uint full_size; uint full_size;
uint min_dupl_count; /* always 0 for unions, > 0 for intersections */ uint min_dupl_count; /* always 0 for unions, > 0 for intersections */
bool with_counters;
bool merge(TABLE *table, uchar *buff, bool without_last_merge); bool merge(TABLE *table, uchar *buff, bool without_last_merge);
......
...@@ -1348,7 +1348,7 @@ class Count_distinct_field: public Sql_alloc ...@@ -1348,7 +1348,7 @@ class Count_distinct_field: public Sql_alloc
tree_key_length= field->pack_length(); tree_key_length= field->pack_length();
tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field, tree= new Unique((qsort_cmp2) simple_str_key_cmp, (void*) field,
tree_key_length, max_heap_table_size); tree_key_length, max_heap_table_size, 1);
} }
virtual ~Count_distinct_field() virtual ~Count_distinct_field()
...@@ -1435,7 +1435,7 @@ class Count_distinct_field_bit: public Count_distinct_field ...@@ -1435,7 +1435,7 @@ class Count_distinct_field_bit: public Count_distinct_field
tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp, tree= new Unique((qsort_cmp2) simple_ulonglong_key_cmp,
(void*) &tree_key_length, (void*) &tree_key_length,
tree_key_length, max_heap_table_size); tree_key_length, max_heap_table_size, 1);
} }
bool add() bool add()
......
...@@ -86,6 +86,7 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg, ...@@ -86,6 +86,7 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
full_size= size; full_size= size;
if (min_dupl_count_arg) if (min_dupl_count_arg)
full_size+= sizeof(element_count); full_size+= sizeof(element_count);
with_counters= test(min_dupl_count_arg);
my_b_clear(&file); my_b_clear(&file);
init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func, init_tree(&tree, (ulong) (max_in_memory_size / 16), 0, size, comp_func,
NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC)); NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC));
...@@ -428,6 +429,22 @@ static int buffpek_compare(void *arg, uchar *key_ptr1, uchar *key_ptr2) ...@@ -428,6 +429,22 @@ static int buffpek_compare(void *arg, uchar *key_ptr1, uchar *key_ptr2)
C_MODE_END C_MODE_END
/*
  Read the counter stored inside a merged element.

  SYNOPSIS
    get_counter_from_merged_element()
      ptr   start of the element
      ofs   offset in bytes of the counter from the start of the element

  NOTE
    The counter is copied out byte-wise with memcpy(), so ptr + ofs does
    not have to be aligned for element_count.

  RETURN VALUE
    The counter found at ptr + ofs
*/
inline
element_count get_counter_from_merged_element(void *ptr, uint ofs)
{
  const uchar *counter_pos= (const uchar *) ptr + ofs;
  element_count value;
  memcpy(&value, counter_pos, sizeof(value));
  return value;
}
inline
void put_counter_into_merged_element(void *ptr, uint ofs, element_count cnt)
{
memcpy((uchar *) ptr + ofs, (uchar *) &cnt, sizeof(element_count));
}
/* /*
DESCRIPTION DESCRIPTION
...@@ -457,6 +474,8 @@ C_MODE_END ...@@ -457,6 +474,8 @@ C_MODE_END
file file with all trees dumped. Trees in the file file file with all trees dumped. Trees in the file
must contain sorted unique values. Cache must be must contain sorted unique values. Cache must be
initialized in read mode. initialized in read mode.
with_counters take into account counters for equal merged
elements
RETURN VALUE RETURN VALUE
0 ok 0 ok
<> 0 error <> 0 error
...@@ -466,7 +485,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, ...@@ -466,7 +485,7 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
uint key_length, BUFFPEK *begin, BUFFPEK *end, uint key_length, BUFFPEK *begin, BUFFPEK *end,
tree_walk_action walk_action, void *walk_action_arg, tree_walk_action walk_action, void *walk_action_arg,
qsort_cmp2 compare, void *compare_arg, qsort_cmp2 compare, void *compare_arg,
IO_CACHE *file) IO_CACHE *file, bool with_counters)
{ {
BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg }; BUFFPEK_COMPARE_CONTEXT compare_context = { compare, compare_arg };
QUEUE queue; QUEUE queue;
...@@ -485,6 +504,8 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, ...@@ -485,6 +504,8 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
uint bytes_read; /* to hold return value of read_to_buffer */ uint bytes_read; /* to hold return value of read_to_buffer */
BUFFPEK *top; BUFFPEK *top;
int res= 1; int res= 1;
uint cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0);
element_count cnt;
/* /*
Invariant: queue must contain top element from each tree, until a tree Invariant: queue must contain top element from each tree, until a tree
is not completely walked through. is not completely walked through.
...@@ -543,9 +564,17 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, ...@@ -543,9 +564,17 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
/* new top has been obtained; if old top is unique, apply the action */ /* new top has been obtained; if old top is unique, apply the action */
if (compare(compare_arg, old_key, top->key)) if (compare(compare_arg, old_key, top->key))
{ {
if (walk_action(old_key, 1, walk_action_arg)) cnt= with_counters ?
get_counter_from_merged_element(old_key, cnt_ofs) : 1;
if (walk_action(old_key, cnt, walk_action_arg))
goto end; goto end;
} }
else if (with_counters)
{
cnt= get_counter_from_merged_element(top->key, cnt_ofs);
cnt+= get_counter_from_merged_element(old_key, cnt_ofs);
put_counter_into_merged_element(top->key, cnt_ofs, cnt);
}
} }
/* /*
Applying walk_action to the tail of the last tree: this is safe because Applying walk_action to the tail of the last tree: this is safe because
...@@ -556,7 +585,10 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size, ...@@ -556,7 +585,10 @@ static bool merge_walk(uchar *merge_buffer, ulong merge_buffer_size,
{ {
do do
{ {
if (walk_action(top->key, 1, walk_action_arg))
cnt= with_counters ?
get_counter_from_merged_element(top->key, cnt_ofs) : 1;
if (walk_action(top->key, cnt, walk_action_arg))
goto end; goto end;
top->key+= key_length; top->key+= key_length;
} }
...@@ -620,7 +652,7 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg) ...@@ -620,7 +652,7 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg)
(BUFFPEK *) file_ptrs.buffer, (BUFFPEK *) file_ptrs.buffer,
(BUFFPEK *) file_ptrs.buffer + file_ptrs.elements, (BUFFPEK *) file_ptrs.buffer + file_ptrs.elements,
action, walk_action_arg, action, walk_action_arg,
tree.compare, tree.custom_arg, &file); tree.compare, tree.custom_arg, &file, with_counters);
} }
my_free(merge_buffer); my_free(merge_buffer);
return res; return res;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment