Commit 6a2ef557 authored by konstantin@mysql.com

WL#926 "AVG(DISTINCT) and other distincts", part 2 (out of 3): clean up
Item_sum_count_distinct, and deploy Unique for use with COUNT(DISTINCT)
if there is no blob column in the list of DISTINCT arguments.
parent 98e83555
...@@ -116,7 +116,7 @@ count(distinct n) ...@@ -116,7 +116,7 @@ count(distinct n)
5000 5000
show status like 'Created_tmp_disk_tables'; show status like 'Created_tmp_disk_tables';
Variable_name Value Variable_name Value
Created_tmp_disk_tables 1 Created_tmp_disk_tables 0
drop table t1; drop table t1;
create table t1 (s text); create table t1 (s text);
flush status; flush status;
......
...@@ -881,3 +881,10 @@ SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6; ...@@ -881,3 +881,10 @@ SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
MAX(id) MAX(id)
NULL NULL
DROP TABLE t1; DROP TABLE t1;
CREATE TABLE t1 (a VARCHAR(400));
INSERT INTO t1 (a) VALUES ("A"), ("a"), ("a "), ("a "),
("B"), ("b"), ("b "), ("b ");
SELECT COUNT(DISTINCT a) FROM t1;
COUNT(DISTINCT a)
2
DROP TABLE t1;
...@@ -98,60 +98,60 @@ DROP TABLE t1; ...@@ -98,60 +98,60 @@ DROP TABLE t1;
CREATE TABLE t1 (id INTEGER); CREATE TABLE t1 (id INTEGER);
CREATE TABLE t2 (id INTEGER); CREATE TABLE t2 (id INTEGER);
INSERT INTO t1 (id) VALUES (1), (1), (1),(1); INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
INSERT INTO t2 (id) SELECT id FROM t1; INSERT INTO t1 (id) SELECT id FROM t1;
INSERT INTO t1 (id) SELECT id FROM t2;
/* 8 */ /* 8 */
INSERT INTO t1 (id) SELECT id FROM t2; INSERT INTO t1 (id) SELECT id FROM t1;
/* 12 */ /* 12 */
INSERT INTO t1 (id) SELECT id FROM t2; INSERT INTO t1 (id) SELECT id FROM t1;
/* 16 */ /* 16 */
INSERT INTO t1 (id) SELECT id FROM t2; INSERT INTO t1 (id) SELECT id FROM t1;
/* 20 */ /* 20 */
INSERT INTO t1 (id) SELECT id FROM t2; INSERT INTO t1 (id) SELECT id FROM t1;
/* 24 */ /* 24 */
DELETE FROM t2; INSERT INTO t1 SELECT id+1 FROM t1;
INSERT INTO t2 (id) SELECT id+1 FROM t1; INSERT INTO t1 SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+4 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+8 FROM t1;
INSERT INTO t2 (id) SELECT id+2 FROM t1; INSERT INTO t1 SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+32 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+64 FROM t1;
INSERT INTO t2 (id) SELECT id+4 FROM t1; INSERT INTO t1 SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+256 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+512 FROM t1;
INSERT INTO t2 (id) SELECT id+8 FROM t1; SELECT AVG(DISTINCT id) FROM t1 GROUP BY id % 13;
INSERT INTO t1 SELECT id FROM t2; AVG(DISTINCT id)
DELETE FROM t2; 513.5000
INSERT INTO t2 (id) SELECT id+16 FROM t1; 508.0000
INSERT INTO t1 SELECT id FROM t2; 509.0000
DELETE FROM t2; 510.0000
INSERT INTO t2 (id) SELECT id+32 FROM t1; 511.0000
INSERT INTO t1 SELECT id FROM t2; 512.0000
DELETE FROM t2; 513.0000
INSERT INTO t2 (id) SELECT id+64 FROM t1; 514.0000
INSERT INTO t1 SELECT id FROM t2; 515.0000
DELETE FROM t2; 516.0000
INSERT INTO t2 (id) SELECT id+128 FROM t1; 517.0000
INSERT INTO t1 SELECT id FROM t2; 511.5000
DELETE FROM t2; 512.5000
INSERT INTO t2 (id) SELECT id+256 FROM t1; SELECT SUM(DISTINCT id)/COUNT(DISTINCT id) FROM t1 GROUP BY id % 13;
INSERT INTO t1 SELECT id FROM t2; SUM(DISTINCT id)/COUNT(DISTINCT id)
DELETE FROM t2; 513.50000
INSERT INTO t2 (id) SELECT id+512 FROM t1; 508.00000
INSERT INTO t1 SELECT id FROM t2; 509.00000
DELETE FROM t2; 510.00000
INSERT INTO t2 (id) SELECT id+1024 FROM t1; 511.00000
INSERT INTO t1 SELECT id FROM t2; 512.00000
DELETE FROM t2; 513.00000
INSERT INTO t2 (id) SELECT id+2048 FROM t1; 514.00000
INSERT INTO t1 SELECT id FROM t2; 515.00000
DELETE FROM t2; 516.00000
INSERT INTO t2 (id) SELECT id+4096 FROM t1; 517.00000
INSERT INTO t1 SELECT id FROM t2; 511.50000
DELETE FROM t2; 512.50000
INSERT INTO t2 (id) SELECT id+8192 FROM t1; INSERT INTO t1 SELECT id+1024 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+2048 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id+8192 FROM t1;
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand(); INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
SELECT SUM(DISTINCT id) sm FROM t1; SELECT SUM(DISTINCT id) sm FROM t1;
sm sm
......
...@@ -591,3 +591,13 @@ INSERT INTO t1 VALUES ...@@ -591,3 +591,13 @@ INSERT INTO t1 VALUES
(1,1,4), (2,2,1), (3,1,3), (4,2,1), (5,1,1); (1,1,4), (2,2,1), (3,1,3), (4,2,1), (5,1,1);
SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6; SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
DROP TABLE t1; DROP TABLE t1;
#
# Test that new VARCHAR correctly works with COUNT(DISTINCT)
#
CREATE TABLE t1 (a VARCHAR(400));
INSERT INTO t1 (a) VALUES ("A"), ("a"), ("a "), ("a "),
("B"), ("b"), ("b "), ("b ");
SELECT COUNT(DISTINCT a) FROM t1;
DROP TABLE t1;
...@@ -103,64 +103,30 @@ CREATE TABLE t1 (id INTEGER); ...@@ -103,64 +103,30 @@ CREATE TABLE t1 (id INTEGER);
CREATE TABLE t2 (id INTEGER); CREATE TABLE t2 (id INTEGER);
INSERT INTO t1 (id) VALUES (1), (1), (1),(1); INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
INSERT INTO t2 (id) SELECT id FROM t1; INSERT INTO t1 (id) SELECT id FROM t1; /* 8 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 8 */ INSERT INTO t1 (id) SELECT id FROM t1; /* 12 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 12 */ INSERT INTO t1 (id) SELECT id FROM t1; /* 16 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 16 */ INSERT INTO t1 (id) SELECT id FROM t1; /* 20 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 20 */ INSERT INTO t1 (id) SELECT id FROM t1; /* 24 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 24 */ INSERT INTO t1 SELECT id+1 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+2 FROM t1;
INSERT INTO t2 (id) SELECT id+1 FROM t1; INSERT INTO t1 SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+8 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+16 FROM t1;
INSERT INTO t2 (id) SELECT id+2 FROM t1; INSERT INTO t1 SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+64 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+128 FROM t1;
INSERT INTO t2 (id) SELECT id+4 FROM t1; INSERT INTO t1 SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+512 FROM t1;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+8 FROM t1; # Just test that AVG(DISTINCT) is there
INSERT INTO t1 SELECT id FROM t2; SELECT AVG(DISTINCT id) FROM t1 GROUP BY id % 13;
DELETE FROM t2; SELECT SUM(DISTINCT id)/COUNT(DISTINCT id) FROM t1 GROUP BY id % 13;
INSERT INTO t2 (id) SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+1024 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+2048 FROM t1;
INSERT INTO t2 (id) SELECT id+32 FROM t1; INSERT INTO t1 SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+8192 FROM t1;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+512 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+1024 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+2048 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+8192 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
#INSERT INTO t2 (id) SELECT id+16384 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
#INSERT INTO t2 (id) SELECT id+32768 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
#INSERT INTO t2 (id) SELECT id+65536 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand(); INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
# SELECT '++++++++++++++++++++++++++++++++++++++++++++++++++'; # SELECT '++++++++++++++++++++++++++++++++++++++++++++++++++';
......
This diff is collapsed.
...@@ -239,6 +239,7 @@ class Item_sum_avg_distinct: public Item_sum_distinct ...@@ -239,6 +239,7 @@ class Item_sum_avg_distinct: public Item_sum_distinct
public: public:
Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {} Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {}
void fix_length_and_dec();
virtual void calculate_val_and_count(); virtual void calculate_val_and_count();
enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; } enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; }
const char *func_name() const { return "avg_distinct"; } const char *func_name() const { return "avg_distinct"; }
...@@ -280,68 +281,44 @@ class TMP_TABLE_PARAM; ...@@ -280,68 +281,44 @@ class TMP_TABLE_PARAM;
class Item_sum_count_distinct :public Item_sum_int class Item_sum_count_distinct :public Item_sum_int
{ {
TABLE *table; TABLE *table;
table_map used_table_cache;
uint32 *field_lengths; uint32 *field_lengths;
TMP_TABLE_PARAM *tmp_table_param; TMP_TABLE_PARAM *tmp_table_param;
TREE tree_base; /*
TREE *tree; If there are no blobs, we can use a tree, which
is faster than heap table. In that case, we still use the table
to help get things set up, but we insert nothing in it
*/
Unique *tree;
/* /*
Following is 0 normal object and pointer to original one for copy Following is 0 normal object and pointer to original one for copy
(to correctly free resources) (to correctly free resources)
*/ */
Item_sum_count_distinct *original; Item_sum_count_distinct *original;
uint tree_key_length;
uint key_length;
CHARSET_INFO *key_charset;
/*
Calculated based on max_heap_table_size. If reached,
walk the tree and dump it into MyISAM table
*/
uint max_elements_in_tree;
/*
The first few bytes of record ( at least one)
are just markers for deleted and NULLs. We want to skip them since
they will just bloat the tree without providing any valuable info
*/
int rec_offset;
/*
If there are no blobs, we can use a tree, which
is faster than heap table. In that case, we still use the table
to help get things set up, but we insert nothing in it
*/
bool use_tree;
bool always_null; // Set to 1 if the result is always NULL bool always_null; // Set to 1 if the result is always NULL
int tree_to_myisam();
friend int composite_key_cmp(void* arg, byte* key1, byte* key2); friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
friend int simple_str_key_cmp(void* arg, byte* key1, byte* key2); friend int simple_str_key_cmp(void* arg, byte* key1, byte* key2);
friend int simple_raw_key_cmp(void* arg, byte* key1, byte* key2);
friend int dump_leaf(byte* key, uint32 count __attribute__((unused)),
Item_sum_count_distinct* item);
public: public:
Item_sum_count_distinct(List<Item> &list) Item_sum_count_distinct(List<Item> &list)
:Item_sum_int(list), table(0), used_table_cache(~(table_map) 0), :Item_sum_int(list), table(0), field_lengths(0), tmp_table_param(0),
tmp_table_param(0), tree(&tree_base), original(0), use_tree(0), tree(0), original(0), always_null(FALSE)
always_null(0)
{ quick_group= 0; } { quick_group= 0; }
Item_sum_count_distinct(THD *thd, Item_sum_count_distinct *item) Item_sum_count_distinct(THD *thd, Item_sum_count_distinct *item)
:Item_sum_int(thd, item), table(item->table), :Item_sum_int(thd, item), table(item->table),
used_table_cache(item->used_table_cache),
field_lengths(item->field_lengths), field_lengths(item->field_lengths),
tmp_table_param(item->tmp_table_param), tmp_table_param(item->tmp_table_param),
tree(item->tree), original(item), key_length(item->key_length), tree(item->tree), original(item), tree_key_length(item->tree_key_length),
max_elements_in_tree(item->max_elements_in_tree),
rec_offset(item->rec_offset), use_tree(item->use_tree),
always_null(item->always_null) always_null(item->always_null)
{} {}
~Item_sum_count_distinct();
void cleanup(); void cleanup();
table_map used_tables() const { return used_table_cache; }
enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; } enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; }
void clear(); void clear();
bool add(); bool add();
......
...@@ -1831,6 +1831,7 @@ class Unique :public Sql_alloc ...@@ -1831,6 +1831,7 @@ class Unique :public Sql_alloc
Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
uint size_arg, ulong max_in_memory_size_arg); uint size_arg, ulong max_in_memory_size_arg);
~Unique(); ~Unique();
ulong elements_in_tree() { return tree.elements_in_tree; }
inline bool unique_add(void *ptr) inline bool unique_add(void *ptr)
{ {
DBUG_ENTER("unique_add"); DBUG_ENTER("unique_add");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment