Commit 97816c1f authored by unknown's avatar unknown

WL#926 "AVG(DISTINCT) and other distincts", part 2 (out of 3): clean up

Item_sum_count_distinct, and
deploy Unique for use with COUNT(DISTINCT) if there is no blob
column in the list of DISTINCT arguments.


mysql-test/r/count_distinct2.result:
  Test results fixed.
mysql-test/r/func_group.result:
  Updated.
mysql-test/r/sum_distinct.result:
  Updated.
mysql-test/t/func_group.test:
  Add a test for COUNT(DISTINCT) and true varchar and case-insensitive
  collation. The table in the test contains only two distinct values.
mysql-test/t/sum_distinct.test:
  Since now we support INSERT INTO t1 (a) SELECT a+1 FROM t1, shorten
  the test.
  Add a nominal test for AVG(DISTINCT)
sql/item_sum.cc:
  Implementation of cleaned up Item_sum_count_distinct.
  Fixed a bug with COUNT(DISTINCT) and new VARCHAR and collations.
  Fixed a bug with AVG(DISTINCT) and wrong number of output digits
  after decimal point.
sql/item_sum.h:
  Cleanup for Item_sum_count_distinct.
  Now if the list of distinct arguments doesn't contain a blob column,
  we always use Unique and merge-sort to find distinct values.
sql/sql_class.h:
  Added a short-cut to find number of elements in Unique if all elements fit
  into memory.
parent ba133947
...@@ -116,7 +116,7 @@ count(distinct n) ...@@ -116,7 +116,7 @@ count(distinct n)
5000 5000
show status like 'Created_tmp_disk_tables'; show status like 'Created_tmp_disk_tables';
Variable_name Value Variable_name Value
Created_tmp_disk_tables 1 Created_tmp_disk_tables 0
drop table t1; drop table t1;
create table t1 (s text); create table t1 (s text);
flush status; flush status;
......
...@@ -881,3 +881,10 @@ SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6; ...@@ -881,3 +881,10 @@ SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
MAX(id) MAX(id)
NULL NULL
DROP TABLE t1; DROP TABLE t1;
CREATE TABLE t1 (a VARCHAR(400));
INSERT INTO t1 (a) VALUES ("A"), ("a"), ("a "), ("a "),
("B"), ("b"), ("b "), ("b ");
SELECT COUNT(DISTINCT a) FROM t1;
COUNT(DISTINCT a)
2
DROP TABLE t1;
...@@ -98,60 +98,60 @@ DROP TABLE t1; ...@@ -98,60 +98,60 @@ DROP TABLE t1;
CREATE TABLE t1 (id INTEGER); CREATE TABLE t1 (id INTEGER);
CREATE TABLE t2 (id INTEGER); CREATE TABLE t2 (id INTEGER);
INSERT INTO t1 (id) VALUES (1), (1), (1),(1); INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
INSERT INTO t2 (id) SELECT id FROM t1; INSERT INTO t1 (id) SELECT id FROM t1;
INSERT INTO t1 (id) SELECT id FROM t2;
/* 8 */ /* 8 */
INSERT INTO t1 (id) SELECT id FROM t2; INSERT INTO t1 (id) SELECT id FROM t1;
/* 12 */ /* 12 */
INSERT INTO t1 (id) SELECT id FROM t2; INSERT INTO t1 (id) SELECT id FROM t1;
/* 16 */ /* 16 */
INSERT INTO t1 (id) SELECT id FROM t2; INSERT INTO t1 (id) SELECT id FROM t1;
/* 20 */ /* 20 */
INSERT INTO t1 (id) SELECT id FROM t2; INSERT INTO t1 (id) SELECT id FROM t1;
/* 24 */ /* 24 */
DELETE FROM t2; INSERT INTO t1 SELECT id+1 FROM t1;
INSERT INTO t2 (id) SELECT id+1 FROM t1; INSERT INTO t1 SELECT id+2 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+4 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+8 FROM t1;
INSERT INTO t2 (id) SELECT id+2 FROM t1; INSERT INTO t1 SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+32 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+64 FROM t1;
INSERT INTO t2 (id) SELECT id+4 FROM t1; INSERT INTO t1 SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+256 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+512 FROM t1;
INSERT INTO t2 (id) SELECT id+8 FROM t1; SELECT AVG(DISTINCT id) FROM t1 GROUP BY id % 13;
INSERT INTO t1 SELECT id FROM t2; AVG(DISTINCT id)
DELETE FROM t2; 513.5000
INSERT INTO t2 (id) SELECT id+16 FROM t1; 508.0000
INSERT INTO t1 SELECT id FROM t2; 509.0000
DELETE FROM t2; 510.0000
INSERT INTO t2 (id) SELECT id+32 FROM t1; 511.0000
INSERT INTO t1 SELECT id FROM t2; 512.0000
DELETE FROM t2; 513.0000
INSERT INTO t2 (id) SELECT id+64 FROM t1; 514.0000
INSERT INTO t1 SELECT id FROM t2; 515.0000
DELETE FROM t2; 516.0000
INSERT INTO t2 (id) SELECT id+128 FROM t1; 517.0000
INSERT INTO t1 SELECT id FROM t2; 511.5000
DELETE FROM t2; 512.5000
INSERT INTO t2 (id) SELECT id+256 FROM t1; SELECT SUM(DISTINCT id)/COUNT(DISTINCT id) FROM t1 GROUP BY id % 13;
INSERT INTO t1 SELECT id FROM t2; SUM(DISTINCT id)/COUNT(DISTINCT id)
DELETE FROM t2; 513.50000
INSERT INTO t2 (id) SELECT id+512 FROM t1; 508.00000
INSERT INTO t1 SELECT id FROM t2; 509.00000
DELETE FROM t2; 510.00000
INSERT INTO t2 (id) SELECT id+1024 FROM t1; 511.00000
INSERT INTO t1 SELECT id FROM t2; 512.00000
DELETE FROM t2; 513.00000
INSERT INTO t2 (id) SELECT id+2048 FROM t1; 514.00000
INSERT INTO t1 SELECT id FROM t2; 515.00000
DELETE FROM t2; 516.00000
INSERT INTO t2 (id) SELECT id+4096 FROM t1; 517.00000
INSERT INTO t1 SELECT id FROM t2; 511.50000
DELETE FROM t2; 512.50000
INSERT INTO t2 (id) SELECT id+8192 FROM t1; INSERT INTO t1 SELECT id+1024 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+2048 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id+8192 FROM t1;
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand(); INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
SELECT SUM(DISTINCT id) sm FROM t1; SELECT SUM(DISTINCT id) sm FROM t1;
sm sm
......
...@@ -591,3 +591,13 @@ INSERT INTO t1 VALUES ...@@ -591,3 +591,13 @@ INSERT INTO t1 VALUES
(1,1,4), (2,2,1), (3,1,3), (4,2,1), (5,1,1); (1,1,4), (2,2,1), (3,1,3), (4,2,1), (5,1,1);
SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6; SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
DROP TABLE t1; DROP TABLE t1;
#
# Test that new VARCHAR correctly works with COUNT(DISTINCT).
# The eight rows below differ only in letter case and trailing spaces;
# under the case-insensitive collation this test relies on they collapse
# to two distinct values, so the expected result is 2.
#
CREATE TABLE t1 (a VARCHAR(400));
INSERT INTO t1 (a) VALUES ("A"), ("a"), ("a "), ("a "),
("B"), ("b"), ("b "), ("b ");
SELECT COUNT(DISTINCT a) FROM t1;
DROP TABLE t1;
...@@ -103,64 +103,30 @@ CREATE TABLE t1 (id INTEGER); ...@@ -103,64 +103,30 @@ CREATE TABLE t1 (id INTEGER);
CREATE TABLE t2 (id INTEGER); CREATE TABLE t2 (id INTEGER);
INSERT INTO t1 (id) VALUES (1), (1), (1),(1); INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
INSERT INTO t2 (id) SELECT id FROM t1; INSERT INTO t1 (id) SELECT id FROM t1; /* 8 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 8 */ INSERT INTO t1 (id) SELECT id FROM t1; /* 12 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 12 */ INSERT INTO t1 (id) SELECT id FROM t1; /* 16 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 16 */ INSERT INTO t1 (id) SELECT id FROM t1; /* 20 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 20 */ INSERT INTO t1 (id) SELECT id FROM t1; /* 24 */
INSERT INTO t1 (id) SELECT id FROM t2; /* 24 */ INSERT INTO t1 SELECT id+1 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+2 FROM t1;
INSERT INTO t2 (id) SELECT id+1 FROM t1; INSERT INTO t1 SELECT id+4 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+8 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+16 FROM t1;
INSERT INTO t2 (id) SELECT id+2 FROM t1; INSERT INTO t1 SELECT id+32 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+64 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+128 FROM t1;
INSERT INTO t2 (id) SELECT id+4 FROM t1; INSERT INTO t1 SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+512 FROM t1;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+8 FROM t1; # Just test that AVG(DISTINCT) is there
INSERT INTO t1 SELECT id FROM t2; SELECT AVG(DISTINCT id) FROM t1 GROUP BY id % 13;
DELETE FROM t2; SELECT SUM(DISTINCT id)/COUNT(DISTINCT id) FROM t1 GROUP BY id % 13;
INSERT INTO t2 (id) SELECT id+16 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+1024 FROM t1;
DELETE FROM t2; INSERT INTO t1 SELECT id+2048 FROM t1;
INSERT INTO t2 (id) SELECT id+32 FROM t1; INSERT INTO t1 SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id FROM t2; INSERT INTO t1 SELECT id+8192 FROM t1;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+64 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+128 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+256 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+512 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+1024 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+2048 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+4096 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
INSERT INTO t2 (id) SELECT id+8192 FROM t1;
INSERT INTO t1 SELECT id FROM t2;
DELETE FROM t2;
#INSERT INTO t2 (id) SELECT id+16384 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
#INSERT INTO t2 (id) SELECT id+32768 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
#INSERT INTO t2 (id) SELECT id+65536 FROM t1;
#INSERT INTO t1 SELECT id FROM t2;
#DELETE FROM t2;
INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand(); INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
# SELECT '++++++++++++++++++++++++++++++++++++++++++++++++++'; # SELECT '++++++++++++++++++++++++++++++++++++++++++++++++++';
......
...@@ -719,6 +719,18 @@ String *Item_sum_distinct::val_str(String *str) ...@@ -719,6 +719,18 @@ String *Item_sum_distinct::val_str(String *str)
/* Item_sum_avg_distinct */ /* Item_sum_avg_distinct */
/*
  Set up result metadata for AVG(DISTINCT).

  Runs the base-class Item_sum_distinct::fix_length_and_dec() first and
  then increases 'decimals' by 4 on top of whatever the base class set.

  NOTE(review): no upper bound is applied to 'decimals' after the
  increment - confirm that the base class cannot leave it close to the
  maximum supported value.
*/
void
Item_sum_avg_distinct::fix_length_and_dec()
{
Item_sum_distinct::fix_length_and_dec();
/*
AVG() will divide val by count. We need to reserve digits
after decimal point as the result can be fractional.
*/
decimals+= 4;
}
void void
Item_sum_avg_distinct::calculate_val_and_count() Item_sum_avg_distinct::calculate_val_and_count()
{ {
...@@ -2115,12 +2127,8 @@ my_decimal *Item_variance_field::val_decimal(my_decimal *dec_buf) ...@@ -2115,12 +2127,8 @@ my_decimal *Item_variance_field::val_decimal(my_decimal *dec_buf)
int simple_str_key_cmp(void* arg, byte* key1, byte* key2) int simple_str_key_cmp(void* arg, byte* key1, byte* key2)
{ {
Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg; Field *f= (Field*) arg;
CHARSET_INFO *cs=item->key_charset; return f->cmp(key1, key2);
uint len=item->key_length;
return cs->coll->strnncollsp(cs,
(const uchar*) key1, len,
(const uchar*) key2, len, 0);
} }
/* /*
...@@ -2149,54 +2157,42 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2) ...@@ -2149,54 +2157,42 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2)
return 0; return 0;
} }
/*
helper function for walking the tree when we dump it to MyISAM -
tree_walk will call it for each leaf
*/
int dump_leaf(byte* key, uint32 count __attribute__((unused)), C_MODE_START
Item_sum_count_distinct* item)
static int count_distinct_walk(void *elem, unsigned int count, void *arg)
{ {
byte* buf = item->table->record[0]; (*((ulonglong*)arg))++;
int error;
/*
The first item->rec_offset bytes are taken care of with
restore_record(table,default_values) in setup()
*/
memcpy(buf + item->rec_offset, key, item->tree->size_of_element);
if ((error = item->table->file->write_row(buf)))
{
if (error != HA_ERR_FOUND_DUPP_KEY &&
error != HA_ERR_FOUND_DUPP_UNIQUE)
return 1;
}
return 0; return 0;
} }
C_MODE_END
void Item_sum_count_distinct::cleanup() void Item_sum_count_distinct::cleanup()
{ {
DBUG_ENTER("Item_sum_count_distinct::cleanup"); DBUG_ENTER("Item_sum_count_distinct::cleanup");
Item_sum_int::cleanup(); Item_sum_int::cleanup();
/*
Free table and tree if they belong to this item (if item have not pointer /* Free objects only if we own them. */
to original item from which was made copy => it own its objects )
*/
if (!original) if (!original)
{ {
/*
We need to delete the table and the tree in cleanup() as
they were allocated in the runtime memroot. Using the runtime
memroot reduces memory footprint for PS/SP and simplifies setup().
*/
delete tree;
tree= 0;
if (table) if (table)
{ {
free_tmp_table(current_thd, table); free_tmp_table(table->in_use, table);
table= 0; table= 0;
} }
delete tmp_table_param; delete tmp_table_param;
tmp_table_param= 0; tmp_table_param= 0;
if (use_tree)
{
delete_tree(tree);
use_tree= 0;
}
} }
always_null= FALSE;
DBUG_VOID_RETURN; DBUG_VOID_RETURN;
} }
...@@ -2207,8 +2203,15 @@ void Item_sum_count_distinct::make_unique() ...@@ -2207,8 +2203,15 @@ void Item_sum_count_distinct::make_unique()
{ {
table=0; table=0;
original= 0; original= 0;
use_tree= 0; // to prevent delete_tree call on uninitialized tree tree= 0;
tree= &tree_base; tmp_table_param= 0;
always_null= FALSE;
}
Item_sum_count_distinct::~Item_sum_count_distinct()
{
cleanup();
} }
...@@ -2216,9 +2219,14 @@ bool Item_sum_count_distinct::setup(THD *thd) ...@@ -2216,9 +2219,14 @@ bool Item_sum_count_distinct::setup(THD *thd)
{ {
List<Item> list; List<Item> list;
SELECT_LEX *select_lex= thd->lex->current_select; SELECT_LEX *select_lex= thd->lex->current_select;
if (select_lex->linkage == GLOBAL_OPTIONS_TYPE)
return 1; /*
Setup can be called twice for ROLLUP items. This is a bug.
Please add DBUG_ASSERT(tree == 0) here when it's fixed.
*/
if (tree || table || tmp_table_param)
return FALSE;
if (!(tmp_table_param= new TMP_TABLE_PARAM)) if (!(tmp_table_param= new TMP_TABLE_PARAM))
return 1; return 1;
...@@ -2238,11 +2246,7 @@ bool Item_sum_count_distinct::setup(THD *thd) ...@@ -2238,11 +2246,7 @@ bool Item_sum_count_distinct::setup(THD *thd)
if (always_null) if (always_null)
return 0; return 0;
count_field_types(tmp_table_param,list,0); count_field_types(tmp_table_param,list,0);
if (table) DBUG_ASSERT(table == 0);
{
free_tmp_table(thd, table);
tmp_table_param->cleanup();
}
if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*) 0, 1, if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*) 0, 1,
0, 0,
select_lex->options | thd->options, select_lex->options | thd->options,
...@@ -2251,123 +2255,77 @@ bool Item_sum_count_distinct::setup(THD *thd) ...@@ -2251,123 +2255,77 @@ bool Item_sum_count_distinct::setup(THD *thd)
table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows table->file->extra(HA_EXTRA_NO_ROWS); // Don't update rows
table->no_rows=1; table->no_rows=1;
// no blobs, otherwise it would be MyISAM
if (table->s->db_type == DB_TYPE_HEAP) if (table->s->db_type == DB_TYPE_HEAP)
{ {
/*
No blobs, otherwise it would have been MyISAM: set up a compare
function and its arguments to use with Unique.
*/
qsort_cmp2 compare_key; qsort_cmp2 compare_key;
void* cmp_arg; void* cmp_arg;
Field **field= table->field;
Field **field_end= field + table->s->fields;
bool all_binary= TRUE;
// to make things easier for dump_leaf if we ever have to dump to MyISAM for (tree_key_length= 0; field < field_end; ++field)
restore_record(table,s->default_values);
if (table->s->fields == 1)
{ {
/* Field *f= *field;
If we have only one field, which is the most common use of enum enum_field_types type= f->type();
count(distinct), it is much faster to use a simpler key tree_key_length+= f->pack_length();
compare method that can take advantage of not having to worry if (!f->binary() && (type == MYSQL_TYPE_STRING ||
about other fields type == MYSQL_TYPE_VAR_STRING ||
*/ type == MYSQL_TYPE_VARCHAR))
Field* field = table->field[0]; {
switch (field->type()) { all_binary= FALSE;
case MYSQL_TYPE_STRING: break;
case MYSQL_TYPE_VAR_STRING:
if (field->binary())
{
compare_key = (qsort_cmp2)simple_raw_key_cmp;
cmp_arg = (void*) &key_length;
}
else
{
/*
If we have a string, we must take care of charsets and case
sensitivity
*/
compare_key = (qsort_cmp2)simple_str_key_cmp;
cmp_arg = (void*) this;
}
break;
default:
/*
Since at this point we cannot have blobs anything else can
be compared with memcmp
*/
compare_key = (qsort_cmp2)simple_raw_key_cmp;
cmp_arg = (void*) &key_length;
break;
} }
key_charset = field->charset();
key_length = field->pack_length();
rec_offset = 1;
} }
else // too bad, cannot cheat - there is more than one field if (all_binary)
{ {
bool all_binary = 1; cmp_arg= (void*) &tree_key_length;
Field** field, **field_end; compare_key= (qsort_cmp2) simple_raw_key_cmp;
field_end = (field = table->field) + table->s->fields; }
uint32 *lengths; else
if (!(field_lengths= {
(uint32*) thd->alloc(sizeof(uint32) * table->s->fields))) if (table->s->fields == 1)
return 1;
for (key_length = 0, lengths=field_lengths; field < field_end; ++field)
{
uint32 length= (*field)->pack_length();
key_length += length;
*lengths++ = length;
if (!(*field)->binary())
all_binary = 0; // Can't break loop here
}
rec_offset= table->s->reclength - key_length;
if (all_binary)
{ {
compare_key = (qsort_cmp2)simple_raw_key_cmp; /*
cmp_arg = (void*) &key_length; If we have only one field, which is the most common use of
count(distinct), it is much faster to use a simpler key
compare method that can take advantage of not having to worry
about other fields.
*/
compare_key= (qsort_cmp2) simple_str_key_cmp;
cmp_arg= (void*) table->field[0];
/* tree_key_length has been set already */
} }
else else
{ {
compare_key = (qsort_cmp2) composite_key_cmp ; uint32 *length;
cmp_arg = (void*) this; compare_key= (qsort_cmp2) composite_key_cmp;
cmp_arg= (void*) this;
field_lengths= (uint32*) thd->alloc(table->s->fields * sizeof(uint32));
for (tree_key_length= 0, length= field_lengths, field= table->field;
field < field_end; ++field, ++length)
{
*length= (*field)->pack_length();
tree_key_length+= *length;
}
} }
} }
DBUG_ASSERT(tree == 0);
if (use_tree) tree= new Unique(compare_key, cmp_arg, tree_key_length,
delete_tree(tree); thd->variables.max_heap_table_size);
init_tree(tree, min(thd->variables.max_heap_table_size,
thd->variables.sortbuff_size/16), 0,
key_length, compare_key, 0, NULL, cmp_arg);
use_tree = 1;
/* /*
The only time key_length could be 0 is if someone does The only time tree_key_length could be 0 is if someone does
count(distinct) on a char(0) field - stupid thing to do, count(distinct) on a char(0) field - stupid thing to do,
but this has to be handled - otherwise someone can crash but this has to be handled - otherwise someone can crash
the server with a DoS attack the server with a DoS attack
*/ */
max_elements_in_tree = ((key_length) ? if (! tree)
thd->variables.max_heap_table_size/key_length : 1); return TRUE;
}
if (original)
{
original->table= table;
original->use_tree= use_tree;
} }
return 0; return FALSE;
}
int Item_sum_count_distinct::tree_to_myisam()
{
if (create_myisam_from_heap(current_thd, table, tmp_table_param,
HA_ERR_RECORD_FILE_FULL, 1) ||
tree_walk(tree, (tree_walk_action)&dump_leaf, (void*)this,
left_root_right))
return 1;
delete_tree(tree);
use_tree = 0;
return 0;
} }
...@@ -2379,8 +2337,9 @@ Item *Item_sum_count_distinct::copy_or_same(THD* thd) ...@@ -2379,8 +2337,9 @@ Item *Item_sum_count_distinct::copy_or_same(THD* thd)
void Item_sum_count_distinct::clear() void Item_sum_count_distinct::clear()
{ {
if (use_tree) /* tree and table can be both null only if always_null */
reset_tree(tree); if (tree)
tree->reset();
else if (table) else if (table)
{ {
table->file->extra(HA_EXTRA_NO_CACHE); table->file->extra(HA_EXTRA_NO_CACHE);
...@@ -2401,32 +2360,21 @@ bool Item_sum_count_distinct::add() ...@@ -2401,32 +2360,21 @@ bool Item_sum_count_distinct::add()
if ((*field)->is_real_null(0)) if ((*field)->is_real_null(0))
return 0; // Don't count NULL return 0; // Don't count NULL
if (use_tree) if (tree)
{ {
/* /*
If the tree got too big, convert to MyISAM, otherwise insert into the The first few bytes of record (at least one) are just markers
tree. for deleted and NULLs. We want to skip them since they will
bloat the tree without providing any valuable info. Besides,
key_length used to initialize the tree didn't include space for them.
*/ */
if (tree->elements_in_tree > max_elements_in_tree) return tree->unique_add(table->record[0] + table->s->null_bytes);
{
if (tree_to_myisam())
return 1;
}
else if (!tree_insert(tree, table->record[0] + rec_offset, 0,
tree->custom_arg))
return 1;
}
else if ((error=table->file->write_row(table->record[0])))
{
if (error != HA_ERR_FOUND_DUPP_KEY &&
error != HA_ERR_FOUND_DUPP_UNIQUE)
{
if (create_myisam_from_heap(current_thd, table, tmp_table_param, error,
1))
return 1; // Not a table_is_full error
}
} }
return 0; if ((error= table->file->write_row(table->record[0])) &&
error != HA_ERR_FOUND_DUPP_KEY &&
error != HA_ERR_FOUND_DUPP_UNIQUE)
return TRUE;
return FALSE;
} }
...@@ -2435,8 +2383,16 @@ longlong Item_sum_count_distinct::val_int() ...@@ -2435,8 +2383,16 @@ longlong Item_sum_count_distinct::val_int()
DBUG_ASSERT(fixed == 1); DBUG_ASSERT(fixed == 1);
if (!table) // Empty query if (!table) // Empty query
return LL(0); return LL(0);
if (use_tree) if (tree)
return tree->elements_in_tree; {
ulonglong count;
if (tree->elements == 0)
return (longlong) tree->elements_in_tree(); // everything fits in memory
count= 0;
tree->walk(count_distinct_walk, (void*) &count);
return (longlong) count;
}
table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK); table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
return table->file->records; return table->file->records;
} }
......
...@@ -239,6 +239,7 @@ class Item_sum_avg_distinct: public Item_sum_distinct ...@@ -239,6 +239,7 @@ class Item_sum_avg_distinct: public Item_sum_distinct
public: public:
Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {} Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {}
void fix_length_and_dec();
virtual void calculate_val_and_count(); virtual void calculate_val_and_count();
enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; } enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; }
const char *func_name() const { return "avg_distinct"; } const char *func_name() const { return "avg_distinct"; }
...@@ -280,68 +281,44 @@ class TMP_TABLE_PARAM; ...@@ -280,68 +281,44 @@ class TMP_TABLE_PARAM;
class Item_sum_count_distinct :public Item_sum_int class Item_sum_count_distinct :public Item_sum_int
{ {
TABLE *table; TABLE *table;
table_map used_table_cache;
uint32 *field_lengths; uint32 *field_lengths;
TMP_TABLE_PARAM *tmp_table_param; TMP_TABLE_PARAM *tmp_table_param;
TREE tree_base; /*
TREE *tree; If there are no blobs, we can use a tree, which
is faster than heap table. In that case, we still use the table
to help get things set up, but we insert nothing in it
*/
Unique *tree;
/* /*
Following is 0 normal object and pointer to original one for copy Following is 0 normal object and pointer to original one for copy
(to correctly free resources) (to correctly free resources)
*/ */
Item_sum_count_distinct *original; Item_sum_count_distinct *original;
uint tree_key_length;
uint key_length;
CHARSET_INFO *key_charset;
/*
Calculated based on max_heap_table_size. If reached,
walk the tree and dump it into MyISAM table
*/
uint max_elements_in_tree;
/*
The first few bytes of record ( at least one)
are just markers for deleted and NULLs. We want to skip them since
they will just bloat the tree without providing any valuable info
*/
int rec_offset;
/*
If there are no blobs, we can use a tree, which
is faster than heap table. In that case, we still use the table
to help get things set up, but we insert nothing in it
*/
bool use_tree;
bool always_null; // Set to 1 if the result is always NULL bool always_null; // Set to 1 if the result is always NULL
int tree_to_myisam();
friend int composite_key_cmp(void* arg, byte* key1, byte* key2); friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
friend int simple_str_key_cmp(void* arg, byte* key1, byte* key2); friend int simple_str_key_cmp(void* arg, byte* key1, byte* key2);
friend int simple_raw_key_cmp(void* arg, byte* key1, byte* key2);
friend int dump_leaf(byte* key, uint32 count __attribute__((unused)),
Item_sum_count_distinct* item);
public: public:
Item_sum_count_distinct(List<Item> &list) Item_sum_count_distinct(List<Item> &list)
:Item_sum_int(list), table(0), used_table_cache(~(table_map) 0), :Item_sum_int(list), table(0), field_lengths(0), tmp_table_param(0),
tmp_table_param(0), tree(&tree_base), original(0), use_tree(0), tree(0), original(0), always_null(FALSE)
always_null(0)
{ quick_group= 0; } { quick_group= 0; }
Item_sum_count_distinct(THD *thd, Item_sum_count_distinct *item) Item_sum_count_distinct(THD *thd, Item_sum_count_distinct *item)
:Item_sum_int(thd, item), table(item->table), :Item_sum_int(thd, item), table(item->table),
used_table_cache(item->used_table_cache),
field_lengths(item->field_lengths), field_lengths(item->field_lengths),
tmp_table_param(item->tmp_table_param), tmp_table_param(item->tmp_table_param),
tree(item->tree), original(item), key_length(item->key_length), tree(item->tree), original(item), tree_key_length(item->tree_key_length),
max_elements_in_tree(item->max_elements_in_tree),
rec_offset(item->rec_offset), use_tree(item->use_tree),
always_null(item->always_null) always_null(item->always_null)
{} {}
~Item_sum_count_distinct();
void cleanup(); void cleanup();
table_map used_tables() const { return used_table_cache; }
enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; } enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; }
void clear(); void clear();
bool add(); bool add();
......
...@@ -1831,6 +1831,7 @@ class Unique :public Sql_alloc ...@@ -1831,6 +1831,7 @@ class Unique :public Sql_alloc
Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg, Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
uint size_arg, ulong max_in_memory_size_arg); uint size_arg, ulong max_in_memory_size_arg);
~Unique(); ~Unique();
ulong elements_in_tree() { return tree.elements_in_tree; }
inline bool unique_add(void *ptr) inline bool unique_add(void *ptr)
{ {
DBUG_ENTER("unique_add"); DBUG_ENTER("unique_add");
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment