WL#926 "AVG(DISTINCT) and other distincts", part 2 (out of 3): clean up

Item_sum_count_distinct, and deploy Unique for use with COUNT(DISTINCT) if there is no blob column in the list of DISTINCT arguments. mysql-test/r/count_distinct2.result: Test results fixed. mysql-test/r/func_group.result: Updated. mysql-test/r/sum_distinct.result: Updated. mysql-test/t/func_group.test: Add a test for COUNT(DISTINCT) and true varchar and case-insensitive collation. The table in the test contains only two distinct values. mysql-test/t/sum_distinct.test: Since now we support INSERT INTO t1 (a) SELECT a+1 FROM t1, shorten the test. Add a nominal test for AVG(DISTINCT) sql/item_sum.cc: Implementation of cleaned up Item_sum_count_distinct. Fixed a bug with COUNT(DISTINCT) and new VARCHAR and collations. Fixed a bug wiht AVG(DISTINCT) and wrong number of output digits after decimal point. sql/item_sum.h: Cleanup for Item_sum_count_distinct. Now if the list of distinct arguments doesn't contain a blob column, we always use Unique and merge-sort to find distinct values. sql/sql_class.h: Added a short-cut to find number of elements in Unique if all elements fit into memory.

WL#926 "AVG(DISTINCT) and other distincts", part 2 (out of 3): clean up
Item_sum_count_distinct, and deploy Unique for use with COUNT(DISTINCT) if there is no blob column in the list of DISTINCT arguments. mysql-test/r/count_distinct2.result: Test results fixed. mysql-test/r/func_group.result: Updated. mysql-test/r/sum_distinct.result: Updated. mysql-test/t/func_group.test: Add a test for COUNT(DISTINCT) and true varchar and case-insensitive collation. The table in the test contains only two distinct values. mysql-test/t/sum_distinct.test: Since now we support INSERT INTO t1 (a) SELECT a+1 FROM t1, shorten the test. Add a nominal test for AVG(DISTINCT) sql/item_sum.cc: Implementation of cleaned up Item_sum_count_distinct. Fixed a bug with COUNT(DISTINCT) and new VARCHAR and collations. Fixed a bug wiht AVG(DISTINCT) and wrong number of output digits after decimal point. sql/item_sum.h: Cleanup for Item_sum_count_distinct. Now if the list of distinct arguments doesn't contain a blob column, we always use Unique and merge-sort to find distinct values. sql/sql_class.h: Added a short-cut to find number of elements in Unique if all elements fit into memory.
897849e5 · unknown · ce4f43cb · 897849e5 · 897849e5 · 897849e5
Commit 897849e5 authored Mar 15, 2005 by unknown
8 changed files
--- a/mysql-test/r/count_distinct2.result
+++ b/mysql-test/r/count_distinct2.result
@@ -116,7 +116,7 @@ count(distinct n)
 5000
 show status like 'Created_tmp_disk_tables';
 Variable_name	Value
-Created_tmp_disk_tables	1
+Created_tmp_disk_tables	0
 drop table t1;
 create table t1 (s text);
 flush status;

--- a/mysql-test/r/func_group.result
+++ b/mysql-test/r/func_group.result
@@ -881,3 +881,10 @@ SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
 MAX(id)
 NULL
 DROP TABLE t1;
+CREATE TABLE t1 (a VARCHAR(400));
+INSERT INTO t1 (a) VALUES ("A"), ("a"), ("a "), ("a   "),
+("B"), ("b"), ("b "), ("b   ");
+SELECT COUNT(DISTINCT a) FROM t1;
+COUNT(DISTINCT a)
+2
+DROP TABLE t1;
--- a/mysql-test/r/sum_distinct.result
+++ b/mysql-test/r/sum_distinct.result
@@ -98,60 +98,60 @@ DROP TABLE t1;
 CREATE TABLE t1 (id INTEGER);
 CREATE TABLE t2 (id INTEGER);
 INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
-INSERT INTO t2 (id) SELECT id FROM t1;
-INSERT INTO t1 (id) SELECT id FROM t2;
+INSERT INTO t1 (id) SELECT id FROM t1;
 /* 8 */
-INSERT INTO t1 (id) SELECT id FROM t2;
+INSERT INTO t1 (id) SELECT id FROM t1;
 /* 12 */
-INSERT INTO t1 (id) SELECT id FROM t2;
+INSERT INTO t1 (id) SELECT id FROM t1;
 /* 16 */
-INSERT INTO t1 (id) SELECT id FROM t2;
+INSERT INTO t1 (id) SELECT id FROM t1;
 /* 20 */
-INSERT INTO t1 (id) SELECT id FROM t2;
+INSERT INTO t1 (id) SELECT id FROM t1;
 /* 24 */
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+1 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+2 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+4 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+8 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+16 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+32 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+64 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+128 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+256 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+512 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+1024 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+2048 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+4096 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+8192 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
+INSERT INTO t1 SELECT id+1 FROM t1;
+INSERT INTO t1 SELECT id+2 FROM t1;
+INSERT INTO t1 SELECT id+4 FROM t1;
+INSERT INTO t1 SELECT id+8 FROM t1;
+INSERT INTO t1 SELECT id+16 FROM t1;
+INSERT INTO t1 SELECT id+32 FROM t1;
+INSERT INTO t1 SELECT id+64 FROM t1;
+INSERT INTO t1 SELECT id+128 FROM t1;
+INSERT INTO t1 SELECT id+256 FROM t1;
+INSERT INTO t1 SELECT id+512 FROM t1;
+SELECT AVG(DISTINCT id) FROM t1 GROUP BY id % 13;
+AVG(DISTINCT id)
+513.5000
+508.0000
+509.0000
+510.0000
+511.0000
+512.0000
+513.0000
+514.0000
+515.0000
+516.0000
+517.0000
+511.5000
+512.5000
+SELECT SUM(DISTINCT id)/COUNT(DISTINCT id) FROM t1 GROUP BY id % 13;
+SUM(DISTINCT id)/COUNT(DISTINCT id)
+513.50000
+508.00000
+509.00000
+510.00000
+511.00000
+512.00000
+513.00000
+514.00000
+515.00000
+516.00000
+517.00000
+511.50000
+512.50000
+INSERT INTO t1 SELECT id+1024 FROM t1;
+INSERT INTO t1 SELECT id+2048 FROM t1;
+INSERT INTO t1 SELECT id+4096 FROM t1;
+INSERT INTO t1 SELECT id+8192 FROM t1;
 INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();
 SELECT SUM(DISTINCT id) sm FROM t1;
 sm

--- a/mysql-test/t/func_group.test
+++ b/mysql-test/t/func_group.test
@@ -591,3 +591,13 @@ INSERT INTO t1 VALUES
  (1,1,4), (2,2,1), (3,1,3), (4,2,1), (5,1,1);
 SELECT MAX(id) FROM t1 WHERE id < 3 AND a=2 AND b=6;
 DROP TABLE t1;
+
+#
+# Test that new VARCHAR correctly works with COUNT(DISTINCT)
+#
+
+CREATE TABLE t1 (a VARCHAR(400));
+INSERT INTO t1 (a) VALUES ("A"), ("a"), ("a "), ("a   "),
+                          ("B"), ("b"), ("b "), ("b   ");
+SELECT COUNT(DISTINCT a) FROM t1;
+DROP TABLE t1;
--- a/mysql-test/t/sum_distinct.test
+++ b/mysql-test/t/sum_distinct.test
@@ -103,64 +103,30 @@ CREATE TABLE t1 (id INTEGER);
 CREATE TABLE t2 (id INTEGER);

 INSERT INTO t1 (id) VALUES (1), (1), (1),(1);
-INSERT INTO t2 (id) SELECT id FROM t1;
-INSERT INTO t1 (id) SELECT id FROM t2; /* 8 */
-INSERT INTO t1 (id) SELECT id FROM t2; /* 12 */
-INSERT INTO t1 (id) SELECT id FROM t2; /* 16 */
-INSERT INTO t1 (id) SELECT id FROM t2; /* 20 */
-INSERT INTO t1 (id) SELECT id FROM t2; /* 24 */
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+1 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+2 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+4 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+8 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+16 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+32 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+64 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+128 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+256 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+512 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+1024 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+2048 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+4096 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-INSERT INTO t2 (id) SELECT id+8192 FROM t1;
-INSERT INTO t1 SELECT id FROM t2;
-DELETE FROM t2;
-#INSERT INTO t2 (id) SELECT id+16384 FROM t1;
-#INSERT INTO t1 SELECT id FROM t2;
-#DELETE FROM t2;
-#INSERT INTO t2 (id) SELECT id+32768 FROM t1;
-#INSERT INTO t1 SELECT id FROM t2;
-#DELETE FROM t2;
-#INSERT INTO t2 (id) SELECT id+65536 FROM t1;
-#INSERT INTO t1 SELECT id FROM t2;
-#DELETE FROM t2;
+INSERT INTO t1 (id) SELECT id FROM t1; /* 8 */
+INSERT INTO t1 (id) SELECT id FROM t1; /* 12 */
+INSERT INTO t1 (id) SELECT id FROM t1; /* 16 */
+INSERT INTO t1 (id) SELECT id FROM t1; /* 20 */
+INSERT INTO t1 (id) SELECT id FROM t1; /* 24 */
+INSERT INTO t1 SELECT id+1 FROM t1;
+INSERT INTO t1 SELECT id+2 FROM t1;
+INSERT INTO t1 SELECT id+4 FROM t1;
+INSERT INTO t1 SELECT id+8 FROM t1;
+INSERT INTO t1 SELECT id+16 FROM t1;
+INSERT INTO t1 SELECT id+32 FROM t1;
+INSERT INTO t1 SELECT id+64 FROM t1;
+INSERT INTO t1 SELECT id+128 FROM t1;
+INSERT INTO t1 SELECT id+256 FROM t1;
+INSERT INTO t1 SELECT id+512 FROM t1;
+
+# Just test that AVG(DISTINCT) is there
+SELECT AVG(DISTINCT id) FROM t1 GROUP BY id % 13;
+SELECT SUM(DISTINCT id)/COUNT(DISTINCT id) FROM t1 GROUP BY id % 13;
+
+INSERT INTO t1 SELECT id+1024 FROM t1;
+INSERT INTO t1 SELECT id+2048 FROM t1;
+INSERT INTO t1 SELECT id+4096 FROM t1;
+INSERT INTO t1 SELECT id+8192 FROM t1;
 INSERT INTO t2 SELECT id FROM t1 ORDER BY id*rand();

 # SELECT '++++++++++++++++++++++++++++++++++++++++++++++++++';

--- a/sql/item_sum.cc
+++ b/sql/item_sum.cc
@@ -719,6 +719,18 @@ String *Item_sum_distinct::val_str(String *str)

 /* Item_sum_avg_distinct */

+void
+Item_sum_avg_distinct::fix_length_and_dec()
+{
+  Item_sum_distinct::fix_length_and_dec();
+  /*
+    AVG() will divide val by count. We need to reserve digits
+    after decimal point as the result can be fractional.
+  */
+  decimals+= 4;
+}
+
+
 void
 Item_sum_avg_distinct::calculate_val_and_count()
 {
@@ -2115,12 +2127,8 @@ my_decimal *Item_variance_field::val_decimal(my_decimal *dec_buf)

 int simple_str_key_cmp(void* arg, byte* key1, byte* key2)
 {
-  Item_sum_count_distinct* item = (Item_sum_count_distinct*)arg;
-  CHARSET_INFO *cs=item->key_charset;
-  uint len=item->key_length;
-  return cs->coll->strnncollsp(cs, 
-			       (const uchar*) key1, len, 
-			       (const uchar*) key2, len, 0);
+  Field *f= (Field*) arg;
+  return f->cmp(key1, key2);
 }

 /*
@@ -2149,54 +2157,42 @@ int composite_key_cmp(void* arg, byte* key1, byte* key2)
  return 0;
 }

-/*
-  helper function for walking the tree when we dump it to MyISAM -
-  tree_walk will call it for each leaf
-*/

-int dump_leaf(byte* key, uint32 count __attribute__((unused)),
-		     Item_sum_count_distinct* item)
+C_MODE_START
+
+static int count_distinct_walk(void *elem, unsigned int count, void *arg)
 {
-  byte* buf = item->table->record[0];
-  int error;
-  /*
-    The first item->rec_offset bytes are taken care of with
-    restore_record(table,default_values) in setup()
-  */
-  memcpy(buf + item->rec_offset, key, item->tree->size_of_element);
-  if ((error = item->table->file->write_row(buf)))
-  {
-    if (error != HA_ERR_FOUND_DUPP_KEY &&
-	error != HA_ERR_FOUND_DUPP_UNIQUE)
-      return 1;
-  }
+  (*((ulonglong*)arg))++;
  return 0;
 }

+C_MODE_END
+

 void Item_sum_count_distinct::cleanup()
 {
  DBUG_ENTER("Item_sum_count_distinct::cleanup");
  Item_sum_int::cleanup();
-  /*
-    Free table and tree if they belong to this item (if item have not pointer
-    to original item from which was made copy => it own its objects )
-  */
+
+  /* Free objects only if we own them. */
  if (!original)
  {
+    /*
+      We need to delete the table and the tree in cleanup() as
+      they were allocated in the runtime memroot. Using the runtime
+      memroot reduces memory footprint for PS/SP and simplifies setup().
+    */
+    delete tree;
+    tree= 0;
    if (table)
    {
-      free_tmp_table(current_thd, table);
+      free_tmp_table(table->in_use, table);
      table= 0;
    }
    delete tmp_table_param;
    tmp_table_param= 0;
-    if (use_tree)
-    {
-      delete_tree(tree);
-      use_tree= 0;
-    }
  }
+  always_null= FALSE;
  DBUG_VOID_RETURN;
 }

@@ -2207,8 +2203,15 @@ void Item_sum_count_distinct::make_unique()
 {
  table=0;
  original= 0;
-  use_tree= 0; // to prevent delete_tree call on uninitialized tree
-  tree= &tree_base;
+  tree= 0;
+  tmp_table_param= 0;
+  always_null= FALSE;
+}
+
+
+Item_sum_count_distinct::~Item_sum_count_distinct()
+{
+  cleanup();
 }


@@ -2216,9 +2219,14 @@ bool Item_sum_count_distinct::setup(THD *thd)
 {
  List<Item> list;
  SELECT_LEX *select_lex= thd->lex->current_select;
-  if (select_lex->linkage == GLOBAL_OPTIONS_TYPE)
-    return 1;
-    
+
+  /*
+    Setup can be called twice for ROLLUP items. This is a bug.
+    Please add DBUG_ASSERT(tree == 0) here when it's fixed.
+  */
+  if (tree || table || tmp_table_param)
+    return FALSE;
+
  if (!(tmp_table_param= new TMP_TABLE_PARAM))
    return 1;

@@ -2238,11 +2246,7 @@ bool Item_sum_count_distinct::setup(THD *thd)
  if (always_null)
    return 0;
  count_field_types(tmp_table_param,list,0);
-  if (table)
-  {
-    free_tmp_table(thd, table);
-    tmp_table_param->cleanup();
-  }
+  DBUG_ASSERT(table == 0);
  if (!(table= create_tmp_table(thd, tmp_table_param, list, (ORDER*) 0, 1,
 				0,
 				select_lex->options | thd->options,
@@ -2251,123 +2255,77 @@ bool Item_sum_count_distinct::setup(THD *thd)
  table->file->extra(HA_EXTRA_NO_ROWS);		// Don't update rows
  table->no_rows=1;

-
-  // no blobs, otherwise it would be MyISAM
  if (table->s->db_type == DB_TYPE_HEAP)
  {
+    /*
+      No blobs, otherwise it would have been MyISAM: set up a compare
+      function and its arguments to use with Unique.
+    */
    qsort_cmp2 compare_key;
    void* cmp_arg;
+    Field **field= table->field;
+    Field **field_end= field + table->s->fields;
+    bool all_binary= TRUE;

-    // to make things easier for dump_leaf if we ever have to dump to MyISAM
-    restore_record(table,s->default_values);
-
-    if (table->s->fields == 1)
+    for (tree_key_length= 0; field < field_end; ++field)
    {
-      /*
-	If we have only one field, which is the most common use of
-	count(distinct), it is much faster to use a simpler key
-	compare method that can take advantage of not having to worry
-	about other fields
-      */
-      Field* field = table->field[0];
-      switch (field->type()) {
-      case MYSQL_TYPE_STRING:
-      case MYSQL_TYPE_VAR_STRING:
-	if (field->binary())
-	{
-	  compare_key = (qsort_cmp2)simple_raw_key_cmp;
-	  cmp_arg = (void*) &key_length;
-	}
-	else
-	{
-	  /*
-	    If we have a string, we must take care of charsets and case
-	    sensitivity
-	  */
-	  compare_key = (qsort_cmp2)simple_str_key_cmp;
-	  cmp_arg = (void*) this;
-	}
-	break;
-      default:
-	/*
-	  Since at this point we cannot have blobs anything else can
-	  be compared with memcmp
-	*/
-	compare_key = (qsort_cmp2)simple_raw_key_cmp;
-	cmp_arg = (void*) &key_length;
-	break;
+      Field *f= *field;
+      enum enum_field_types type= f->type();
+      tree_key_length+= f->pack_length();
+      if (!f->binary() && (type == MYSQL_TYPE_STRING ||
+                           type == MYSQL_TYPE_VAR_STRING ||
+                           type == MYSQL_TYPE_VARCHAR))
+      {
+        all_binary= FALSE;
+        break;
      }
-      key_charset = field->charset();
-      key_length  = field->pack_length();
-      rec_offset  = 1;
    }
-    else // too bad, cannot cheat - there is more than one field
+    if (all_binary)
    {
-      bool all_binary = 1;
-      Field** field, **field_end;
-      field_end = (field = table->field) + table->s->fields;
-      uint32 *lengths;
-      if (!(field_lengths= 
-	    (uint32*) thd->alloc(sizeof(uint32) * table->s->fields)))
-	return 1;
-
-      for (key_length = 0, lengths=field_lengths; field < field_end; ++field)
-      {
-	uint32 length= (*field)->pack_length();
-	key_length += length;
-	*lengths++ = length;
-	if (!(*field)->binary())
-	  all_binary = 0;			// Can't break loop here
-      }
-      rec_offset= table->s->reclength - key_length;
-      if (all_binary)
+      cmp_arg= (void*) &tree_key_length;
+      compare_key= (qsort_cmp2) simple_raw_key_cmp;
+    }
+    else
+    {
+      if (table->s->fields == 1)
      {
-	compare_key = (qsort_cmp2)simple_raw_key_cmp;
-	cmp_arg = (void*) &key_length;
+        /*
+          If we have only one field, which is the most common use of
+          count(distinct), it is much faster to use a simpler key
+          compare method that can take advantage of not having to worry
+          about other fields.
+        */
+        compare_key= (qsort_cmp2) simple_str_key_cmp;
+        cmp_arg= (void*) table->field[0];
+        /* tree_key_length has been set already */
      }
      else
      {
-	compare_key = (qsort_cmp2) composite_key_cmp ;
-	cmp_arg = (void*) this;
+        uint32 *length;
+        compare_key= (qsort_cmp2) composite_key_cmp;
+        cmp_arg= (void*) this;
+        field_lengths= (uint32*) thd->alloc(table->s->fields * sizeof(uint32));
+        for (tree_key_length= 0, length= field_lengths, field= table->field;
+             field < field_end; ++field, ++length)
+        {
+          *length= (*field)->pack_length();
+          tree_key_length+= *length;
+        }
      }
    }
-
-    if (use_tree)
-      delete_tree(tree);
-    init_tree(tree, min(thd->variables.max_heap_table_size,
-			thd->variables.sortbuff_size/16), 0,
-	      key_length, compare_key, 0, NULL, cmp_arg);
-    use_tree = 1;
-
+    DBUG_ASSERT(tree == 0);
+    tree= new Unique(compare_key, cmp_arg, tree_key_length,
+                     thd->variables.max_heap_table_size);
    /*
-      The only time key_length could be 0 is if someone does
+      The only time tree_key_length could be 0 is if someone does
      count(distinct) on a char(0) field - stupid thing to do,
      but this has to be handled - otherwise someone can crash
      the server with a DoS attack
    */
-    max_elements_in_tree = ((key_length) ? 
-			    thd->variables.max_heap_table_size/key_length : 1);
-
-  }
-  if (original)
-  {
-    original->table= table;
-    original->use_tree= use_tree;
+    if (! tree)
+      return TRUE;
  }
-  return 0;
-}
-
-
-int Item_sum_count_distinct::tree_to_myisam()
-{
-  if (create_myisam_from_heap(current_thd, table, tmp_table_param,
-			      HA_ERR_RECORD_FILE_FULL, 1) ||
-      tree_walk(tree, (tree_walk_action)&dump_leaf, (void*)this,
-		left_root_right))
-    return 1;
-  delete_tree(tree);
-  use_tree = 0;
-  return 0;
+  return FALSE;
 }


@@ -2379,8 +2337,9 @@ Item *Item_sum_count_distinct::copy_or_same(THD* thd)

 void Item_sum_count_distinct::clear()
 {
-  if (use_tree)
-    reset_tree(tree);
+  /* tree and table can be both null only if always_null */
+  if (tree)
+    tree->reset();
  else if (table)
  {
    table->file->extra(HA_EXTRA_NO_CACHE);
@@ -2401,32 +2360,21 @@ bool Item_sum_count_distinct::add()
    if ((*field)->is_real_null(0))
      return 0;					// Don't count NULL

-  if (use_tree)
+  if (tree)
  {
    /*
-      If the tree got too big, convert to MyISAM, otherwise insert into the
-      tree.
+      The first few bytes of record (at least one) are just markers
+      for deleted and NULLs. We want to skip them since they will
+      bloat the tree without providing any valuable info. Besides,
+      key_length used to initialize the tree didn't include space for them.
    */
-    if (tree->elements_in_tree > max_elements_in_tree)
-    {
-      if (tree_to_myisam())
-	return 1;
-    }
-    else if (!tree_insert(tree, table->record[0] + rec_offset, 0,
-			  tree->custom_arg))
-      return 1;
-  }
-  else if ((error=table->file->write_row(table->record[0])))
-  {
-    if (error != HA_ERR_FOUND_DUPP_KEY &&
-	error != HA_ERR_FOUND_DUPP_UNIQUE)
-    {
-      if (create_myisam_from_heap(current_thd, table, tmp_table_param, error,
-				  1))
-	return 1;				// Not a table_is_full error
-    }
+    return tree->unique_add(table->record[0] + table->s->null_bytes);
  }
-  return 0;
+  if ((error= table->file->write_row(table->record[0])) &&
+      error != HA_ERR_FOUND_DUPP_KEY &&
+      error != HA_ERR_FOUND_DUPP_UNIQUE)
+    return TRUE;
+  return FALSE;
 }


@@ -2435,8 +2383,16 @@ longlong Item_sum_count_distinct::val_int()
  DBUG_ASSERT(fixed == 1);
  if (!table)					// Empty query
    return LL(0);
-  if (use_tree)
-    return tree->elements_in_tree;
+  if (tree)
+  {
+    ulonglong count;
+
+    if (tree->elements == 0)
+      return (longlong) tree->elements_in_tree(); // everything fits in memory
+    count= 0;
+    tree->walk(count_distinct_walk, (void*) &count);
+    return (longlong) count;
+  }
  table->file->info(HA_STATUS_VARIABLE | HA_STATUS_NO_LOCK);
  return table->file->records;
 }

--- a/sql/item_sum.h
+++ b/sql/item_sum.h
@@ -239,6 +239,7 @@ class Item_sum_avg_distinct: public Item_sum_distinct
 public:
  Item_sum_avg_distinct(Item *item_arg) : Item_sum_distinct(item_arg) {}

+  void fix_length_and_dec();
  virtual void calculate_val_and_count();
  enum Sumfunctype sum_func () const { return AVG_DISTINCT_FUNC; }
  const char *func_name() const { return "avg_distinct"; }
@@ -280,68 +281,44 @@ class TMP_TABLE_PARAM;
 class Item_sum_count_distinct :public Item_sum_int
 {
  TABLE *table;
-  table_map used_table_cache;
  uint32 *field_lengths;
  TMP_TABLE_PARAM *tmp_table_param;
-  TREE tree_base;
-  TREE *tree;
+  /*
+    If there are no blobs, we can use a tree, which
+    is faster than heap table. In that case, we still use the table
+    to help get things set up, but we insert nothing in it
+  */
+  Unique *tree;
  /*
    Following is 0 normal object and pointer to original one for copy 
    (to correctly free resources)
  */
  Item_sum_count_distinct *original;
+  uint tree_key_length;

-  uint key_length;
-  CHARSET_INFO *key_charset;
-  
-  /*
-    Calculated based on max_heap_table_size. If reached,
-    walk the tree and dump it into MyISAM table
-  */
-  uint max_elements_in_tree;
-
-  /*
-    The first few bytes of record ( at least one)
-    are just markers for deleted and NULLs. We want to skip them since
-    they will just bloat the tree without providing any valuable info
-  */
-  int rec_offset;

-  /*
-    If there are no blobs, we can use a tree, which
-    is faster than heap table. In that case, we still use the table
-    to help get things set up, but we insert nothing in it
-  */
-  bool use_tree;
  bool always_null;		// Set to 1 if the result is always NULL

-  int tree_to_myisam();

  friend int composite_key_cmp(void* arg, byte* key1, byte* key2);
  friend int simple_str_key_cmp(void* arg, byte* key1, byte* key2);
-  friend int simple_raw_key_cmp(void* arg, byte* key1, byte* key2);
-  friend int dump_leaf(byte* key, uint32 count __attribute__((unused)),
-		       Item_sum_count_distinct* item);

-  public:
+public:
  Item_sum_count_distinct(List<Item> &list)
-    :Item_sum_int(list), table(0), used_table_cache(~(table_map) 0),
-     tmp_table_param(0), tree(&tree_base), original(0), use_tree(0),
-     always_null(0)
+    :Item_sum_int(list), table(0), field_lengths(0), tmp_table_param(0),
+     tree(0), original(0), always_null(FALSE)
  { quick_group= 0; }
  Item_sum_count_distinct(THD *thd, Item_sum_count_distinct *item)
    :Item_sum_int(thd, item), table(item->table),
-     used_table_cache(item->used_table_cache),
     field_lengths(item->field_lengths),
     tmp_table_param(item->tmp_table_param),
-     tree(item->tree), original(item), key_length(item->key_length),
-     max_elements_in_tree(item->max_elements_in_tree),
-     rec_offset(item->rec_offset), use_tree(item->use_tree),
+     tree(item->tree), original(item), tree_key_length(item->tree_key_length),
     always_null(item->always_null)
  {}
+  ~Item_sum_count_distinct();
+
  void cleanup();

-  table_map used_tables() const { return used_table_cache; }
  enum Sumfunctype sum_func () const { return COUNT_DISTINCT_FUNC; }
  void clear();
  bool add();

--- a/sql/sql_class.h
+++ b/sql/sql_class.h
@@ -1831,6 +1831,7 @@ class Unique :public Sql_alloc
  Unique(qsort_cmp2 comp_func, void *comp_func_fixed_arg,
 	 uint size_arg, ulong max_in_memory_size_arg);
  ~Unique();
+  ulong elements_in_tree() { return tree.elements_in_tree; }
  inline bool unique_add(void *ptr)
  {
    DBUG_ENTER("unique_add");