diff --git a/sql/examples/ha_tina.h b/sql/examples/ha_tina.h index 67a907fddb6e8359e7877d1b9627226d85c7ad9c..22193c01013de1ddcc6ecbead099c4c0816c1df5 100644 --- a/sql/examples/ha_tina.h +++ b/sql/examples/ha_tina.h @@ -90,6 +90,12 @@ class ha_tina: public handler /* The next method will never be called */ virtual double read_time(ha_rows rows) { DBUG_ASSERT(0); return((double) rows / 20.0+1); } virtual bool fast_key_read() { return 1;} + /* + TODO: return actual upper bound of number of records in the table. + (e.g. save number of records seen on full table scan and/or use file size + as upper bound) + */ + ha_rows estimate_rows_upper_bound() { return HA_POS_ERROR; } int open(const char *name, int mode, uint test_if_locked); int close(void); diff --git a/sql/filesort.cc b/sql/filesort.cc index a84fa4fe6f49bd25ee648528ab80c2f7e7094e99..bd0de022fd418e6307147649a0ae6e804be2acd9 100644 --- a/sql/filesort.cc +++ b/sql/filesort.cc @@ -169,7 +169,13 @@ ha_rows filesort(THD *thd, TABLE *table, SORT_FIELD *sortorder, uint s_length, else #endif { - records=table->file->estimate_number_of_rows(); + records= table->file->estimate_rows_upper_bound(); + /* + If number of records is not known, use as much of sort buffer + as possible. + */ + if (records == HA_POS_ERROR) + records--; // we use 'records+1' below. selected_records_file= 0; } @@ -315,7 +321,7 @@ static char **make_char_array(register uint fields, uint length, myf my_flag) } /* make_char_array */ - /* Read all buffer pointers into memory */ +/* Read 'count' number of buffer pointers into memory */ static BUFFPEK *read_buffpek_from_file(IO_CACHE *buffpek_pointers, uint count) { @@ -336,8 +342,40 @@ static BUFFPEK *read_buffpek_from_file(IO_CACHE *buffpek_pointers, uint count) } - - /* Search after sort_keys and place them in a temp. file */ +/* + Search after sort_keys and write them into tempfile. + SYNOPSIS + find_all_keys() + param Sorting parameter + select Use this to get source data + sort_keys Array of pointers to sort key + addon buffers. + buffpek_pointers File to write BUFFPEKs describing sorted segments + in tempfile. + tempfile File to write sorted sequences of sortkeys to. + indexfile If !NULL, use it for source data (contains rowids) + + NOTE + Basic idea: + while (get_next_sortkey()) + { + if (no free space in sort_keys buffers) + { + sort sort_keys buffer; + dump sorted sequence to 'tempfile'; + dump BUFFPEK describing sequence location into 'buffpek_pointers'; + } + put sort key into 'sort_keys'; + } + if (sort_keys has some elements && dumped at least once) + sort-dump-dump as above; + else + don't sort, leave sort_keys array to be sorted by caller. + + All produced sequences are guaranteed to be non-empty. + RETURN + Number of records written on success. + HA_POS_ERROR on error. +*/ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select, uchar **sort_keys, @@ -452,7 +490,25 @@ static ha_rows find_all_keys(SORTPARAM *param, SQL_SELECT *select, } /* find_all_keys */ - /* Skriver en buffert med nycklar till filen */ +/* + Sort the buffer and write: + 1) the sorted sequence to tempfile + 2) a BUFFPEK describing the sorted sequence position to buffpek_pointers + (was: Skriver en buffert med nycklar till filen) + SYNOPSIS + write_keys() + param Sort parameters + sort_keys Array of pointers to keys to sort + count Number of elements in sort_keys array + buffpek_pointers One 'BUFFPEK' struct will be written into this file. + The BUFFPEK::{file_pos, count} will indicate where + the sorted data was stored. + tempfile The sorted sequence will be written into this file. + + RETURN + 0 OK + 1 Error +*/ static int write_keys(SORTPARAM *param, register uchar **sort_keys, uint count, @@ -784,7 +840,21 @@ uint read_to_buffer(IO_CACHE *fromfile, BUFFPEK *buffpek, /* - Merge buffers to one buffer + Merge buffers to one buffer + SYNOPSIS + merge_buffers() + param Sort parameter + from_file File with source data (BUFFPEKs point to this file) + to_file File to write the sorted result data. + sort_buffer Buffer for data to store up to MERGEBUFF2 sort keys. + lastbuff OUT Store here BUFFPEK describing data written to to_file + Fb First element in source BUFFPEKs array + Tb Last element in source BUFFPEKs array + flag + + RETURN + 0 - OK + other - error */ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, @@ -822,6 +892,9 @@ int merge_buffers(SORTPARAM *param, IO_CACHE *from_file, strpos= (uchar*) sort_buffer; org_max_rows=max_rows= param->max_rows; + /* The following will fire if there is not enough space in sort_buffer */ + DBUG_ASSERT(maxcount!=0); + if (init_queue(&queue, (uint) (Tb-Fb)+1, offsetof(BUFFPEK,key), 0, (queue_compare) (cmp= get_ptr_compare(sort_length)), (void*) &sort_length)) diff --git a/sql/ha_berkeley.cc b/sql/ha_berkeley.cc index 7cd534d60b3b21772dd5b09d9d4e6f477b7e6e68..32f623b86c955042e62e254be46fdd8c51d5d199 100644 --- a/sql/ha_berkeley.cc +++ b/sql/ha_berkeley.cc @@ -25,7 +25,7 @@ We will need an updated Berkeley DB version for this. - Killing threads that has got a 'deadlock' - SHOW TABLE STATUS should give more information about the table. - - Get a more accurate count of the number of rows (estimate_number_of_rows()). + - Get a more accurate count of the number of rows (estimate_rows_upper_bound()). We could store the found number of rows when the table is scanned and then increment the counter for each attempted write. - We will need to extend the manager thread to makes checkpoints at @@ -63,7 +63,7 @@ #define HA_BERKELEY_ROWS_IN_TABLE 10000 /* to get optimization right */ #define HA_BERKELEY_RANGE_COUNT 100 #define HA_BERKELEY_MAX_ROWS 10000000 /* Max rows in table */ -/* extra rows for estimate_number_of_rows() */ +/* extra rows for estimate_rows_upper_bound() */ #define HA_BERKELEY_EXTRA_ROWS 100 /* Bits for share->status */ @@ -2556,7 +2556,7 @@ static void update_status(BDB_SHARE *share, TABLE *table) Used when sorting to allocate buffers and by the optimizer. */ -ha_rows ha_berkeley::estimate_number_of_rows() +ha_rows ha_berkeley::estimate_rows_upper_bound() { return share->rows + HA_BERKELEY_EXTRA_ROWS; } diff --git a/sql/ha_berkeley.h b/sql/ha_berkeley.h index 5cba3bebf105dc2c072be919a95b9cb9895107d3..25d3e1285020cafd7cf3b0227060b150fc901cd7 100644 --- a/sql/ha_berkeley.h +++ b/sql/ha_berkeley.h @@ -100,7 +100,7 @@ class ha_berkeley: public handler ulong table_flags(void) const { return int_table_flags; } uint max_supported_keys() const { return MAX_KEY-1; } uint extra_rec_buf_length() { return BDB_HIDDEN_PRIMARY_KEY_LENGTH; } - ha_rows estimate_number_of_rows(); + ha_rows estimate_rows_upper_bound(); const key_map *keys_to_use_for_scanning() { return &key_map_full; } bool has_transactions() { return 1;} diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index d787159241a4cebce742c985e49d08d262107d9b..a9a8764c941f59a5db1336d403af52aecb4bb6d8 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -4115,7 +4115,7 @@ Gives an UPPER BOUND to the number of rows in a table. This is used in filesort.cc. */ ha_rows -ha_innobase::estimate_number_of_rows(void) +ha_innobase::estimate_rows_upper_bound(void) /*======================================*/ /* out: upper bound of rows */ { @@ -4124,7 +4124,7 @@ ha_innobase::estimate_number_of_rows(void) ulonglong estimate; ulonglong local_data_file_length; - DBUG_ENTER("estimate_number_of_rows"); + DBUG_ENTER("estimate_rows_upper_bound"); /* We do not know if MySQL can call this function before calling external_lock(). To be safe, update the thd of the current table @@ -4204,7 +4204,7 @@ ha_innobase::read_time( time_for_scan = scan_time(); - if ((total_rows = estimate_number_of_rows()) < rows) + if ((total_rows = estimate_rows_upper_bound()) < rows) return time_for_scan; return (ranges + (double) rows / (double) total_rows * time_for_scan); diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h index 6556931fa1afca2bef76c3415933e387ccb1b936..fa6c8a90d21180ad840baf659fbca159b5814663 100644 --- a/sql/ha_innodb.h +++ b/sql/ha_innodb.h @@ -150,7 +150,7 @@ class ha_innobase: public handler void position(byte *record); ha_rows records_in_range(uint inx, key_range *min_key, key_range *max_key); - ha_rows estimate_number_of_rows(); + ha_rows estimate_rows_upper_bound(); int create(const char *name, register TABLE *form, HA_CREATE_INFO *create_info); diff --git a/sql/handler.h b/sql/handler.h index e3a3b25e1cb0fbf860c1627f4ec26dbdebdd9329..0b7e9c043811797e42a2a46fa03575543f7ff941 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -300,7 +300,15 @@ class handler :public Sql_alloc virtual const key_map *keys_to_use_for_scanning() { return &key_map_empty; } virtual bool has_transactions(){ return 0;} virtual uint extra_rec_buf_length() { return 0; } - virtual ha_rows estimate_number_of_rows() { return records+EXTRA_RECORDS; } + + /* + Return upper bound of current number of records in the table + (max. of how many records one will retrieve when doing a full table scan) + If upper bound is not known, HA_POS_ERROR should be returned as a max + possible upper bound. + */ + virtual ha_rows estimate_rows_upper_bound() + { return records+EXTRA_RECORDS; } virtual const char *index_type(uint key_number) { DBUG_ASSERT(0); return "";}