Commit f52bf920, authored by Varun Gupta

MDEV-21263: Allow packed values of non-sorted fields in the sort buffer

This task deals with packing the non-sorted fields (or addon fields).
This would lead to efficient usage of the memory allocated for the sort buffer.
The changes brought by this feature are
  1) Sort buffers would have records of variable length
  2) Each record in the sort buffer would be stored like
     <sort_key1><sort_key2>....<addon_length><null_bytes><field1><field2>....
     addon_length is the number of extra bytes needed to store the
     variable length of the addon fields, which can differ from record
     to record.
  3) Changes in rr_unpack_from_buffer and rr_from_tempfile to take into account
     the variable length of records.

Ported WL#1509 "Pack values of non-sorted fields in the sort buffer" from
MySQL, authored by Tor Didriksen.
parent ded128aa
This diff is collapsed.
--source include/big_test.inc
--source include/have_sequence.inc
--source include/have_64bit.inc
# Save the session RAND() seeds so they can be restored at the end of
# the test, then set fixed seeds so that the generated data (and hence
# the recorded results) are deterministic across runs.
set @save_rand_seed1= @@RAND_SEED1;
set @save_rand_seed2= @@RAND_SEED2;
set @@RAND_SEED1=810763568, @@RAND_SEED2=600681772;
# t1 holds the values 1..10000 in a random (seed-determined) order.
create table t1(a int);
insert into t1 select seq from seq_1_to_10000 order by rand();
# The stored functions below span multiple lines, so switch the client
# statement delimiter away from ';' while they are created.
delimiter |;
--echo #
--echo # parameters:
--echo # mean mean for the column to be considered
--echo # max_val max_value for the column to be considered
--echo #
--echo # This function generate a sample of a normal distribution
--echo # This function return a point
--echo # of the normal distribution with a given mean.
--echo #
# Returns one sample of an approximately normal distribution centred at
# 'mean': the average of six uniform rand() values approximates a normal
# variate (central limit theorem), which is then scaled and shifted so
# the result lies in [2*mean - max_val, max_val].
CREATE FUNCTION
generate_normal_distribution_sample(mean DOUBLE, max_val DOUBLE)RETURNS DOUBLE
BEGIN
DECLARE z DOUBLE DEFAULT 0;
SET z= (rand() + rand() + rand() + rand() + rand() + rand())/6;
SET z= 2*(max_val-mean)*z;
SET z= z + mean - (max_val-mean);
return z;
END|
--echo #
--echo # parameters:
--echo # len length of the random string to be generated
--echo #
--echo # This function generates a random string for the length passed
--echo # as an argument with characters in the range of [A,Z]
--echo #
# Returns a random string of 'len' characters, each drawn from 'A'..'Z'
# via CHAR(65 + round(rand()*25)).  Note: because round() is used, the
# endpoint letters 'A' and 'Z' are sampled half as often as the others.
# str is declared VARCHAR(256) but callers never request more than 128
# characters, matching the VARCHAR(128) return type.
CREATE FUNCTION generate_random_string(len INT) RETURNS varchar(128)
BEGIN
DECLARE str VARCHAR(256) DEFAULT '';
DECLARE x INT DEFAULT 0;
WHILE (len > 0) DO
SET x =round(rand()*25);
SET str= CONCAT(str, CHAR(65 + x));
SET len= len-1;
END WHILE;
RETURN str;
END|
--echo #
--echo # parameters:
--echo # mean mean for the column to be considered
--echo # min_val min_value for the column to be considered
--echo # max_val max_value for the column to be considered
--echo #
--echo # This function generate a normal distribution sample in the range of
--echo # [min_val, max_val]
--echo #
# Returns a normal-distribution sample rounded to an integer, clipped to
# [min_val, max_val] by rejection sampling: samples are drawn repeatedly
# until one falls inside the range.  The trailing RETURN 0 is
# unreachable in practice; it only guarantees that every code path of
# the function returns a value.
CREATE FUNCTION
clipped_normal_distribution(mean DOUBLE, min_val DOUBLE, max_val DOUBLE)
RETURNS INT
BEGIN
DECLARE r DOUBLE DEFAULT 0;
WHILE 1=1 DO
set r= generate_normal_distribution_sample(mean, max_val);
IF (r >= min_val AND r <= max_val) THEN
RETURN round(r);
end if;
END WHILE;
RETURN 0;
END|
delimiter ;|
# t2: for every row of t1, two column lengths drawn from clipped normal
# distributions (a centred at 12 within [0,64], b centred at 32 within
# [0,128]).  These drive the variable lengths of the strings in t3.
create table t2 (id INT NOT NULL, a INT, b int);
insert into t2
select a, clipped_normal_distribution(12, 0, 64),
clipped_normal_distribution(32, 0, 128)
from t1;
CREATE TABLE t3(
id INT NOT NULL,
names VARCHAR(64),
address VARCHAR(128),
PRIMARY KEY (id)
);
--echo #
--echo # table t3 stores random strings calculated from the length stored in
--echo # table t2
--echo #
insert into t3
select id, generate_random_string(a), generate_random_string(b) from t2;
# The query sorts variable-length VARCHAR columns (GROUP BY plus ORDER
# BY inside GROUP_CONCAT); MD5 over the concatenation keeps the recorded
# result compact while still being sensitive to any ordering change.
let $query= select id DIV 100 as x,
MD5(group_concat(substring(names,1,3), substring(address,1,3)
order by substring(names,1,3), substring(address,1,3)))
FROM t3
GROUP BY x;
--echo #
--echo # All records fit in memory
--echo #
# Large sort buffer: the whole result should be sorted in memory.
set sort_buffer_size=262144*10;
flush status;
eval $query;
show status like '%sort%';
set sort_buffer_size=default;
--echo #
--echo # Test for merge_many_buff
--echo #
# Small sort buffer: forces the sort to spill to disk and exercise the
# multi-pass merge code path.
set sort_buffer_size=32768;
flush status;
eval $query;
show status like '%sort%';
set sort_buffer_size=default;
# Restore the original RAND() seeds and drop all test objects.
set @@RAND_SEED1= @save_rand_seed1;
set @@RAND_SEED2= @save_rand_seed2;
drop function generate_normal_distribution_sample;
drop function generate_random_string;
drop function clipped_normal_distribution;
drop table t1, t2, t3;
......@@ -57,7 +57,7 @@ class Bounded_queue
@param to Where to put the key.
@param from The input data.
*/
typedef void (*keymaker_function)(Sort_param *param,
typedef uint (*keymaker_function)(Sort_param *param,
Key_type *to,
Element_type *from);
......@@ -181,7 +181,7 @@ void Bounded_queue<Element_type, Key_type>::push(Element_type *element)
{
// Replace top element with new key, and re-order the queue.
Key_type **pq_top= reinterpret_cast<Key_type **>(queue_top(&m_queue));
(*m_keymaker)(m_sort_param, *pq_top, element);
(void)(*m_keymaker)(m_sort_param, *pq_top, element);
queue_replace_top(&m_queue);
} else {
// Insert new key into the queue.
......
......@@ -1527,6 +1527,7 @@ class Field: public Value_source
{ return length;}
virtual uint max_packed_col_length(uint max_length)
{ return max_length;}
virtual bool is_packable() { return false; }
uint offset(const uchar *record) const
{
......@@ -2139,6 +2140,7 @@ class Field_longstr :public Field_str
bool can_optimize_range(const Item_bool_func *cond,
const Item *item,
bool is_eq_func) const;
bool is_packable() { return true; }
};
/* base class for float and double and decimal (old one) */
......
This diff is collapsed.
......@@ -27,7 +27,7 @@ class Filesort_tracker;
struct SORT_FIELD;
typedef struct st_order ORDER;
class JOIN;
class Addon_fields;
/**
Sorting related info.
......@@ -87,7 +87,8 @@ class SORT_INFO
public:
SORT_INFO()
:addon_field(0), record_pointers(0)
:addon_fields(NULL), record_pointers(0),
sorted_result_in_fsbuf(FALSE)
{
buffpek.str= 0;
my_b_clear(&io_cache);
......@@ -98,9 +99,11 @@ class SORT_INFO
void free_data()
{
close_cached_file(&io_cache);
free_addon_buff();
my_free(record_pointers);
my_free(buffpek.str);
my_free(addon_field);
my_free(addon_fields);
free_sort_buffer();
}
void reset()
......@@ -108,17 +111,26 @@ class SORT_INFO
free_data();
record_pointers= 0;
buffpek.str= 0;
addon_field= 0;
addon_fields= 0;
sorted_result_in_fsbuf= false;
}
void free_addon_buff();
IO_CACHE io_cache; /* If sorted through filesort */
LEX_STRING buffpek; /* Buffer for buffpek structures */
LEX_STRING addon_buf; /* Pointer to a buffer if sorted with fields */
struct st_sort_addon_field *addon_field; /* Pointer to the fields info */
/* To unpack back */
void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *);
Addon_fields *addon_fields; /* Addon field descriptors */
uchar *record_pointers; /* If sorted in memory */
/**
If the entire result of filesort fits in memory, we skip the merge phase.
We may leave the result in filesort_buffer
(indicated by sorted_result_in_fsbuf), or we may strip away
the sort keys, and copy the sorted result into a new buffer.
@see save_index()
*/
bool sorted_result_in_fsbuf;
/*
How many rows in final result.
Also how many rows in record_pointers, if used
......@@ -131,27 +143,65 @@ class SORT_INFO
void sort_buffer(Sort_param *param, uint count)
{ filesort_buffer.sort_buffer(param, count); }
/**
Accessors for the @c filesort_buffer member.
*/
uchar *get_record_buffer(uint idx)
{ return filesort_buffer.get_record_buffer(idx); }
uchar **get_sort_keys()
{ return filesort_buffer.get_sort_keys(); }
uchar **alloc_sort_buffer(uint num_records, uint record_length)
uchar *get_sorted_record(uint ix)
{ return filesort_buffer.get_sorted_record(ix); }
uchar *alloc_sort_buffer(uint num_records, uint record_length)
{ return filesort_buffer.alloc_sort_buffer(num_records, record_length); }
void free_sort_buffer()
{ filesort_buffer.free_sort_buffer(); }
bool isfull() const
{ return filesort_buffer.isfull(); }
void init_record_pointers()
{ filesort_buffer.init_record_pointers(); }
void init_next_record_pointer()
{ filesort_buffer.init_next_record_pointer(); }
uchar *get_next_record_pointer()
{ return filesort_buffer.get_next_record_pointer(); }
void adjust_next_record_pointer(uint val)
{ filesort_buffer.adjust_next_record_pointer(val); }
Bounds_checked_array<uchar> get_raw_buf()
{ return filesort_buffer.get_raw_buf(); }
size_t sort_buffer_size() const
{ return filesort_buffer.sort_buffer_size(); }
bool is_allocated() const
{ return filesort_buffer.is_allocated(); }
void set_sort_length(uint val)
{ filesort_buffer.set_sort_length(val); }
uint get_sort_length() const
{ return filesort_buffer.get_sort_length(); }
bool has_filesort_result_in_memory() const
{
return record_pointers || sorted_result_in_fsbuf;
}
/// Are we using "addon fields"?
bool using_addon_fields() const
{
return addon_fields != NULL;
}
/// Are we using "packed addon fields"?
bool using_packed_addons();
/**
Copies (unpacks) values appended to sorted fields from a buffer back to
their regular positions specified by the Field::ptr pointers.
@param buff Buffer which to unpack the value from
*/
template<bool Packed_addon_fields>
inline void unpack_addon_fields(uchar *buff);
friend SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
Filesort_tracker* tracker, JOIN *join,
table_map first_table_bit);
......@@ -162,7 +212,8 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
table_map first_table_bit=0);
bool filesort_use_addons(TABLE *table, uint sortlength,
uint *length, uint *fields, uint *null_fields);
uint *length, uint *fields, uint *null_fields,
uint *m_packable_length);
void change_double_for_sort(double nr,uchar *to);
......
......@@ -96,82 +96,92 @@ double get_merge_many_buffs_cost_fast(ha_rows num_rows,
# Pointer to allocated buffer
*/
uchar **Filesort_buffer::alloc_sort_buffer(uint num_records,
uint record_length)
uchar *Filesort_buffer::alloc_sort_buffer(uint num_records,
uint record_length)
{
size_t buff_size;
uchar **sort_keys, **start_of_data;
DBUG_ENTER("alloc_sort_buffer");
DBUG_EXECUTE_IF("alloc_sort_buffer_fail",
DBUG_SET("+d,simulate_out_of_memory"););
buff_size= ((size_t)num_records) * (record_length + sizeof(uchar*));
set_if_bigger(buff_size, record_length * MERGEBUFF2);
buff_size= ALIGN_SIZE(num_records * (record_length + sizeof(uchar*)));
if (!m_idx_array.is_null())
/*
The minimum memory required should be such that each merge buffer can
hold at most one key.
TODO varun: move this to the place where min_sort_memory is used.
*/
set_if_bigger(buff_size, (record_length +sizeof(uchar*)) * MERGEBUFF2);
if (m_rawmem)
{
/*
Reuse old buffer if exists and is large enough
Note that we don't make the buffer smaller, as we want to be
prepared for next subquery iteration.
*/
sort_keys= m_idx_array.array();
if (buff_size > allocated_size)
if (buff_size > m_size_in_bytes)
{
/*
Better to free and alloc than realloc as we don't have to remember
the old values
*/
my_free(sort_keys);
if (!(sort_keys= (uchar**) my_malloc(buff_size,
MYF(MY_THREAD_SPECIFIC))))
my_free(m_rawmem);
if (!(m_rawmem= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC))))
{
reset();
m_size_in_bytes= 0;
DBUG_RETURN(0);
}
allocated_size= buff_size;
}
}
else
{
if (!(sort_keys= (uchar**) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC))))
if (!(m_rawmem= (uchar*) my_malloc(buff_size, MYF(MY_THREAD_SPECIFIC))))
{
m_size_in_bytes= 0;
DBUG_RETURN(0);
allocated_size= buff_size;
}
}
m_idx_array= Idx_array(sort_keys, num_records);
m_size_in_bytes= buff_size;
m_record_pointers= reinterpret_cast<uchar**>(m_rawmem) +
((m_size_in_bytes / sizeof(uchar*)) - 1);
m_num_records= num_records;
m_record_length= record_length;
start_of_data= m_idx_array.array() + m_idx_array.size();
m_start_of_data= reinterpret_cast<uchar*>(start_of_data);
DBUG_RETURN(m_idx_array.array());
m_idx= 0;
DBUG_RETURN(m_rawmem);
}
void Filesort_buffer::free_sort_buffer()
{
my_free(m_idx_array.array());
m_idx_array.reset();
m_start_of_data= NULL;
my_free(m_rawmem);
*this= Filesort_buffer();
}
void Filesort_buffer::sort_buffer(const Sort_param *param, uint count)
{
size_t size= param->sort_length;
m_sort_keys= get_sort_keys();
if (count <= 1 || size == 0)
return;
uchar **keys= get_sort_keys();
// dont reverse for PQ, it is already done
if (!param->using_pq)
reverse_record_pointers();
uchar **buffer= NULL;
if (radixsort_is_appliccable(count, param->sort_length) &&
(buffer= (uchar**) my_malloc(count*sizeof(char*),
MYF(MY_THREAD_SPECIFIC))))
{
radixsort_for_str_ptr(keys, count, param->sort_length, buffer);
radixsort_for_str_ptr(m_sort_keys, count, param->sort_length, buffer);
my_free(buffer);
return;
}
my_qsort2(keys, count, sizeof(uchar*), get_ptr_compare(size), &size);
my_qsort2(m_sort_keys, count, sizeof(uchar*), get_ptr_compare(size), &size);
}
......@@ -46,68 +46,194 @@ double get_merge_many_buffs_cost_fast(ha_rows num_rows,
/**
A wrapper class around the buffer used by filesort().
The buffer is a contiguous chunk of memory,
where the first part is <num_records> pointers to the actual data.
The sort buffer is a contiguous chunk of memory,
containing both records to be sorted, and pointers to said records:
<start of buffer | still unused | end of buffer>
|rec 0|record 1 |rec 2| ............ |ptr to rec2|ptr to rec1|ptr to rec0|
Records will be inserted "left-to-right". Records are not necessarily
fixed-size, they can be packed and stored without any "gaps".
Record pointers will be inserted "right-to-left", as a side-effect
of inserting the actual records.
We wrap the buffer in order to be able to do lazy initialization of the
pointers: the buffer is often much larger than what we actually need.
With this allocation scheme, and lazy initialization of the pointers,
we are able to pack variable-sized records in the buffer,
and thus possibly have space for more records than we initially estimated.
The buffer must be kept available for multiple executions of the
same sort operation, so we have explicit allocate and free functions,
rather than doing alloc/free in CTOR/DTOR.
*/
class Filesort_buffer
{
public:
Filesort_buffer()
: m_idx_array(), m_start_of_data(NULL), allocated_size(0)
Filesort_buffer() :
m_next_rec_ptr(NULL), m_rawmem(NULL), m_record_pointers(NULL),
m_sort_keys(NULL),
m_num_records(0), m_record_length(0),
m_sort_length(0),
m_size_in_bytes(0), m_idx(0)
{}
~Filesort_buffer()
/** Sort me... */
void sort_buffer(const Sort_param *param, uint count);
/**
Reverses the record pointer array, to avoid recording new results for
non-deterministic mtr tests.
*/
void reverse_record_pointers()
{
my_free(m_idx_array.array());
if (m_idx < 2) // There is nothing to swap.
return;
uchar **keys= get_sort_keys();
const longlong count= m_idx - 1;
for (longlong ix= 0; ix <= count/2; ++ix)
{
uchar *tmp= keys[count - ix];
keys[count - ix] = keys[ix];
keys[ix]= tmp;
}
}
bool is_allocated()
/**
Initializes all the record pointers.
*/
void init_record_pointers()
{
return m_idx_array.array() != 0;
init_next_record_pointer();
while (m_idx < m_num_records)
(void) get_next_record_pointer();
reverse_record_pointers();
}
void reset()
/**
Prepares the buffer for the next batch of records to process.
*/
void init_next_record_pointer()
{
m_idx_array.reset();
m_idx= 0;
m_next_rec_ptr= m_rawmem;
m_sort_keys= NULL;
}
/** Sort me... */
void sort_buffer(const Sort_param *param, uint count);
/**
@returns the number of bytes currently in use for data.
*/
size_t space_used_for_data() const
{
return m_next_rec_ptr ? m_next_rec_ptr - m_rawmem : 0;
}
/// Initializes a record pointer.
uchar *get_record_buffer(uint idx)
/**
@returns the number of bytes left in the buffer.
*/
size_t spaceleft() const
{
m_idx_array[idx]= m_start_of_data + (idx * m_record_length);
return m_idx_array[idx];
DBUG_ASSERT(m_next_rec_ptr >= m_rawmem);
const size_t spaceused=
(m_next_rec_ptr - m_rawmem) +
(static_cast<size_t>(m_idx) * sizeof(uchar*));
return m_size_in_bytes - spaceused;
}
/// Initializes all the record pointers.
void init_record_pointers()
/**
Is the buffer full?
*/
bool isfull() const
{
if (m_idx < m_num_records)
return false;
return spaceleft() < (m_record_length + sizeof(uchar*));
}
/**
Where should the next record be stored?
*/
uchar *get_next_record_pointer()
{
uchar *retval= m_next_rec_ptr;
// Save the return value in the record pointer array.
m_record_pointers[-m_idx]= m_next_rec_ptr;
// Prepare for the subsequent request.
m_idx++;
m_next_rec_ptr+= m_record_length;
return retval;
}
/**
Adjusts for actual record length. get_next_record_pointer() above was
pessimistic, and assumed that the record could not be packed.
*/
void adjust_next_record_pointer(uint val)
{
for (uint ix= 0; ix < m_idx_array.size(); ++ix)
(void) get_record_buffer(ix);
m_next_rec_ptr-= (m_record_length - val);
}
/// Returns total size: pointer array + record buffers.
size_t sort_buffer_size() const
{
return allocated_size;
return m_size_in_bytes;
}
/// Allocates the buffer, but does *not* initialize pointers.
uchar **alloc_sort_buffer(uint num_records, uint record_length);
bool is_allocated() const
{
return m_rawmem;
}
/**
Allocates the buffer, but does *not* initialize pointers.
Total size = (num_records * record_length) + (num_records * sizeof(pointer))
space for records space for pointer to records
Caller is responsible for raising an error if allocation fails.
@param num_records Number of records.
@param record_length (maximum) size of each record.
@returns Pointer to allocated area, or NULL in case of out-of-memory.
*/
uchar *alloc_sort_buffer(uint num_records, uint record_length);
/// Frees the buffer.
void free_sort_buffer();
/// Getter, for calling routines which still use the uchar** interface.
uchar **get_sort_keys() { return m_idx_array.array(); }
void reset()
{
m_rawmem= NULL;
}
/**
Used to access the "right-to-left" array of record pointers as an ordinary
"left-to-right" array, so that we can pass it directly on to std::sort().
*/
uchar **get_sort_keys()
{
if (m_idx == 0)
return NULL;
return &m_record_pointers[1 - m_idx];
}
/**
Gets sorted record number ix. @see get_sort_keys()
Only valid after buffer has been sorted!
*/
uchar *get_sorted_record(uint ix)
{
return m_sort_keys[ix];
}
/**
@returns The entire buffer, as a character array.
This is for reusing the memory for merge buffers.
*/
Bounds_checked_array<uchar> get_raw_buf()
{
return Bounds_checked_array<uchar>(m_rawmem, m_size_in_bytes);
}
/**
We need an assignment operator, see filesort().
......@@ -117,20 +243,40 @@ class Filesort_buffer
*/
Filesort_buffer &operator=(const Filesort_buffer &rhs)
{
m_idx_array= rhs.m_idx_array;
m_next_rec_ptr= rhs.m_next_rec_ptr;
m_rawmem= rhs.m_rawmem;
m_record_pointers= rhs.m_record_pointers;
m_sort_keys= rhs.m_sort_keys;
m_num_records= rhs.m_num_records;
m_record_length= rhs.m_record_length;
m_start_of_data= rhs.m_start_of_data;
allocated_size= rhs.allocated_size;
m_sort_length= rhs.m_sort_length;
m_size_in_bytes= rhs.m_size_in_bytes;
m_idx= rhs.m_idx;
return *this;
}
uint get_sort_length() const { return m_sort_length; }
void set_sort_length(uint val) { m_sort_length= val; }
private:
typedef Bounds_checked_array<uchar*> Idx_array;
uchar *m_next_rec_ptr; /// The next record will be inserted here.
uchar *m_rawmem; /// The raw memory buffer.
uchar **m_record_pointers; /// The "right-to-left" array of record pointers.
uchar **m_sort_keys; /// Caches the value of get_sort_keys()
uint m_num_records; /// Saved value from alloc_sort_buffer()
uint m_record_length; /// Saved value from alloc_sort_buffer()
uint m_sort_length; /// The length of the sort key.
size_t m_size_in_bytes; /// Size of raw buffer, in bytes.
Idx_array m_idx_array; /* Pointers to key data */
uint m_record_length;
uchar *m_start_of_data; /* Start of key data */
size_t allocated_size;
/**
This is the index in the "right-to-left" array of the next record to
be inserted into the buffer. It is signed, because we use it in signed
expressions like:
m_record_pointers[-m_idx];
It is longlong rather than int, to ensure that it covers UINT_MAX32
without any casting/warning.
*/
longlong m_idx;
};
#endif // FILESORT_UTILS_INCLUDED
......@@ -38,8 +38,8 @@
static int rr_quick(READ_RECORD *info);
int rr_sequential(READ_RECORD *info);
static int rr_from_tempfile(READ_RECORD *info);
static int rr_unpack_from_tempfile(READ_RECORD *info);
static int rr_unpack_from_buffer(READ_RECORD *info);
template<bool> static int rr_unpack_from_tempfile(READ_RECORD *info);
template<bool> static int rr_unpack_from_buffer(READ_RECORD *info);
int rr_from_pointers(READ_RECORD *info);
static int rr_from_cache(READ_RECORD *info);
static int init_rr_cache(THD *thd, READ_RECORD *info);
......@@ -187,23 +187,23 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
bool disable_rr_cache)
{
IO_CACHE *tempfile;
SORT_ADDON_FIELD *addon_field= filesort ? filesort->addon_field : 0;
DBUG_ENTER("init_read_record");
const bool using_addon_fields= filesort && filesort->using_addon_fields();
bzero((char*) info,sizeof(*info));
info->thd=thd;
info->table=table;
info->addon_field= addon_field;
info->sort_info= filesort;
if ((table->s->tmp_table == INTERNAL_TMP_TABLE) &&
!addon_field)
!using_addon_fields)
(void) table->file->extra(HA_EXTRA_MMAP);
if (addon_field)
if (using_addon_fields)
{
info->rec_buf= (uchar*) filesort->addon_buf.str;
info->ref_length= (uint)filesort->addon_buf.length;
info->unpack= filesort->unpack;
info->rec_buf= filesort->addon_fields->get_addon_buf();
info->ref_length= filesort->addon_fields->get_addon_buf_length();
}
else
{
......@@ -223,9 +223,20 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
if (tempfile && !(select && select->quick))
{
DBUG_PRINT("info",("using rr_from_tempfile"));
info->read_record_func=
addon_field ? rr_unpack_from_tempfile : rr_from_tempfile;
if (using_addon_fields)
{
DBUG_PRINT("info",("using rr_from_tempfile"));
if (filesort->addon_fields->using_packed_addons())
info->read_record_func= rr_unpack_from_tempfile<true>;
else
info->read_record_func= rr_unpack_from_tempfile<false>;
}
else
{
DBUG_PRINT("info",("using rr_from_tempfile"));
info->read_record_func= rr_from_tempfile;
}
info->io_cache= tempfile;
reinit_io_cache(info->io_cache,READ_CACHE,0L,0,0);
info->ref_pos=table->file->ref;
......@@ -239,7 +250,7 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
and filesort->io_cache is read sequentially
*/
if (!disable_rr_cache &&
!addon_field &&
!using_addon_fields &&
thd->variables.read_rnd_buff_size &&
!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
(table->db_stat & HA_READ_ONLY ||
......@@ -264,16 +275,29 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
DBUG_PRINT("info",("using rr_quick"));
info->read_record_func= rr_quick;
}
else if (filesort && filesort->record_pointers)
else if (filesort && filesort->has_filesort_result_in_memory())
{
DBUG_PRINT("info",("using record_pointers"));
if (unlikely(table->file->ha_rnd_init_with_error(0)))
DBUG_RETURN(1);
info->cache_pos= filesort->record_pointers;
info->cache_end= (info->cache_pos+
filesort->return_rows * info->ref_length);
info->read_record_func=
addon_field ? rr_unpack_from_buffer : rr_from_pointers;
if (using_addon_fields)
{
DBUG_PRINT("info",("using rr_unpack_from_buffer"));
DBUG_ASSERT(filesort->sorted_result_in_fsbuf);
info->unpack_counter= 0;
if (filesort->using_packed_addons())
info->read_record_func= rr_unpack_from_buffer<true>;
else
info->read_record_func= rr_unpack_from_buffer<false>;
}
else
{
info->cache_end= (info->cache_pos+
filesort->return_rows * info->ref_length);
info->read_record_func= rr_from_pointers;
}
}
else if (table->file->keyread_enabled())
{
......@@ -510,7 +534,11 @@ static int rr_from_tempfile(READ_RECORD *info)
the fields values use in the result set from this buffer into their
positions in the regular record buffer.
@param info Reference to the context including record descriptors
@param info Reference to the context including record
descriptors
@param Packed_addon_fields Are the addon fields packed?
This is a compile-time constant, to
avoid if (....) tests during execution.
@retval
0 Record successfully read.
......@@ -518,12 +546,38 @@ static int rr_from_tempfile(READ_RECORD *info)
-1 There is no record to be read anymore.
*/
template<bool Packed_addon_fields>
static int rr_unpack_from_tempfile(READ_RECORD *info)
{
if (my_b_read(info->io_cache, info->rec_buf, info->ref_length))
return -1;
(*info->unpack)(info->addon_field, info->rec_buf,
info->rec_buf + info->ref_length);
uchar *destination= info->rec_buf;
#ifndef DBUG_OFF
my_off_t where= my_b_tell(info->io_cache);
#endif
if (Packed_addon_fields)
{
const uint len_sz= Addon_fields::size_of_length_field;
// First read length of the record.
if (my_b_read(info->io_cache, destination, len_sz))
return -1;
uint res_length= Addon_fields::read_addon_length(destination);
DBUG_PRINT("info", ("rr_unpack from %llu to %p sz %u",
static_cast<ulonglong>(where),
destination, res_length));
DBUG_ASSERT(res_length > len_sz);
DBUG_ASSERT(info->sort_info->using_addon_fields());
// Then read the rest of the record.
if (my_b_read(info->io_cache, destination + len_sz, res_length - len_sz))
return -1; /* purecov: inspected */
}
else
{
if (my_b_read(info->io_cache, destination, info->ref_length))
return -1;
}
info->sort_info->unpack_addon_fields<Packed_addon_fields>(destination);
return 0;
}
......@@ -560,7 +614,11 @@ int rr_from_pointers(READ_RECORD *info)
the fields values use in the result set from this buffer into their
positions in the regular record buffer.
@param info Reference to the context including record descriptors
@param info Reference to the context including record
descriptors
@param Packed_addon_fields Are the addon fields packed?
This is a compile-time constant, to
avoid if (....) tests during execution.
@retval
0 Record successfully read.
......@@ -568,13 +626,17 @@ int rr_from_pointers(READ_RECORD *info)
-1 There is no record to be read anymore.
*/
template<bool Packed_addon_fields>
static int rr_unpack_from_buffer(READ_RECORD *info)
{
if (info->cache_pos == info->cache_end)
if (info->unpack_counter == info->sort_info->return_rows)
return -1; /* End of buffer */
(*info->unpack)(info->addon_field, info->cache_pos,
info->cache_end);
info->cache_pos+= info->ref_length;
uchar *record= info->sort_info->get_sorted_record(
static_cast<uint>(info->unpack_counter));
uchar *plen= record + info->sort_info->get_sort_length();
info->sort_info->unpack_addon_fields<Packed_addon_fields>(plen);
info->unpack_counter++;
return 0;
}
/* cacheing of records from a database */
......@@ -709,3 +771,26 @@ static int rr_cmp(uchar *a,uchar *b)
return (int) a[7] - (int) b[7];
#endif
}
/**
  Copies (unpacks) addon field values appended to a sorted record back
  into their regular positions (Field::ptr) in the record buffer.

  @tparam Packed_addon_fields  true if addon fields are stored in the
                               packed (variable-length) format, false if
                               each field sits at a fixed offset.
  @param  buff  Start of the addon-field data for one sorted record
                (length field / null bytes precede the field values).
*/
template<bool Packed_addon_fields>
inline void SORT_INFO::unpack_addon_fields(uchar *buff)
{
SORT_ADDON_FIELD *addonf= addon_fields->begin();
// Upper bound passed to Field::unpack() so it never reads past the
// sort buffer.
uchar *buff_end= buff + sort_buffer_size();
// Packed format: field values are laid out back to back starting at
// the first field's offset; start_of_record advances as each non-NULL
// field is consumed.
const uchar *start_of_record= buff + addonf->offset;
for ( ; addonf != addon_fields->end() ; addonf++)
{
Field *field= addonf->field;
// Test the field's bit in the null-bytes area.  NULL fields are not
// unpacked; in the packed format they occupy no data bytes
// (start_of_record is not advanced for them).
if (addonf->null_bit && (addonf->null_bit & buff[addonf->null_offset]))
{
field->set_null();
continue;
}
field->set_notnull();
if (Packed_addon_fields)
// Packed: consume from the running read position.
start_of_record= field->unpack(field->ptr, start_of_record, buff_end, 0);
else
// Fixed layout: each field lives at its precomputed offset.
field->unpack(field->ptr, buff + addonf->offset, buff_end, 0);
}
}
......@@ -58,13 +58,23 @@ struct READ_RECORD
THD *thd;
SQL_SELECT *select;
uint ref_length, reclength, rec_cache_size, error_offset;
/**
Counting records when reading result from filesort().
Used when filesort leaves the result in the filesort buffer.
*/
ha_rows unpack_counter;
uchar *ref_pos; /* pointer to form->refpos */
uchar *rec_buf; /* to read field values after filesort */
uchar *cache,*cache_pos,*cache_end,*read_positions;
struct st_sort_addon_field *addon_field; /* Pointer to the fields info */
/*
Structure storing information about sorting
*/
SORT_INFO *sort_info;
struct st_io_cache *io_cache;
bool print_error;
void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *);
int read_record() { return read_record_func(this); }
uchar *record() const { return table->record[0]; }
......
......@@ -85,6 +85,10 @@ template <typename Element_type> class Bounds_checked_array
Element_type *array() const { return m_array; }
Element_type *begin() const { return array(); }
Element_type *end() const { return array() + m_size; }
bool operator==(const Bounds_checked_array<Element_type>&rhs) const
{
return m_array == rhs.m_array && m_size == rhs.m_size;
......
......@@ -13997,7 +13997,7 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
*simple_order= head->on_expr_ref[0] == NULL;
if (*simple_order && head->table->file->ha_table_flags() & HA_SLOW_RND_POS)
{
uint u1, u2, u3;
uint u1, u2, u3, u4;
/*
normally the condition is (see filesort_use_addons())
......@@ -14008,7 +14008,7 @@ remove_const(JOIN *join,ORDER *first_order, COND *cond,
TODO proper cost estimations
*/
*simple_order= filesort_use_addons(head->table, 0, &u1, &u2, &u3);
*simple_order= filesort_use_addons(head->table, 0, &u1, &u2, &u3, &u4);
}
}
else
......
This diff is collapsed.
......@@ -39,7 +39,6 @@
#include "my_tree.h" // element_count
#include "uniques.h" // Unique
#include "sql_sort.h"
#include "myisamchk.h" // BUFFPEK
int unique_write_to_file(uchar* key, element_count count, Unique *unique)
{
......@@ -94,7 +93,7 @@ Unique::Unique(qsort_cmp2 comp_func, void * comp_func_fixed_arg,
init_tree(&tree, (max_in_memory_size / 16), 0, size, comp_func,
NULL, comp_func_fixed_arg, MYF(MY_THREAD_SPECIFIC));
/* If the following fail's the next add will also fail */
my_init_dynamic_array(&file_ptrs, sizeof(BUFFPEK), 16, 16,
my_init_dynamic_array(&file_ptrs, sizeof(Merge_chunk), 16, 16,
MYF(MY_THREAD_SPECIFIC));
/*
If you change the following, change it in get_max_elements function, too.
......@@ -375,10 +374,10 @@ Unique::~Unique()
/* Write tree to disk; clear tree */
bool Unique::flush()
{
BUFFPEK file_ptr;
Merge_chunk file_ptr;
elements+= tree.elements_in_tree;
file_ptr.count=tree.elements_in_tree;
file_ptr.file_pos=my_b_tell(&file);
file_ptr.set_rowcount(tree.elements_in_tree);
file_ptr.set_file_position(my_b_tell(&file));
tree_walk_action action= min_dupl_count ?
(tree_walk_action) unique_write_to_file_with_count :
......@@ -490,7 +489,7 @@ void put_counter_into_merged_element(void *ptr, uint ofs, element_count cnt)
*/
static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
uint key_length, BUFFPEK *begin, BUFFPEK *end,
uint key_length, Merge_chunk *begin, Merge_chunk *end,
tree_walk_action walk_action, void *walk_action_arg,
qsort_cmp2 compare, void *compare_arg,
IO_CACHE *file, bool with_counters)
......@@ -499,7 +498,8 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
QUEUE queue;
if (end <= begin ||
merge_buffer_size < (size_t) (key_length * (end - begin + 1)) ||
init_queue(&queue, (uint) (end - begin), offsetof(BUFFPEK, key), 0,
init_queue(&queue, (uint) (end - begin),
offsetof(Merge_chunk, m_current_key), 0,
buffpek_compare, &compare_context, 0, 0))
return 1;
/* we need space for one key when a piece of merge buffer is re-read */
......@@ -510,10 +510,16 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
/* if piece_size is aligned reuse_freed_buffer will always hit */
uint piece_size= max_key_count_per_piece * key_length;
ulong bytes_read; /* to hold return value of read_to_buffer */
BUFFPEK *top;
Merge_chunk *top;
int res= 1;
uint cnt_ofs= key_length - (with_counters ? sizeof(element_count) : 0);
element_count cnt;
// read_to_buffer() needs only rec_length.
Sort_param sort_param;
sort_param.rec_length= key_length;
DBUG_ASSERT(!sort_param.using_addon_fields());
/*
Invariant: queue must contain top element from each tree, until a tree
is not completely walked through.
......@@ -522,15 +528,16 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
*/
for (top= begin; top != end; ++top)
{
top->base= merge_buffer + (top - begin) * piece_size;
top->max_keys= max_key_count_per_piece;
bytes_read= read_to_buffer(file, top, key_length);
top->set_buffer_start(merge_buffer + (top - begin) * piece_size);
top->set_buffer_end(top->buffer_start() + piece_size);
top->set_max_keys(max_key_count_per_piece);
bytes_read= read_to_buffer(file, top, &sort_param);
if (unlikely(bytes_read == (ulong) -1))
goto end;
DBUG_ASSERT(bytes_read);
queue_insert(&queue, (uchar *) top);
}
top= (BUFFPEK *) queue_top(&queue);
top= (Merge_chunk *) queue_top(&queue);
while (queue.elements > 1)
{
/*
......@@ -540,20 +547,21 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
elements in each tree are unique. Action is applied only to unique
elements.
*/
void *old_key= top->key;
void *old_key= top->current_key();
/*
read next key from the cache or from the file and push it to the
queue; this gives new top.
*/
top->key+= key_length;
if (--top->mem_count)
top->advance_current_key(key_length);
top->decrement_mem_count();
if (top->mem_count())
queue_replace_top(&queue);
else /* next piece should be read */
{
/* save old_key not to overwrite it in read_to_buffer */
memcpy(save_key_buff, old_key, key_length);
old_key= save_key_buff;
bytes_read= read_to_buffer(file, top, key_length);
bytes_read= read_to_buffer(file, top, &sort_param);
if (unlikely(bytes_read == (ulong) -1))
goto end;
else if (bytes_read) /* top->key, top->mem_count are reset */
......@@ -568,9 +576,9 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
reuse_freed_buff(&queue, top, key_length);
}
}
top= (BUFFPEK *) queue_top(&queue);
top= (Merge_chunk *) queue_top(&queue);
/* new top has been obtained; if old top is unique, apply the action */
if (compare(compare_arg, old_key, top->key))
if (compare(compare_arg, old_key, top->current_key()))
{
cnt= with_counters ?
get_counter_from_merged_element(old_key, cnt_ofs) : 1;
......@@ -579,9 +587,9 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
}
else if (with_counters)
{
cnt= get_counter_from_merged_element(top->key, cnt_ofs);
cnt= get_counter_from_merged_element(top->current_key(), cnt_ofs);
cnt+= get_counter_from_merged_element(old_key, cnt_ofs);
put_counter_into_merged_element(top->key, cnt_ofs, cnt);
put_counter_into_merged_element(top->current_key(), cnt_ofs, cnt);
}
}
/*
......@@ -595,13 +603,13 @@ static bool merge_walk(uchar *merge_buffer, size_t merge_buffer_size,
{
cnt= with_counters ?
get_counter_from_merged_element(top->key, cnt_ofs) : 1;
if (walk_action(top->key, cnt, walk_action_arg))
get_counter_from_merged_element(top->current_key(), cnt_ofs) : 1;
if (walk_action(top->current_key(), cnt, walk_action_arg))
goto end;
top->key+= key_length;
top->advance_current_key(key_length);
}
while (--top->mem_count);
bytes_read= read_to_buffer(file, top, key_length);
while (top->decrement_mem_count());
bytes_read= read_to_buffer(file, top, &sort_param);
if (unlikely(bytes_read == (ulong) -1))
goto end;
}
......@@ -657,13 +665,14 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg)
if (!(merge_buffer = (uchar *)my_malloc(buff_sz, MYF(MY_WME))))
return 1;
if (buff_sz < full_size * (file_ptrs.elements + 1UL))
res= merge(table, merge_buffer, buff_sz >= full_size * MERGEBUFF2) ;
res= merge(table, merge_buffer, buff_sz,
buff_sz >= full_size * MERGEBUFF2) ;
if (!res)
{
res= merge_walk(merge_buffer, buff_sz, full_size,
(BUFFPEK *) file_ptrs.buffer,
(BUFFPEK *) file_ptrs.buffer + file_ptrs.elements,
(Merge_chunk *) file_ptrs.buffer,
(Merge_chunk *) file_ptrs.buffer + file_ptrs.elements,
action, walk_action_arg,
tree.compare, tree.custom_arg, &file, with_counters);
}
......@@ -684,16 +693,18 @@ bool Unique::walk(TABLE *table, tree_walk_action action, void *walk_action_arg)
All params are 'IN':
table the parameter to access sort context
buff merge buffer
buff_size size of merge buffer
without_last_merge TRUE <=> do not perform the last merge
RETURN VALUE
0 OK
<> 0 error
*/
bool Unique::merge(TABLE *table, uchar *buff, bool without_last_merge)
bool Unique::merge(TABLE *table, uchar *buff, size_t buff_size,
bool without_last_merge)
{
IO_CACHE *outfile= &sort.io_cache;
BUFFPEK *file_ptr= (BUFFPEK*) file_ptrs.buffer;
Merge_chunk *file_ptr= (Merge_chunk*) file_ptrs.buffer;
uint maxbuffer= file_ptrs.elements - 1;
my_off_t save_pos;
bool error= 1;
......@@ -724,7 +735,9 @@ bool Unique::merge(TABLE *table, uchar *buff, bool without_last_merge)
sort_param.cmp_context.key_compare_arg= tree.custom_arg;
/* Merge the buffers to one file, removing duplicates */
if (merge_many_buff(&sort_param,buff,file_ptr,&maxbuffer,&file))
if (merge_many_buff(&sort_param,
Bounds_checked_array<uchar>(buff, buff_size),
file_ptr,&maxbuffer,&file))
goto err;
if (flush_io_cache(&file) ||
reinit_io_cache(&file,READ_CACHE,0L,0,0))
......@@ -736,7 +749,8 @@ bool Unique::merge(TABLE *table, uchar *buff, bool without_last_merge)
file_ptrs.elements= maxbuffer+1;
return 0;
}
if (merge_index(&sort_param, buff, file_ptr, maxbuffer, &file, outfile))
if (merge_index(&sort_param, Bounds_checked_array<uchar>(buff, buff_size),
file_ptr, maxbuffer, &file, outfile))
goto err;
error= 0;
err:
......@@ -791,7 +805,7 @@ bool Unique::get(TABLE *table)
MYF(MY_THREAD_SPECIFIC|MY_WME))))
DBUG_RETURN(1);
if (merge(table, sort_buffer, FALSE))
if (merge(table, sort_buffer, buff_sz, FALSE))
goto err;
rc= 0;
......
......@@ -39,7 +39,7 @@ class Unique :public Sql_alloc
uint min_dupl_count; /* always 0 for unions, > 0 for intersections */
bool with_counters;
bool merge(TABLE *table, uchar *buff, bool without_last_merge);
bool merge(TABLE *table, uchar *buff, size_t size, bool without_last_merge);
bool flush();
public:
......
......@@ -299,11 +299,11 @@ matricule nom prenom
7626 HENIN PHILIPPE
403 HERMITTE PHILIPPE
9096 HELENA PHILIPPE
SELECT matricule, nom, prenom FROM t2 ORDER BY nom LIMIT 10;
SELECT matricule, nom, prenom FROM t2 ORDER BY nom,prenom LIMIT 10;
matricule nom prenom
4552 ABBADIE MONIQUE
6627 ABBAYE GERALD
307 ABBAYE ANNICK
6627 ABBAYE GERALD
7961 ABBE KATIA
1340 ABBE MICHELE
9270 ABBE SOPHIE
......
......@@ -120,7 +120,7 @@ SELECT matricule, nom, prenom FROM t2 WHERE nom <= 'ABEL' OR nom > 'YVON';
SELECT matricule, nom, prenom FROM t2 WHERE nom > 'HELEN' AND nom < 'HEROS';
SELECT matricule, nom, prenom FROM t2 WHERE nom BETWEEN 'HELEN' AND 'HEROS';
SELECT matricule, nom, prenom FROM t2 WHERE nom BETWEEN 'HELEN' AND 'HEROS' AND prenom = 'PHILIPPE';
SELECT matricule, nom, prenom FROM t2 ORDER BY nom LIMIT 10;
SELECT matricule, nom, prenom FROM t2 ORDER BY nom,prenom LIMIT 10;
SELECT a.nom, a.prenom, b.nom FROM t1 a STRAIGHT_JOIN t2 b ON a.prenom = b.prenom WHERE a.nom = 'FOCH' AND a.nom != b.nom;
DROP TABLE t2;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment