Commit 884b83e2 authored by Jacob Mathew's avatar Jacob Mathew

MDEV-14500: Support engines without rnd_pos() and

            engines with inefficient rnd_pos()

Some engines have not implemented rnd_pos().  There are other engines whose
implementation of rnd_pos() is inherently inefficient.  Spider is such an
engine, whose implementation of rnd_pos() needs to access a table on a remote
data node to retrieve a single table row.

To address these limitations, a new temporary table has been added to filesort.
When filesort sequentially reads the table being sorted, each row is written to
the filesort temp table in addition to being copied to the sort buffer.
Subsequent calls to rnd_pos() will then access the table row in the filesort
temp table instead of in the table being sorted.

The following logic changes incorporate the new filesort temp table into the
server:
- A new handler method to determine whether a call to the engine's
  rnd_pos() is expensive.  The default return value is FALSE.  Engines without
  rnd_pos() or with an inefficient rnd_pos() should return TRUE.
- Create the filesort temp table only if:
  - There are no add-on columns for filesort; and
  - The engine's implementation of rnd_pos() is expensive.
- Write to the temp table each row that is read from the table being sorted.
- Do subsequent row retrievals that use rnd_pos() on the temp table instead of
  on the table being sorted.  Upon retrieving a row from the temp table, copy
  its column values to the record of the table being sorted.
- Upon completion of retrieval of the sorted result rows, delete the filesort
  temp table and free the memory allocated for using it.

The logic changes are in the following areas:
- Table handler.
- Partition engine.
- Spider engine.
- Filesort.
- Read record manager.

Note that these changes only address the use of rnd_pos() by filesort.  They do
not address the use of rnd_pos() in other areas such as:
- Quick select.
- Insert.
- Update.
- Window functions.
- Multi Range Read.

Author:
  Jacob Mathew.

Reviewer:
  Sergei Golubchik.
parent d23fcc42
......@@ -55,6 +55,11 @@ static bool write_keys(Sort_param *param, SORT_INFO *fs_info,
uint count, IO_CACHE *buffer_file, IO_CACHE *tempfile);
static void make_sortkey(Sort_param *param, uchar *to, uchar *ref_pos);
static void register_used_fields(Sort_param *param);
static void register_tmp_table_fields(SORT_INFO *fs_info);
static int create_fs_tmp_table_if_needed(THD *thd, Sort_param *param,
SORT_INFO *fs_info);
static int write_fs_tmp_table_row(THD *thd, SORT_INFO *fs_info);
static void free_fs_tmp_table(THD *thd, SORT_INFO *fs_info);
static bool save_index(Sort_param *param, uint count,
SORT_INFO *table_sort);
static uint suffix_length(ulong string_length);
......@@ -63,7 +68,8 @@ static uint sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
static SORT_ADDON_FIELD *get_addon_fields(ulong max_length_for_sort_data,
Field **ptabfield,
uint sortlength,
LEX_STRING *addon_buf);
LEX_STRING *addon_buf,
uint *ptmp_fields);
static void unpack_addon_fields(struct st_sort_addon_field *addon_field,
uchar *buff, uchar *buff_end);
static bool check_if_pq_applicable(Sort_param *param, SORT_INFO *info,
......@@ -72,7 +78,8 @@ static bool check_if_pq_applicable(Sort_param *param, SORT_INFO *info,
void Sort_param::init_for_filesort(uint sortlen, TABLE *table,
ulong max_length_for_sort_data,
ha_rows maxrows, bool sort_positions)
ha_rows maxrows, bool sort_positions,
uint *tmp_fields)
{
DBUG_ASSERT(addon_field == 0 && addon_buf.length == 0);
......@@ -86,7 +93,8 @@ void Sort_param::init_for_filesort(uint sortlen, TABLE *table,
to sorted fields and get its total length in addon_buf.length
*/
addon_field= get_addon_fields(max_length_for_sort_data,
table->field, sort_length, &addon_buf);
table->field, sort_length, &addon_buf,
tmp_fields);
}
if (addon_field)
{
......@@ -189,10 +197,11 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
sort->found_rows= HA_POS_ERROR;
param.init_for_filesort(sortlength(thd, filesort->sortorder, s_length,
&multi_byte_charset),
&multi_byte_charset),
table,
thd->variables.max_length_for_sort_data,
max_rows, filesort->sort_positions);
max_rows, filesort->sort_positions,
&sort->tmp_fields);
sort->addon_buf= param.addon_buf;
sort->addon_field= param.addon_field;
......@@ -273,7 +282,7 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
num_rows= find_all_keys(thd, &param, select,
sort,
&buffpek_pointers,
&tempfile,
&tempfile,
pq.is_initialized() ? &pq : NULL,
&sort->found_rows);
if (num_rows == HA_POS_ERROR)
......@@ -345,7 +354,7 @@ SORT_INFO *filesort(THD *thd, TABLE *table, Filesort *filesort,
}
error= 0;
err:
err:
my_free(param.tmp_buffer);
if (!subselect || !subselect->is_uncacheable())
{
......@@ -700,7 +709,7 @@ static void dbug_print_record(TABLE *table, bool print_rowid)
static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
SORT_INFO *fs_info,
IO_CACHE *buffpek_pointers,
IO_CACHE *buffpek_pointers,
IO_CACHE *tempfile,
Bounded_queue<uchar, uchar> *pq,
ha_rows *found_rows)
......@@ -709,8 +718,10 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
uint idx,indexpos,ref_length;
uchar *ref_pos,*next_pos,ref_buff[MAX_REFLENGTH];
my_off_t record;
TABLE *fs_tmp_table;
TABLE *sort_form;
handler *file;
handler *ref_file;
MY_BITMAP *save_read_set, *save_write_set, *save_vcol_set;
Item *sort_cond;
ha_rows retval;
......@@ -728,9 +739,24 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
quick_select=select && select->quick;
record=0;
*found_rows= 0;
if (!quick_select)
{
/*
Potentially create a temp table to avoid rnd_pos() calls on the
table to be sorted
*/
if (create_fs_tmp_table_if_needed(thd, param, fs_info))
goto err;
fs_tmp_table= fs_info->fs_tmp_table;
}
else
fs_tmp_table= NULL;
ref_file= (fs_tmp_table ? fs_tmp_table->file : file);
flag= ((file->ha_table_flags() & HA_REC_NOT_IN_SEQ) || quick_select);
if (flag)
ref_pos= &file->ref[0];
ref_pos= &ref_file->ref[0];
next_pos=ref_pos;
DBUG_EXECUTE_IF("show_explain_in_find_all_keys",
......@@ -760,6 +786,8 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
register_used_fields(param);
if (quick_select)
select->quick->add_used_key_part_to_set();
else
register_tmp_table_fields(fs_info);
sort_cond= (!select ? 0 :
(!select->pre_idx_push_select_cond ?
......@@ -786,18 +814,25 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
}
else /* Not quick-select */
{
error= file->ha_rnd_next(sort_form->record[0]);
if (!flag)
{
error= file->ha_rnd_next(sort_form->record[0]);
if (!flag)
{
my_store_ptr(ref_pos,ref_length,record); // Position to row
record+= sort_form->s->db_record_offset;
}
else if (!error)
file->position(sort_form->record[0]);
my_store_ptr(ref_pos,ref_length,record); // Position to row
record+= sort_form->s->db_record_offset;
}
else if (!error)
{
/*
If filesort is using a temp table, write the row to the temp table,
and save its row position
*/
if (fs_tmp_table)
error= write_fs_tmp_table_row(thd, fs_info);
else
file->position(sort_form->record[0]);
}
if (error && error != HA_ERR_RECORD_DELETED)
break;
break;
}
if (thd->check_killed())
......@@ -904,7 +939,19 @@ static ha_rows find_all_keys(THD *thd, Sort_param *param, SQL_SELECT *select,
DBUG_RETURN(retval);
err:
if (!quick_select)
{
if (file->inited)
{
(void)file->extra(HA_EXTRA_NO_CACHE); /* End cacheing of records */
if (!next_pos)
file->ha_rnd_end();
}
if (fs_tmp_table && fs_tmp_table->file->inited)
fs_tmp_table->file->ha_rnd_end();
}
sort_form->column_bitmaps_set(save_read_set, save_write_set, save_vcol_set);
free_fs_tmp_table(thd, fs_info);
DBUG_RETURN(HA_POS_ERROR);
} /* find_all_keys */
......@@ -1295,6 +1342,199 @@ static void register_used_fields(Sort_param *param)
}
/**
  Mark every column stored in the filesort temp table as readable in the
  table being sorted.

  Nothing is done when filesort is not using a temp table.

  @param fs_info         Filesort information that includes the filesort
                         temp table and its end-marker-terminated field
                         array.
*/

static void register_tmp_table_fields(SORT_INFO *fs_info)
{
  DBUG_ENTER("register_tmp_table_fields");

  if (!fs_info->fs_tmp_table)
    DBUG_VOID_RETURN;

  /* The array is terminated by an element whose from_field is NULL */
  for (Copy_field *cf= fs_info->tmp_field; cf->from_field; cf++)
  {
    Field *src= cf->from_field;

    /* Set the bit of the corresponding column in its own table's read set */
    bitmap_fast_test_and_set(src->table->read_set, src->field_index);
  }

  DBUG_VOID_RETURN;
}
/**
  Potentially create a filesort temp table to avoid rnd_pos() calls on the
  table to be sorted

  A temp table is created only when fs_info->tmp_fields is non-zero and the
  engine of the table being sorted reports that rnd_pos() is expensive.
  On success with a temp table, fs_info->tmp_field holds one Copy_field per
  column of the sorted table's read set (terminated by an element whose
  from_field is NULL), fs_info->fs_tmp_table holds the table, and the sort
  parameters are adjusted to the temp table's row reference length.
  On failure everything allocated here is released through
  free_fs_tmp_table(), which also resets fs_info->tmp_fields to 0.

  @param thd             Thread handle.
  @param param           Sort information and parameters.
  @param fs_info         Filesort information that includes the filesort
                         temp table and an array of its fields.

  @retval
    0                    Temp table creation succeeded or temp table is
                         unnecessary.
  @retval
    1                    Memory allocation failed or temp table creation
                         failed.
*/

static int create_fs_tmp_table_if_needed(THD *thd, Sort_param *param,
                                         SORT_INFO *fs_info)
{
  TABLE *table= param->sort_form;
  DBUG_ENTER("create_fs_tmp_table_if_needed");

  if (fs_info->tmp_fields && table->file->ha_is_rnd_pos_expensive())
  {
    /*
      Create a filesort temp table to avoid expensive rnd_pos() calls
      on the table to be sorted
    */
    Copy_field *tmp_field;
    List<Item> tmp_field_list;
    Item_field *item_field;
    Field **pfield;
    Field *field;

    /*
      Allocate memory for the temp table field array (one extra element
      for the end marker).
      NOTE(review): this is raw my_malloc() memory, so no Copy_field
      constructor runs on the elements - confirm Copy_field has no member
      that needs construction.
    */
    tmp_field= (Copy_field *)
      my_malloc(sizeof(Copy_field) * (fs_info->tmp_fields + 1),
                MYF(MY_WME | MY_THREAD_SPECIFIC));
    if (!tmp_field)
      DBUG_RETURN(1);
    fs_info->tmp_field= tmp_field;

    /* Initialize the field array elements */
    for (pfield= table->field; (field= *pfield); pfield++)
    {
      if (!bitmap_is_set(table->read_set, field->field_index))
        continue;
      /*
        All fields referenced in the query are to be written
        to the temp table.  The array was sized from fs_info->tmp_fields,
        so the read set must not contain more columns than that.
      */
      DBUG_ASSERT((uint) (tmp_field - fs_info->tmp_field) <
                  fs_info->tmp_fields);
      tmp_field->from_field= field;
      tmp_field++;
    }
    tmp_field->from_field= 0;                   // Put end marker

    /* Create the temp table field list */
    for (tmp_field= fs_info->tmp_field; tmp_field->from_field; tmp_field++)
    {
      item_field= new (thd->mem_root) Item_field(thd, tmp_field->from_field);
      /* Never hand a NULL item to create_tmp_table() */
      if (!item_field || tmp_field_list.push_back(item_field, thd->mem_root))
      {
        free_fs_tmp_table(thd, fs_info);
        DBUG_RETURN(1);
      }
    }

    TMP_TABLE_PARAM tmp_table_param;
    tmp_table_param.init();
    tmp_table_param.field_count= fs_info->tmp_fields;
    tmp_table_param.table_charset= table->s->table_charset;
    tmp_table_param.skip_create_table= TRUE;

    /*
      Create the filesort temp table.
      Every row read by the sort scan is written to this table, regardless
      of any LIMIT, so no row limit (HA_POS_ERROR) is requested here rather
      than param->max_rows.
      NOTE(review): create_tmp_table() is understood to cap the table's
      capacity at its rows_limit argument - confirm.
    */
    TABLE *fs_tmp_table= create_tmp_table(thd, &tmp_table_param,
                                          tmp_field_list,
                                          NULL,
                                          FALSE,
                                          FALSE,
                                          thd->variables.option_bits |
                                          TMP_TABLE_ALL_COLUMNS,
                                          HA_POS_ERROR, &empty_clex_str,
                                          FALSE, FALSE);
    if (!fs_tmp_table)
    {
      free_fs_tmp_table(thd, fs_info);
      DBUG_RETURN(1);
    }

    /*
      Fill in the pointers to the temp table fields in the field array.
      After this each element pairs a column of the table being sorted
      (from_field) with its temp table column.
    */
    for (tmp_field= fs_info->tmp_field, pfield= fs_tmp_table->field;
         (field= *pfield);
         tmp_field++, pfield++)
      tmp_field->set(field, tmp_field->from_field, FALSE);

    fs_info->fs_tmp_table= fs_tmp_table;

    /*
      Fix up the sort buffer parameters: row references stored with the
      sort keys now refer to temp table rows.
    */
    param->update_ref_length(fs_tmp_table->file->ref_length);
    fs_tmp_table->prepare_for_position();
  }

  DBUG_RETURN(0);
}
/**
  Store the current row of the table being sorted in the filesort temp
  table and remember where it was stored.

  Each column described by fs_info->tmp_field is copied into the temp
  table's record buffer, the record is written, and on success the
  handler's position() is called for the record just written.

  @param thd             Thread handle (not used by this function).
  @param fs_info         Filesort information that includes the filesort
                         temp table and an array of its fields.

  @retval
    0                    Temp table row was created and successfully written.
  @retval
    <> 0                 Temp table write failed; the value is the error
                         returned by ha_write_tmp_row().
*/

static int write_fs_tmp_table_row(THD *thd, SORT_INFO *fs_info)
{
  DBUG_ENTER("write_fs_tmp_table_row");
  TABLE *tmp_tbl= fs_info->fs_tmp_table;

  /* Populate the temp table record from the row of the sorted table */
  Copy_field *cf= fs_info->tmp_field;
  while (cf->from_field)
  {
    cf->do_copy(cf);
    cf++;
  }

  int rc= tmp_tbl->file->ha_write_tmp_row(tmp_tbl->record[0]);
  if (!rc)
  {
    /* Save the written row's position in the temp table */
    tmp_tbl->file->position(tmp_tbl->record[0]);
  }

  DBUG_RETURN(rc);
}
/**
  Release the filesort temp table, if there is one, together with the
  field array that describes it, and reset the related SORT_INFO members.

  Safe to call when no temp table was created.

  @param thd             Thread handle.
  @param fs_info         Filesort information that includes the filesort
                         temp table and an array of its fields.
*/

static void free_fs_tmp_table(THD *thd, SORT_INFO *fs_info)
{
  TABLE *tmp_tbl= fs_info->fs_tmp_table;

  if (tmp_tbl != NULL)
  {
    free_tmp_table(thd, tmp_tbl);
    fs_info->fs_tmp_table= NULL;
  }

  /* The field array may exist even if table creation itself failed */
  my_free(fs_info->tmp_field);
  fs_info->tmp_fields= 0;
  fs_info->tmp_field= NULL;
}
static bool save_index(Sort_param *param, uint count,
SORT_INFO *table_sort)
{
......@@ -2010,6 +2250,8 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
@param ptabfield Array of references to the table fields
@param sortlength Total length of sorted fields
@param [out] addon_buf Buffer to use for appended fields
@param [out] ptmp_fields Pointer to the number of temp table fields,
if any
@note
The null bits for the appended values are supposed to be put together
......@@ -2023,20 +2265,25 @@ sortlength(THD *thd, SORT_FIELD *sortorder, uint s_length,
static SORT_ADDON_FIELD *
get_addon_fields(ulong max_length_for_sort_data,
Field **ptabfield, uint sortlength, LEX_STRING *addon_buf)
Field **ptabfield, uint sortlength, LEX_STRING *addon_buf,
uint *ptmp_fields)
{
Field **pfield;
Field *field;
SORT_ADDON_FIELD *addonf;
uint length= 0;
uint fields= 0;
SORT_ADDON_FIELD *addonf= NULL;
uint addon_length= 0;
uint addon_fields= 0;
uint null_fields= 0;
uint nonaddon_fields= 0;
bool has_blob_field= FALSE;
MY_BITMAP *read_set= (*ptabfield)->table->read_set;
DBUG_ENTER("get_addon_fields");
/*
If there is a reference to a field in the query add it
to the the set of appended fields.
If there is a reference to a field in the query that is not a blob/text
field, add it to the set of appended fields.
We cannot use addons if there is a blob/text field.
All referenced fields are written to the temp table.
Note for future refinement:
This is too strong a condition.
Actually we need only the fields referred in the
......@@ -2051,34 +2298,52 @@ get_addon_fields(ulong max_length_for_sort_data,
{
if (!bitmap_is_set(read_set, field->field_index))
continue;
if (field->flags & BLOB_FLAG)
DBUG_RETURN(0);
length+= field->max_packed_col_length(field->pack_length());
if (field->maybe_null())
null_fields++;
fields++;
}
if (!fields)
DBUG_RETURN(0);
length+= (null_fields+7)/8;
if (has_blob_field)
nonaddon_fields++;
else if (field->flags & BLOB_FLAG)
{
has_blob_field= TRUE;
nonaddon_fields= (addon_fields + 1);
null_fields= 0;
addon_fields= 0;
addon_length= 0;
}
else
{
addon_length+= field->max_packed_col_length(field->pack_length());
addon_fields++;
if (field->maybe_null())
null_fields++;
}
}
if (nonaddon_fields)
*ptmp_fields= addon_fields + nonaddon_fields; // Total number of fields
else
*ptmp_fields= 0; // Temp table is unnecessary
if (!addon_fields)
DBUG_RETURN(NULL);
if (length+sortlength > max_length_for_sort_data ||
addon_length+= (null_fields+7)/8;
if (addon_length+sortlength > max_length_for_sort_data ||
!my_multi_malloc(MYF(MY_WME | MY_THREAD_SPECIFIC),
&addonf, sizeof(SORT_ADDON_FIELD) * (fields+1),
&addon_buf->str, length,
&addonf, sizeof(SORT_ADDON_FIELD) * (addon_fields+1),
&addon_buf->str, addon_length,
NullS))
{
*ptmp_fields= addon_fields + nonaddon_fields; // Total number of fields
DBUG_RETURN(NULL);
}
DBUG_RETURN(0);
addon_buf->length= length;
length= (null_fields+7)/8;
addon_buf->length= addon_length;
addon_length= (null_fields+7)/8;
null_fields= 0;
for (pfield= ptabfield; (field= *pfield) ; pfield++)
{
if (!bitmap_is_set(read_set, field->field_index))
continue;
addonf->field= field;
addonf->offset= length;
addonf->offset= addon_length;
if (field->maybe_null())
{
addonf->null_offset= null_fields/8;
......@@ -2091,13 +2356,13 @@ get_addon_fields(ulong max_length_for_sort_data,
addonf->null_bit= 0;
}
addonf->length= field->max_packed_col_length(field->pack_length());
length+= addonf->length;
addon_length+= addonf->length;
addonf++;
}
addonf->field= 0; // Put end marker
DBUG_PRINT("info",("addon_length: %d",length));
DBUG_RETURN(addonf-fields);
DBUG_PRINT("info",("addon_length: %d",addon_length));
DBUG_RETURN(addonf-addon_fields);
}
......
......@@ -27,6 +27,7 @@ class Filesort_tracker;
struct SORT_FIELD;
typedef struct st_order ORDER;
class JOIN;
class Copy_field;
/**
......@@ -87,7 +88,8 @@ class SORT_INFO
public:
SORT_INFO()
:addon_field(0), record_pointers(0)
:addon_field(0), record_pointers(0),
fs_tmp_table(NULL), tmp_field(NULL), tmp_fields(0)
{
buffpek.str= 0;
my_b_clear(&io_cache);
......@@ -101,6 +103,9 @@ class SORT_INFO
my_free(record_pointers);
my_free(buffpek.str);
my_free(addon_field);
fs_tmp_table= NULL; // Freed in end_read_record()
tmp_field= NULL; // Freed in end_read_record()
tmp_fields= 0;
}
void reset()
......@@ -119,6 +124,11 @@ class SORT_INFO
/* To unpack back */
void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *);
uchar *record_pointers; /* If sorted in memory */
TABLE *fs_tmp_table; /* Optional temp table used by filesort to */
/* eliminate rnd_pos() calls to the table */
/* being sorted */
Copy_field *tmp_field; /* Filesort temp table field array */
uint tmp_fields; /* Number of filesort temp table fields */
/*
How many rows in final result.
Also how many rows in record_pointers, if used
......
......@@ -5185,6 +5185,36 @@ int ha_partition::rnd_pos_by_record(uchar *record)
}
/*
Determine whether a call to rnd_pos() is expensive
SYNOPSIS
is_rnd_pos_expensive()
RETURN VALUE
FALSE No inherent inefficiencies in rnd_pos()
TRUE rnd_pos() call is inefficient
DESCRIPTION
Some engines, such as Spider, have an inefficient implementation of
rnd_pos(), because they need to do a remote access to fetch the
single table row. Determine whether the rnd_pos() implementation
for any of the partitions is expensive.
*/
bool ha_partition::is_rnd_pos_expensive()
{
DBUG_ENTER("ha_partition::is_rnd_pos_expensive");
uint i;
for (i= 0; i < m_tot_parts; i++)
if (m_file[i]->ha_is_rnd_pos_expensive())
DBUG_RETURN(TRUE);
DBUG_RETURN(FALSE);
}
/****************************************************************************
MODULE index scan
****************************************************************************/
......
......@@ -691,6 +691,7 @@ class ha_partition :public handler
virtual int rnd_next(uchar * buf);
virtual int rnd_pos(uchar * buf, uchar * pos);
virtual int rnd_pos_by_record(uchar *record);
virtual bool is_rnd_pos_expensive();
virtual void position(const uchar * record);
/*
......
......@@ -3536,6 +3536,17 @@ class handler :public Sql_alloc
position(record);
return rnd_pos(record, ref);
}
/**
Report whether this engine's rnd_pos() is expensive to call.

Some engines are unable to provide an efficient implementation
for rnd_pos(). Spider is such an engine, as a call to rnd_pos()
needs to access a table on a remote data node to retrieve the
single table row.

Engines for which this applies override this method to return TRUE.

@retval FALSE   Default: rnd_pos() is not considered expensive.
*/
virtual bool is_rnd_pos_expensive()
{
/* Engine's rnd_pos() implementation has no inherent inefficiencies */
return FALSE;
}
virtual int read_first_row(uchar *buf, uint primary_key);
public:
......@@ -3545,6 +3556,10 @@ class handler :public Sql_alloc
int ha_rnd_next(uchar *buf);
int ha_rnd_pos(uchar *buf, uchar *pos);
inline int ha_rnd_pos_by_record(uchar *buf);
/**
Public entry point that forwards to the engine's virtual
is_rnd_pos_expensive() and returns its result unchanged.
*/
inline bool ha_is_rnd_pos_expensive()
{
return is_rnd_pos_expensive();
}
inline int ha_read_first_row(uchar *buf, uint primary_key);
/**
......
......@@ -38,16 +38,21 @@
static int rr_quick(READ_RECORD *info);
int rr_sequential(READ_RECORD *info);
static int rr_from_tempfile(READ_RECORD *info);
static int rr_from_tempfile_and_copy(READ_RECORD *info);
static int rr_unpack_from_tempfile(READ_RECORD *info);
static int rr_unpack_from_buffer(READ_RECORD *info);
int rr_from_pointers(READ_RECORD *info);
int rr_from_pointers_and_copy(READ_RECORD *info);
static int rr_from_cache(READ_RECORD *info);
static int rr_from_cache_and_copy(READ_RECORD *info);
static int init_rr_cache(THD *thd, READ_RECORD *info);
static int rr_cmp(uchar *a,uchar *b);
static int rr_index_first(READ_RECORD *info);
static int rr_index_last(READ_RECORD *info);
static int rr_index(READ_RECORD *info);
static int rr_index_desc(READ_RECORD *info);
static int init_copy(READ_RECORD *info);
static void end_copy(READ_RECORD *info);
/**
......@@ -77,6 +82,11 @@ bool init_read_record_idx(READ_RECORD *info, THD *thd, TABLE *table,
bzero((char*) info,sizeof(*info));
info->thd= thd;
info->table= table;
info->copy_table= NULL;
info->tmp_field= NULL;
info->tmp_fields= 0;
info->free_tmp_table= FALSE;
info->addon_field= NULL;
info->record= table->record[0];
info->print_error= print_error;
info->unlock_row= rr_unlock_row;
......@@ -188,13 +198,39 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
bool disable_rr_cache)
{
IO_CACHE *tempfile;
SORT_ADDON_FIELD *addon_field= filesort ? filesort->addon_field : 0;
SORT_ADDON_FIELD *addon_field;
bool has_fs_tmp_table;
DBUG_ENTER("init_read_record");
bzero((char*) info,sizeof(*info));
info->thd=thd;
if (filesort)
{
if (filesort->fs_tmp_table)
{
has_fs_tmp_table= TRUE;
info->copy_table= table;
table= filesort->fs_tmp_table;
}
else
{
has_fs_tmp_table= FALSE;
info->copy_table= NULL;
}
info->tmp_field= filesort->tmp_field;
info->tmp_fields= filesort->tmp_fields;
addon_field= filesort->addon_field;
}
else
{
has_fs_tmp_table= FALSE;
info->copy_table= NULL;
info->tmp_field= NULL;
info->tmp_fields= 0;
addon_field= NULL;
}
info->free_tmp_table= has_fs_tmp_table;
info->table=table;
info->forms= &info->table; /* Only one table */
info->addon_field= addon_field;
if ((table->s->tmp_table == INTERNAL_TMP_TABLE ||
......@@ -230,13 +266,18 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
{
DBUG_PRINT("info",("using rr_from_tempfile"));
info->read_record_func=
addon_field ? rr_unpack_from_tempfile : rr_from_tempfile;
addon_field ? rr_unpack_from_tempfile :
has_fs_tmp_table ? rr_from_tempfile_and_copy :
rr_from_tempfile;
info->io_cache= tempfile;
reinit_io_cache(info->io_cache,READ_CACHE,0L,0,0);
info->ref_pos=table->file->ref;
info->ref_pos= table->file->ref;
if (!table->file->inited)
if (table->file->ha_rnd_init_with_error(0))
DBUG_RETURN(1);
if (has_fs_tmp_table)
if (init_copy(info))
DBUG_RETURN(1);
/*
addon_field is checked because if we use addon fields,
......@@ -245,22 +286,26 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
*/
if (!disable_rr_cache &&
!addon_field &&
thd->variables.read_rnd_buff_size &&
!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
(table->db_stat & HA_READ_ONLY ||
table->reginfo.lock_type <= TL_READ_NO_INSERT) &&
(ulonglong) table->s->reclength* (table->file->stats.records+
table->file->stats.deleted) >
(ulonglong) MIN_FILE_LENGTH_TO_USE_ROW_CACHE &&
info->io_cache->end_of_file/info->ref_length * table->s->reclength >
(my_off_t) MIN_ROWS_TO_USE_TABLE_CACHE &&
!table->s->blob_fields &&
thd->variables.read_rnd_buff_size &&
!(table->file->ha_table_flags() & HA_FAST_KEY_READ) &&
(table->db_stat & HA_READ_ONLY ||
table->reginfo.lock_type <= TL_READ_NO_INSERT) &&
(ulonglong) table->s->reclength*
(table->file->stats.records+
table->file->stats.deleted) >
(ulonglong) MIN_FILE_LENGTH_TO_USE_ROW_CACHE &&
info->io_cache->end_of_file/info->ref_length *
table->s->reclength >
(my_off_t) MIN_ROWS_TO_USE_TABLE_CACHE &&
!table->s->blob_fields &&
info->ref_length <= MAX_REFLENGTH)
{
if (! init_rr_cache(thd, info))
{
DBUG_PRINT("info",("using rr_from_cache"));
info->read_record_func= rr_from_cache;
info->read_record_func=
has_fs_tmp_table ? rr_from_cache_and_copy :
rr_from_cache;
DBUG_PRINT("info",("using rr_from_cache"));
}
}
}
......@@ -272,13 +317,19 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
else if (filesort && filesort->record_pointers)
{
DBUG_PRINT("info",("using record_pointers"));
if (table->file->ha_rnd_init_with_error(0))
DBUG_RETURN(1);
if (!table->file->inited)
if (table->file->ha_rnd_init_with_error(0))
DBUG_RETURN(1);
info->cache_pos= filesort->record_pointers;
info->cache_end= (info->cache_pos+
filesort->return_rows * info->ref_length);
info->read_record_func=
addon_field ? rr_unpack_from_buffer : rr_from_pointers;
addon_field ? rr_unpack_from_buffer :
has_fs_tmp_table ? rr_from_pointers_and_copy :
rr_from_pointers;
if (has_fs_tmp_table)
if (init_copy(info))
DBUG_RETURN(1);
}
else if (table->file->keyread_enabled())
{
......@@ -300,11 +351,11 @@ bool init_read_record(READ_RECORD *info,THD *thd, TABLE *table,
DBUG_RETURN(1);
/* We can use record cache if we don't update dynamic length tables */
if (!table->no_cache &&
(use_record_cache > 0 ||
(int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY ||
!(table->s->db_options_in_use & HA_OPTION_PACK_RECORD) ||
(use_record_cache < 0 &&
!(table->file->ha_table_flags() & HA_NOT_DELETE_WITH_CACHE))))
(use_record_cache > 0 ||
(int) table->reginfo.lock_type <= (int) TL_READ_HIGH_PRIORITY ||
!(table->s->db_options_in_use & HA_OPTION_PACK_RECORD) ||
(use_record_cache < 0 &&
!(table->file->ha_table_flags() & HA_NOT_DELETE_WITH_CACHE))))
(void) table->file->extra_opt(HA_EXTRA_CACHE,
thd->variables.read_buff_size);
}
......@@ -333,6 +384,15 @@ void end_read_record(READ_RECORD *info)
(void) info->table->file->extra(HA_EXTRA_NO_CACHE);
if (info->read_record_func != rr_quick) // otherwise quick_range does it
(void) info->table->file->ha_index_or_rnd_end();
if (info->free_tmp_table)
{
free_tmp_table(info->thd, info->table);
end_copy(info);
my_free(info->tmp_field);
info->tmp_field= NULL;
info->tmp_fields= 0;
info->free_tmp_table= FALSE;
}
info->table=0;
}
}
......@@ -521,7 +581,35 @@ static int rr_from_tempfile(READ_RECORD *info)
/**
  Read the next sorted row through rr_from_tempfile() and copy it to the
  table being sorted.

  Used when filesort stored its rows in a filesort temp table: the row is
  fetched with rr_from_tempfile() and, if that succeeds, every field
  described by info->copy_field .. info->copy_field_end is copied.

  @param info  Reference to the context including record descriptors

  @retval
    0   Record successfully read and copied.
  @retval
    <> 0  Value returned by rr_from_tempfile(); nothing was copied.
*/

int rr_from_tempfile_and_copy(READ_RECORD *info)
{
  int rc= rr_from_tempfile(info);

  if (!rc)
  {
    Copy_field *cp= info->copy_field;
    while (cp != info->copy_field_end)
    {
      (*cp->do_copy)(cp);
      cp++;
    }
  }

  return rc;
}
/**
Read a result set record from a temporary file after sorting.
The function first reads the next sorted record from the temporary file
into a buffer. If a success it calls a callback function that unpacks
the fields values use in the result set from this buffer into their
positions in the regular record buffer.
......@@ -569,6 +657,35 @@ int rr_from_pointers(READ_RECORD *info)
return tmp;
}
/**
  Read the next sorted row through rr_from_pointers() and copy it to the
  table being sorted.

  Used when filesort stored its rows in a filesort temp table and the
  sorted row references are kept in filesort's record_pointers buffer:
  the row is fetched with rr_from_pointers() and, if that succeeds, every
  field described by info->copy_field .. info->copy_field_end is copied.

  @param info  Reference to the context including record descriptors

  @retval
    0   Record successfully read and copied.
  @retval
    <> 0  Value returned by rr_from_pointers(); nothing was copied.
*/

int rr_from_pointers_and_copy(READ_RECORD *info)
{
  int rc= rr_from_pointers(info);

  if (!rc)
  {
    Copy_field *cp= info->copy_field;
    while (cp != info->copy_field_end)
    {
      (*cp->do_copy)(cp);
      cp++;
    }
  }

  return rc;
}
/**
Read a result set record from a buffer after sorting.
......@@ -703,6 +820,114 @@ static int rr_from_cache(READ_RECORD *info)
} /* rr_from_cache */
/**
  Read the next sorted row through rr_from_cache() and copy it to the
  table being sorted.

  Used when filesort stored its rows in a filesort temp table and the
  record cache is in use: the row is fetched with rr_from_cache() and, if
  that succeeds, every field described by
  info->copy_field .. info->copy_field_end is copied.

  @param info  Reference to the context including record descriptors

  @retval
    0   Record successfully read and copied.
  @retval
    <> 0  Value returned by rr_from_cache(); nothing was copied.
*/

int rr_from_cache_and_copy(READ_RECORD *info)
{
  int rc= rr_from_cache(info);

  if (!rc)
  {
    Copy_field *cp= info->copy_field;
    while (cp != info->copy_field_end)
    {
      (*cp->do_copy)(cp);
      cp++;
    }
  }

  return rc;
}
/**
  Set up for copying the fields of the current row
  from the filesort temp table to the table being sorted.

  Allocates the Copy_field descriptors (info->copy_field ..
  info->copy_field_end), saves the original write set of the table being
  sorted in info->save_write_set and installs a freshly allocated write
  set in which exactly the copied columns are set.

  On failure nothing is left behind: all memory allocated here is freed,
  and info->copy_field, info->save_write_set and the table's write set
  are left untouched.

  @param info  Reference to the context including record descriptors

  @retval
    0   Success.
  @retval
    1   Memory allocation failure.
*/

static int init_copy(READ_RECORD *info)
{
  TABLE *table= info->copy_table;
  Copy_field *tmp_field;
  Copy_field *copy_field;
  Copy_field *copy_field_start;
  MY_BITMAP *write_set;

  /*
    Allocate the memory for the copy_field descriptors.
    NOTE(review): this is raw my_malloc() memory, so no Copy_field
    constructor runs on the elements - confirm Copy_field has no member
    that needs construction.
  */
  copy_field_start= (Copy_field *)
    my_malloc(sizeof(Copy_field) * info->tmp_fields,
              MYF(MY_WME | MY_THREAD_SPECIFIC));
  if (!copy_field_start)
    return 1;

  /* Allocate the memory for the updated table write set */
  if (!(write_set= (MY_BITMAP *)
        my_malloc(sizeof(MY_BITMAP),
                  MYF(MY_WME | MY_THREAD_SPECIFIC))))
  {
    my_free(copy_field_start);
    return 1;
  }

  /*
    Initialize the column bitmap for the updated table write set.
    Passing no buffer makes my_bitmap_init() allocate one, which can fail.
  */
  if (my_bitmap_init(write_set, NULL, table->s->fields, FALSE))
  {
    my_free(write_set);
    my_free(copy_field_start);
    return 1;
  }

  /* Nothing can fail from here on, so it is safe to publish the state */
  info->copy_field= copy_field_start;
  info->save_write_set= table->write_set;
  table->column_bitmaps_set_no_signal(table->read_set, write_set);

  /*
    Each column value present in the temp table needs to be copied
    to the table being sorted
  */
  copy_field= copy_field_start;
  for (tmp_field= info->tmp_field; tmp_field->from_field; tmp_field++)
  {
    bitmap_fast_test_and_set(table->write_set,
                             tmp_field->from_field->field_index);
    /* Reverse direction of the filesort array: temp table -> sorted table */
    copy_field->set(tmp_field->from_field, tmp_field->to_field, FALSE);
    copy_field++;
  }
  table->file->column_bitmaps_signal();
  info->copy_field_end= copy_field;

  return 0;
}
/**
  Do cleanup at the completion of copying field values from the
  filesort temp table to the table being sorted.

  Restores the original write set of the table being sorted and frees the
  write set and the Copy_field descriptors allocated by init_copy().

  info->save_write_set is only set by a successful init_copy(), so it is
  used to tell whether there is anything to undo.  Without that check a
  call made after init_copy() failed, or was never reached, would free the
  table's own write set and install a NULL one in its place.

  @param info  Reference to the context including record descriptors
*/

static void end_copy(READ_RECORD *info)
{
  if (info->save_write_set)
  {
    TABLE *table= info->copy_table;
    /* The write set currently installed is the one init_copy() allocated */
    MY_BITMAP *write_set= table->write_set;

    table->column_bitmaps_set(table->read_set, info->save_write_set);
    my_bitmap_free(write_set);
    my_free(write_set);
    my_free(info->copy_field);
  }

  info->copy_table= NULL;
  info->save_write_set= NULL;
  info->copy_field= info->copy_field_end= NULL;
}
static int rr_cmp(uchar *a,uchar *b)
{
if (a[0] != b[0])
......
......@@ -27,6 +27,8 @@ class SQL_SELECT;
class Copy_field;
class SORT_INFO;
#include "my_bitmap.h"
struct READ_RECORD;
void end_read_record(READ_RECORD *info);
......@@ -53,7 +55,9 @@ struct READ_RECORD
TABLE *table; /* Head-form */
//handler *file;
TABLE **forms; /* head and ref forms */
TABLE *copy_table; /* Original table that a */
/* filesort temp table */
/* row is copied to */
Unlock_row_func unlock_row;
Read_func read_record_func;
THD *thd;
......@@ -61,25 +65,38 @@ struct READ_RECORD
uint cache_records;
uint ref_length,struct_length,reclength,rec_cache_size,error_offset;
uint index;
uchar *ref_pos; /* pointer to form->refpos */
uint tmp_fields; /* Number of filesort temp
table fields */
MY_BITMAP *save_write_set; /* Original write set when */
/* records are read from */
/* the filesort temp table */
/* and copied to the */
/* original table */
uchar *ref_pos; /* Pointer to form->refpos */
uchar *record;
uchar *rec_buf; /* to read field values after filesort */
uchar *cache,*cache_pos,*cache_end,*read_positions;
struct st_sort_addon_field *addon_field; /* Pointer to the fields info */
struct st_sort_addon_field *addon_field; /* Pointer to the fields info */
Copy_field *tmp_field; /* Filesort temp table */
/* field array */
struct st_io_cache *io_cache;
bool print_error, ignore_not_found_rows;
bool print_error, ignore_not_found_rows, free_tmp_table;
void (*unpack)(struct st_sort_addon_field *, uchar *, uchar *);
int read_record() { return read_record_func(this); }
/*
/*
SJ-Materialization runtime may need to read fields from the materialized
table and unpack them into original table fields:
table and unpack them into original table fields.
Read following a filesort may need to read fields from its temp table
and unpack them into the corresponding original table fields.
*/
Copy_field *copy_field;
Copy_field *copy_field_end;
public:
READ_RECORD() : table(NULL), cache(NULL) {}
READ_RECORD()
: table(NULL), cache(NULL), copy_field(NULL), copy_field_end(NULL) {}
~READ_RECORD() { end_read_record(this); }
};
......
......@@ -5767,7 +5767,6 @@ class user_var_entry
user_var_entry *get_variable(HASH *hash, LEX_CSTRING *name,
bool create_if_not_exists);
class SORT_INFO;
class multi_delete :public select_result_interceptor
{
TABLE_LIST *delete_tables, *table_being_deleted;
......
......@@ -93,7 +93,24 @@ class Sort_param {
}
void init_for_filesort(uint sortlen, TABLE *table,
ulong max_length_for_sort_data,
ha_rows maxrows, bool sort_positions);
ha_rows maxrows, bool sort_positions,
uint *tmp_fields);
/**
  Replace the row reference length accounted for in the sort record
  lengths.

  Has no effect when addon fields are in use.  Otherwise the current
  ref_length is removed from res_length, sort_length and rec_length, the
  new length is added to each, and ref_length is updated.

  @param new_ref_length  Row reference length to account for from now on.
*/
void update_ref_length(uint new_ref_length)
{
  if (addon_field)
    return;

  /* Subtracting a zero ref_length is a no-op, so no special case is needed */
  res_length-= ref_length;
  res_length+= new_ref_length;

  sort_length-= ref_length;
  sort_length+= new_ref_length;

  rec_length-= ref_length;
  rec_length+= new_ref_length;

  ref_length= new_ref_length;
}
};
......
......@@ -429,6 +429,15 @@ class ha_spider: public handler
KEY_MULTI_RANGE **found_range_p
);
#endif
/**
Report that rnd_pos() is expensive for this engine.

Spider's implementation of rnd_pos() is inherently inefficient.
A call to rnd_pos() needs to access a table on a remote data node
to retrieve the single table row.

@retval TRUE   Always.
*/
virtual bool is_rnd_pos_expensive()
{
return TRUE;
}
int rnd_init(
bool scan
);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment