Commit 1a8a63d0 authored by marko

branches/zip: Reimplement merge sort in fast index creation.

The creation of the primary key does not work.  We will have to flag
externally stored columns and copy the externally stored part from
the old table.

row_build_index_for_mysql(): Rename to row_merge_build_indexes().
Move from row0mysql.c to row0merge.c.

Remove private declarations from row0merge.h.  Make many functions static
in row0merge.c.

cmp_rec_rec_simple(): A new comparison function.

dict_index_get_min_size(): New function.

OS_FILE_FROM_FD(fd): A macro for converting from int to os_file_t.

rec_convert_dtuple_to_rec_comp(): Make the interface lower-level.

rec_get_converted_size_comp(): Return also extra_size.

UT_SORT_FUNCTION_BODY(): Remove reference to an obsolete test program.

row_rec_to_index_entry_low(): New function.

row0merge.c: Implement merge sort based on file streams instead of
fixed-size blocks.  Sort the small blocks as arrays of dfield_t*,
because it is faster than invoking rec_get_offsets() for every
comparison.
parent 673f836f
...@@ -8283,7 +8283,7 @@ err_exit: ...@@ -8283,7 +8283,7 @@ err_exit:
/* Read clustered index of the table and build indexes /* Read clustered index of the table and build indexes
based on this information using temporary files and merge based on this information using temporary files and merge
sort.*/ sort.*/
error = row_build_index_for_mysql( error = row_merge_build_indexes(
trx, innodb_table, indexed_table, index, trx, innodb_table, indexed_table, index,
num_of_idx); num_of_idx);
......
...@@ -656,6 +656,14 @@ dict_table_get_sys_col_no( ...@@ -656,6 +656,14 @@ dict_table_get_sys_col_no(
const dict_table_t* table, /* in: table */ const dict_table_t* table, /* in: table */
ulint sys); /* in: DATA_ROW_ID, ... */ ulint sys); /* in: DATA_ROW_ID, ... */
/************************************************************************ /************************************************************************
Returns the minimum data size of an index record. */
UNIV_INLINE
ulint
dict_index_get_min_size(
/*====================*/
/* out: minimum data size in bytes, i.e. the sum of
dict_col_get_min_size() over the columns of all
fields of the index */
const dict_index_t* index); /* in: index */
/************************************************************************
Check whether the table uses the compact page format. */ Check whether the table uses the compact page format. */
UNIV_INLINE UNIV_INLINE
ibool ibool
......
...@@ -507,6 +507,26 @@ dict_index_get_nth_col_no( ...@@ -507,6 +507,26 @@ dict_index_get_nth_col_no(
return(dict_col_get_no(dict_index_get_nth_col(index, pos))); return(dict_col_get_no(dict_index_get_nth_col(index, pos)));
} }
/************************************************************************
Computes the minimum data size of an index record by adding up the
minimum sizes of the columns of all fields of the index. */
UNIV_INLINE
ulint
dict_index_get_min_size(
/*====================*/
					/* out: minimum data size in bytes */
	const dict_index_t*	index)	/* in: index */
{
	ulint	min_size	= 0;
	ulint	pos;

	/* Visit the fields from the last one down to the first one;
	pos is always one past the field being examined. */
	for (pos = dict_index_get_n_fields(index); pos > 0; pos--) {
		min_size += dict_col_get_min_size(
			dict_index_get_nth_col(index, pos - 1));
	}

	return(min_size);
}
/************************************************************************* /*************************************************************************
Gets the space id of the root of the index tree. */ Gets the space id of the root of the index tree. */
UNIV_INLINE UNIV_INLINE
......
...@@ -43,8 +43,10 @@ extern ulint os_n_pending_writes; ...@@ -43,8 +43,10 @@ extern ulint os_n_pending_writes;
#ifdef __WIN__ #ifdef __WIN__
#define os_file_t HANDLE #define os_file_t HANDLE
/* Converts a C runtime file descriptor (int) to the native file handle.
_get_osfhandle() returns an integer type, so the result is cast explicitly
to os_file_t; the outer parentheses keep the expansion a single operand. */
#define OS_FILE_FROM_FD(fd) ((os_file_t) _get_osfhandle(fd))
#else #else
typedef int os_file_t; typedef int os_file_t;
/* In this branch os_file_t is a plain int file descriptor, so no conversion
is needed.  The parentheses keep the expansion a single operand even when
the argument is an expression. */
#define OS_FILE_FROM_FD(fd) (fd)
#endif #endif
extern ulint os_innodb_umask; extern ulint os_innodb_umask;
......
...@@ -125,6 +125,22 @@ cmp_dtuple_is_prefix_of_rec( ...@@ -125,6 +125,22 @@ cmp_dtuple_is_prefix_of_rec(
const dtuple_t* dtuple, /* in: data tuple */ const dtuple_t* dtuple, /* in: data tuple */
const rec_t* rec, /* in: physical record */ const rec_t* rec, /* in: physical record */
const ulint* offsets);/* in: array returned by rec_get_offsets() */ const ulint* offsets);/* in: array returned by rec_get_offsets() */
#ifndef UNIV_HOTBACKUP
/*****************************************************************
Compare two physical records that contain the same number of columns,
none of which are stored externally. The fields are compared in index
order, and an SQL NULL field is considered smaller than any non-NULL
field. */
int
cmp_rec_rec_simple(
/*===============*/
/* out: 1, 0, -1 if rec1 is greater, equal,
less, respectively, than rec2 */
const rec_t* rec1, /* in: physical record */
const rec_t* rec2, /* in: physical record */
const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
dict_index_t* index); /* in: data dictionary index */
#endif /* !UNIV_HOTBACKUP */
/***************************************************************** /*****************************************************************
This function is used to compare two physical records. Only the common This function is used to compare two physical records. Only the common
first fields are compared, and if an externally stored field is first fields are compared, and if an externally stored field is
......
...@@ -607,16 +607,17 @@ rec_fold( ...@@ -607,16 +607,17 @@ rec_fold(
/************************************************************* /*************************************************************
Builds a ROW_FORMAT=COMPACT record out of a data tuple. */ Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
byte* void
rec_convert_dtuple_to_rec_comp( rec_convert_dtuple_to_rec_comp(
/*===========================*/ /*===========================*/
/* out: pointer to the start of data payload */ rec_t* rec, /* in: origin of record */
byte* buf, /* in: start address of the data area */
ulint extra, /* in: number of bytes to reserve between ulint extra, /* in: number of bytes to reserve between
the record header and the data payload the record header and the data payload
(usually REC_N_NEW_EXTRA_BYTES) */ (usually REC_N_NEW_EXTRA_BYTES) */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
const dtuple_t* dtuple, /* in: data tuple */ ulint status, /* in: status bits of the record */
const dfield_t* fields, /* in: array of data fields */
ulint n_fields,/* in: number of data fields */
const ulint* ext, /* in: array of extern field numbers, const ulint* ext, /* in: array of extern field numbers,
in ascending order */ in ascending order */
ulint n_ext); /* in: number of elements in ext */ ulint n_ext); /* in: number of elements in ext */
...@@ -657,9 +658,12 @@ rec_get_converted_size_comp( ...@@ -657,9 +658,12 @@ rec_get_converted_size_comp(
/* out: size */ /* out: size */
dict_index_t* index, /* in: record descriptor; dict_index_t* index, /* in: record descriptor;
dict_table_is_comp() is assumed to hold */ dict_table_is_comp() is assumed to hold */
const dtuple_t* dtuple, /* in: data tuple */ ulint status, /* in: status bits of the record */
const dfield_t* fields, /* in: array of data fields */
ulint n_fields,/* in: number of data fields */
const ulint* ext, /* in: array of extern field numbers */ const ulint* ext, /* in: array of extern field numbers */
ulint n_ext); /* in: number of elements in ext */ ulint n_ext, /* in: number of elements in ext */
ulint* extra); /* out: extra size */
/************************************************************** /**************************************************************
The following function returns the size of a data tuple when converted to The following function returns the size of a data tuple when converted to
a physical record. */ a physical record. */
......
...@@ -1538,7 +1538,12 @@ rec_get_converted_size( ...@@ -1538,7 +1538,12 @@ rec_get_converted_size(
: dict_index_get_n_fields(index))); : dict_index_get_n_fields(index)));
if (dict_table_is_comp(index->table)) { if (dict_table_is_comp(index->table)) {
return(rec_get_converted_size_comp(index, dtuple, ext, n_ext)); return(rec_get_converted_size_comp(index,
dtuple_get_info_bits(dtuple)
& REC_NEW_STATUS_MASK,
dtuple->fields,
dtuple->n_fields,
ext, n_ext, NULL));
} }
data_size = dtuple_get_data_size(dtuple); data_size = dtuple_get_data_size(dtuple);
......
...@@ -21,17 +21,6 @@ Created 13/06/2005 Jan Lindstrom ...@@ -21,17 +21,6 @@ Created 13/06/2005 Jan Lindstrom
#include "btr0types.h" #include "btr0types.h"
#include "row0mysql.h" #include "row0mysql.h"
/* Information about temporary files used in merge sort are stored
to this structure */
struct merge_file_struct {
os_file_t file; /* File descriptor */
ulint offset; /* File offset */
ulint num_of_blocks; /* Number of blocks */
};
typedef struct merge_file_struct merge_file_t;
/* This structure holds index field definitions */ /* This structure holds index field definitions */
struct merge_index_field_struct { struct merge_index_field_struct {
...@@ -53,48 +42,6 @@ struct merge_index_def_struct { ...@@ -53,48 +42,6 @@ struct merge_index_def_struct {
typedef struct merge_index_def_struct merge_index_def_t; typedef struct merge_index_def_struct merge_index_def_t;
/************************************************************************
Reads clustered index of the table and create temporary files
containing index entries for indexes to be built. */
ulint
row_merge_read_clustered_index(
/*===========================*/
/* out: DB_SUCCESS if successfull,
or ERROR code */
trx_t* trx, /* in: transaction */
dict_table_t* table, /* in: table where index is created */
dict_index_t** index, /* in: indexes to be created */
merge_file_t* files, /* in: Files where to write index
entries */
ulint num_of_idx); /* in: number of indexes to be
created */
/************************************************************************
Read sorted file containing index data tuples and insert these data
data tuples to the index */
ulint
row_merge_insert_index_tuples(
/*==========================*/
/* out: 0 or error number */
trx_t* trx, /* in: transaction */
dict_index_t* index, /* in: index */
dict_table_t* table, /* in: table */
os_file_t file, /* in: file handle */
ulint offset); /* in: offset where to start
reading */
/*****************************************************************
Merge sort for linked list in the disk. */
ulint
row_merge_sort_linked_list_in_disk(
/*===============================*/
/* out: offset to first block in
the list or ULINT_UNDEFINED in
case of error */
dict_index_t* index, /* in: index to be created */
os_file_t file, /* in: File handle */
int* error); /* out: 0 or error */
/************************************************************************* /*************************************************************************
Drop an index from the InnoDB system tables. */ Drop an index from the InnoDB system tables. */
...@@ -116,13 +63,6 @@ row_merge_drop_indexes( ...@@ -116,13 +63,6 @@ row_merge_drop_indexes(
dict_table_t* table, /* in: table containing the indexes */ dict_table_t* table, /* in: table containing the indexes */
dict_index_t** index, /* in: indexes to drop */ dict_index_t** index, /* in: indexes to drop */
ulint num_created); /* in: number of elements in index[] */ ulint num_created); /* in: number of elements in index[] */
/*************************************************************************
Initialize memory for a merge file structure */
void
row_merge_file_create(
/*==================*/
merge_file_t* merge_file); /* out: merge file structure */
/************************************************************************* /*************************************************************************
Create a temporary table using a definition of the old table. You must Create a temporary table using a definition of the old table. You must
...@@ -136,16 +76,7 @@ row_merge_create_temporary_table( ...@@ -136,16 +76,7 @@ row_merge_create_temporary_table(
dict_table_t* table, /* in: old table definition */ dict_table_t* table, /* in: old table definition */
trx_t* trx); /* in/out: trx (sets error_state) */ trx_t* trx); /* in/out: trx (sets error_state) */
/************************************************************************* /*************************************************************************
Update all prebuilts for this table */ Rename the indexes in the dictionary. */
void
row_merge_prebuilts_update(
/*=======================*/
trx_t* trx, /* in: trx */
dict_table_t* old_table); /* in: old table */
/*************************************************************************
Rename the indexes in the dicitionary. */
ulint ulint
row_merge_rename_index( row_merge_rename_index(
...@@ -155,7 +86,7 @@ row_merge_rename_index( ...@@ -155,7 +86,7 @@ row_merge_rename_index(
dict_table_t* table, /* in: Table for index */ dict_table_t* table, /* in: Table for index */
dict_index_t* index); /* in: Index to rename */ dict_index_t* index); /* in: Index to rename */
/************************************************************************* /*************************************************************************
Create the index and load in to the dicitionary. */ Create the index and load in to the dictionary. */
dict_index_t* dict_index_t*
row_merge_create_index( row_merge_create_index(
...@@ -166,7 +97,7 @@ row_merge_create_index( ...@@ -166,7 +97,7 @@ row_merge_create_index(
const merge_index_def_t* /* in: the index definition */ const merge_index_def_t* /* in: the index definition */
index_def); index_def);
/************************************************************************* /*************************************************************************
Check if a transaction can use an index.*/ Check if a transaction can use an index. */
ibool ibool
row_merge_is_index_usable( row_merge_is_index_usable(
...@@ -177,13 +108,31 @@ row_merge_is_index_usable( ...@@ -177,13 +108,31 @@ row_merge_is_index_usable(
const dict_index_t* index); /* in: index to check */ const dict_index_t* index); /* in: index to check */
/************************************************************************* /*************************************************************************
If there are views that refer to the old table name then we "attach" to If there are views that refer to the old table name then we "attach" to
the new instance of the table else we drop it immediately.*/ the new instance of the table else we drop it immediately. */
ulint ulint
row_merge_drop_table( row_merge_drop_table(
/*=================*/ /*=================*/
/* out: DB_SUCCESS if all OK else /* out: DB_SUCCESS or error code */
error code.*/
trx_t* trx, /* in: transaction */ trx_t* trx, /* in: transaction */
dict_table_t* table); /* in: table instance to drop */ dict_table_t* table); /* in: table instance to drop */
/*************************************************************************
Build indexes on a table by reading the clustered index of old_table,
creating a temporary file containing index entries, merge sorting
these index entries and inserting the sorted index entries to the
indexes. */
ulint
row_merge_build_indexes(
/*====================*/
/* out: DB_SUCCESS or error code */
trx_t* trx, /* in: transaction */
dict_table_t* old_table, /* in: Table where rows are
read from */
dict_table_t* new_table, /* in: Table where indexes are
created. Note that old_table ==
new_table if we are creating
secondary keys. */
dict_index_t** indexes, /* in: indexes to be created */
ulint n_indexes); /* in: size of indexes[] */
#endif /* row0merge.h */ #endif /* row0merge.h */
...@@ -503,25 +503,6 @@ row_check_table_for_mysql( ...@@ -503,25 +503,6 @@ row_check_table_for_mysql(
handle */ handle */
#endif /* !UNIV_HOTBACKUP */ #endif /* !UNIV_HOTBACKUP */
/************************************************************************* /*************************************************************************
Build new indexes to a table by reading a clustered index,
creating a temporary file containing index entries, merge sorting
these index entries and inserting sorted index entries to indexes. */
ulint
row_build_index_for_mysql(
/*======================*/
/* out: 0 or error code */
trx_t* trx, /* in: transaction */
dict_table_t* old_table, /* in: Table where rows are
read from */
dict_table_t* new_table, /* in: Table where indexes are
created. Note that old_table ==
new_table if we are creating a
secondary keys. */
dict_index_t** index, /* in: Indexes to be created */
ulint num_of_keys); /* in: Number of indexes to be
created */
/*************************************************************************
Create query graph for a index creation */ Create query graph for a index creation */
ulint ulint
......
...@@ -68,7 +68,7 @@ row_build_index_entry( ...@@ -68,7 +68,7 @@ row_build_index_entry(
mem_heap_t* heap); /* in: memory heap from which the memory for mem_heap_t* heap); /* in: memory heap from which the memory for
the index entry is allocated */ the index entry is allocated */
/*********************************************************************** /***********************************************************************
An inverse function to dict_row_build_index_entry. Builds a row from a An inverse function to row_build_index_entry. Builds a row from a
record in a clustered index. */ record in a clustered index. */
dtuple_t* dtuple_t*
...@@ -98,6 +98,21 @@ row_build( ...@@ -98,6 +98,21 @@ row_build(
/*********************************************************************** /***********************************************************************
Converts an index record to a typed data tuple. */ Converts an index record to a typed data tuple. */
dtuple_t*
row_rec_to_index_entry_low(
/*=======================*/
/* out: index entry built; does not
set info_bits, and the data fields in
the entry will point directly to rec
(so the entry is only usable while
rec remains valid) */
const rec_t* rec, /* in: record in the index */
dict_index_t* index, /* in: index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
mem_heap_t* heap); /* in: memory heap from which the memory
needed is allocated */
/***********************************************************************
Converts an index record to a typed data tuple. NOTE that externally
stored (often big) fields are NOT copied to heap. */
dtuple_t* dtuple_t*
row_rec_to_index_entry( row_rec_to_index_entry(
/*===================*/ /*===================*/
......
...@@ -30,8 +30,7 @@ and the low (LOW), inclusive, and high (HIGH), noninclusive, ...@@ -30,8 +30,7 @@ and the low (LOW), inclusive, and high (HIGH), noninclusive,
limits for the sort interval as arguments. limits for the sort interval as arguments.
CMP_FUN is the comparison function name. It takes as arguments CMP_FUN is the comparison function name. It takes as arguments
two elements from the array and returns 1, if the first is bigger, two elements from the array and returns 1, if the first is bigger,
0 if equal, and -1 if the second bigger. For an eaxmaple of use 0 if equal, and -1 if the second bigger. */
see test program in tsut.c. */
#define UT_SORT_FUNCTION_BODY(SORT_FUN, ARR, AUX_ARR, LOW, HIGH, CMP_FUN)\ #define UT_SORT_FUNCTION_BODY(SORT_FUN, ARR, AUX_ARR, LOW, HIGH, CMP_FUN)\
{\ {\
......
...@@ -481,7 +481,7 @@ engine = innodb default charset=utf8; ...@@ -481,7 +481,7 @@ engine = innodb default charset=utf8;
insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe'); insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
commit; commit;
alter table t1 add unique index (b); alter table t1 add unique index (b);
ERROR 23000: Duplicate entry '0' for key 'b' ERROR 23000: Duplicate entry '' for key 'b'
insert into t1 values(8,9,'fff','fff'); insert into t1 values(8,9,'fff','fff');
select * from t1; select * from t1;
a b c d a b c d
...@@ -650,7 +650,7 @@ engine = innodb default charset=ucs2; ...@@ -650,7 +650,7 @@ engine = innodb default charset=ucs2;
insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe'); insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
commit; commit;
alter table t1 add unique index (b); alter table t1 add unique index (b);
ERROR 23000: Duplicate entry '0' for key 'b' ERROR 23000: Duplicate entry '' for key 'b'
show create table t1; show create table t1;
Table Create Table Table Create Table
t1 CREATE TABLE `t1` ( t1 CREATE TABLE `t1` (
......
...@@ -132,6 +132,7 @@ create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a ...@@ -132,6 +132,7 @@ create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a
engine = innodb default charset=utf8; engine = innodb default charset=utf8;
insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe'); insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
commit; commit;
--replace_regex /Duplicate entry '[0-9]*'/Duplicate entry ''/
--error 1582 --error 1582
alter table t1 add unique index (b); alter table t1 add unique index (b);
insert into t1 values(8,9,'fff','fff'); insert into t1 values(8,9,'fff','fff');
...@@ -170,6 +171,7 @@ create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a ...@@ -170,6 +171,7 @@ create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a
engine = innodb default charset=ucs2; engine = innodb default charset=ucs2;
insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe'); insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
commit; commit;
--replace_regex /Duplicate entry '[0-9]*'/Duplicate entry ''/
--error 1582 --error 1582
alter table t1 add unique index (b); alter table t1 add unique index (b);
show create table t1; show create table t1;
......
...@@ -1995,7 +1995,7 @@ explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a ' ...@@ -1995,7 +1995,7 @@ explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a '
id select_type table type possible_keys key key_len ref rows Extra id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref v v 13 const # Using where; Using index 1 SIMPLE t1 ref v v 13 const # Using where; Using index
alter table t1 add unique(v); alter table t1 add unique(v);
ERROR 23000: Duplicate entry '{ ' for key 'v_2' ERROR 23000: Duplicate entry '' for key 'v_2'
alter table t1 add key(v); alter table t1 add key(v);
select concat('*',v,'*',c,'*',t,'*') as qq from t1 where v='a'; select concat('*',v,'*',c,'*',t,'*') as qq from t1 where v='a';
qq qq
......
...@@ -704,6 +704,154 @@ cmp_dtuple_is_prefix_of_rec( ...@@ -704,6 +704,154 @@ cmp_dtuple_is_prefix_of_rec(
return(FALSE); return(FALSE);
} }
#ifndef UNIV_HOTBACKUP
/*****************************************************************
Compare two physical records that contain the same number of columns,
none of which are stored externally. The fields are compared one by one
in index order, and the result for the first differing field is returned.
An SQL NULL field is considered smaller than any non-NULL field.
Columns with mtype >= DATA_FLOAT, and DATA_BLOB columns that lack
DATA_BINARY_TYPE and whose charset-collation is not
DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL, are compared with
cmp_whole_field(); all other columns are compared byte by byte, with
the shorter field extended by dtype_get_pad_char(). */
int
cmp_rec_rec_simple(
/*===============*/
/* out: 1, 0, -1 if rec1 is greater, equal,
less, respectively, than rec2 */
const rec_t* rec1, /* in: physical record */
const rec_t* rec2, /* in: physical record */
const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
dict_index_t* index) /* in: data dictionary index */
{
ulint rec1_f_len; /* length of current field in rec1 */
const byte* rec1_b_ptr; /* pointer to the current byte
in rec1 field */
ulint rec1_byte; /* value of current byte to be
compared in rec1 */
ulint rec2_f_len; /* length of current field in rec2 */
const byte* rec2_b_ptr; /* pointer to the current byte
in rec2 field */
ulint rec2_byte; /* value of current byte to be
compared in rec2 */
ulint cur_field; /* current field number */
/* Preconditions asserted with ut_ad(): neither record has externally
stored fields, rec_offs_comp() agrees for both offset arrays, and both
records have the same number of fields. */
ut_ad(!rec_offs_any_extern(offsets1));
ut_ad(!rec_offs_any_extern(offsets2));
ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
ut_ad(rec_offs_n_fields(offsets1) == rec_offs_n_fields(offsets2));
for (cur_field = 0; cur_field < rec_offs_n_fields(offsets1);
cur_field++) {
ulint cur_bytes;
ulint mtype;
ulint prtype;
/* Fetch mtype and prtype of the column of this field; they
select the comparison method used below. */
{
const dict_col_t* col
= dict_index_get_nth_col(index, cur_field);
mtype = col->mtype;
prtype = col->prtype;
}
rec1_b_ptr = rec_get_nth_field(rec1, offsets1,
cur_field, &rec1_f_len);
rec2_b_ptr = rec_get_nth_field(rec2, offsets2,
cur_field, &rec2_f_len);
/* A field length of UNIV_SQL_NULL marks an SQL NULL value */
if (rec1_f_len == UNIV_SQL_NULL
|| rec2_f_len == UNIV_SQL_NULL) {
if (rec1_f_len == rec2_f_len) {
/* Both fields are NULL: treat them as equal */
goto next_field;
} else if (rec2_f_len == UNIV_SQL_NULL) {
/* We define the SQL null to be the
smallest possible value of a field
in the alphabetical order */
return(1);
} else {
return(-1);
}
}
/* These column types are not compared byte by byte; the whole
field is handed to cmp_whole_field() instead. */
if (mtype >= DATA_FLOAT
|| (mtype == DATA_BLOB
&& 0 == (prtype & DATA_BINARY_TYPE)
&& dtype_get_charset_coll(prtype)
!= DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
int ret = cmp_whole_field(mtype, prtype,
rec1_b_ptr,
(unsigned) rec1_f_len,
rec2_b_ptr,
(unsigned) rec2_f_len);
if (ret) {
return(ret);
}
goto next_field;
}
/* Compare the fields byte by byte. When one field runs out
before the other, the shorter field is extended with
dtype_get_pad_char(); if that returns ULINT_UNDEFINED, the
shorter field is considered smaller. */
for (cur_bytes = 0;; cur_bytes++, rec1_b_ptr++, rec2_b_ptr++) {
if (rec2_f_len <= cur_bytes) {
/* The rec2 field is exhausted */
if (rec1_f_len <= cur_bytes) {
/* Both fields are exhausted without a
difference: the fields are equal */
goto next_field;
}
rec2_byte = dtype_get_pad_char(mtype, prtype);
if (rec2_byte == ULINT_UNDEFINED) {
return(1);
}
} else {
rec2_byte = *rec2_b_ptr;
}
if (rec1_f_len <= cur_bytes) {
/* The rec1 field is exhausted, but the rec2 field
is not (that case was handled above) */
rec1_byte = dtype_get_pad_char(mtype, prtype);
if (rec1_byte == ULINT_UNDEFINED) {
return(-1);
}
} else {
rec1_byte = *rec1_b_ptr;
}
if (rec1_byte == rec2_byte) {
/* If the bytes are equal, they will remain
such even after the collation transformation
below */
continue;
}
/* For these column types the bytes are mapped through
cmp_collate() before they are ordered */
if (mtype <= DATA_CHAR
|| (mtype == DATA_BLOB
&& !(prtype & DATA_BINARY_TYPE))) {
rec1_byte = cmp_collate(rec1_byte);
rec2_byte = cmp_collate(rec2_byte);
}
/* If the (possibly collated) bytes are equal, neither
branch returns and the loop moves on to the next byte */
if (rec1_byte < rec2_byte) {
return(-1);
} else if (rec1_byte > rec2_byte) {
return(1);
}
}
next_field:
continue;
}
/* If we ran out of fields, rec1 was equal to rec2. */
return(0);
}
#endif /* !UNIV_HOTBACKUP */
/***************************************************************** /*****************************************************************
This function is used to compare two physical records. Only the common This function is used to compare two physical records. Only the common
first fields are compared, and if an externally stored field is first fields are compared, and if an externally stored field is
......
...@@ -236,6 +236,14 @@ rec_init_offsets_comp_ordinary( ...@@ -236,6 +236,14 @@ rec_init_offsets_comp_ordinary(
dict_field_t* field; dict_field_t* field;
ulint null_mask = 1; ulint null_mask = 1;
#ifdef UNIV_DEBUG
/* We cannot invoke rec_offs_make_valid() here, because it can hold
that extra != REC_N_NEW_EXTRA_BYTES. Similarly, rec_offs_validate()
will fail in that case, because it invokes rec_get_status(). */
offsets[2] = (ulint) rec;
offsets[3] = (ulint) index;
#endif /* UNIV_DEBUG */
/* read the lengths of fields 0..n */ /* read the lengths of fields 0..n */
do { do {
ulint len; ulint len;
...@@ -713,41 +721,50 @@ Determines the size of a data tuple in ROW_FORMAT=COMPACT. */ ...@@ -713,41 +721,50 @@ Determines the size of a data tuple in ROW_FORMAT=COMPACT. */
ulint ulint
rec_get_converted_size_comp( rec_get_converted_size_comp(
/*========================*/ /*========================*/
/* out: size */ /* out: total size */
dict_index_t* index, /* in: record descriptor; dict_index_t* index, /* in: record descriptor;
dict_table_is_comp() is assumed to hold */ dict_table_is_comp() is assumed to hold */
const dtuple_t* dtuple, /* in: data tuple */ ulint status, /* in: status bits of the record */
const dfield_t* fields, /* in: array of data fields */
ulint n_fields,/* in: number of data fields */
const ulint* ext, /* in: array of extern field numbers */ const ulint* ext, /* in: array of extern field numbers */
ulint n_ext) /* in: number of elements in ext */ ulint n_ext, /* in: number of elements in ext */
ulint* extra) /* out: extra size */
{ {
ulint size = REC_N_NEW_EXTRA_BYTES ulint extra_size;
+ UT_BITS_IN_BYTES(index->n_nullable); ulint data_size;
ulint i; ulint i;
ulint j; ulint j;
ulint n_fields; ut_ad(index);
ut_ad(index && dtuple); ut_ad(fields);
ut_ad(dtuple_validate(dtuple)); ut_ad(n_fields > 0);
switch (dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) { switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
case REC_STATUS_ORDINARY: case REC_STATUS_ORDINARY:
n_fields = dict_index_get_n_fields(index); ut_ad(n_fields == dict_index_get_n_fields(index));
ut_ad(n_fields == dtuple_get_n_fields(dtuple)); data_size = 0;
break; break;
case REC_STATUS_NODE_PTR: case REC_STATUS_NODE_PTR:
n_fields = dict_index_get_n_unique_in_tree(index); n_fields--;
ut_ad(n_fields + 1 == dtuple_get_n_fields(dtuple)); ut_ad(n_fields == dict_index_get_n_unique_in_tree(index));
ut_ad(dtuple_get_nth_field(dtuple, n_fields)->len == 4); ut_ad(fields[n_fields].len == 4);
size += 4; /* child page number */ ut_ad(!n_ext);
data_size = 4; /* child page number */
break; break;
case REC_STATUS_INFIMUM: case REC_STATUS_INFIMUM:
case REC_STATUS_SUPREMUM: case REC_STATUS_SUPREMUM:
/* infimum or supremum record, 8 data bytes */ /* infimum or supremum record, 8 data bytes */
return(REC_N_NEW_EXTRA_BYTES + 8); extra_size = REC_N_NEW_EXTRA_BYTES;
data_size = 8;
goto func_exit;
default: default:
ut_error; ut_error;
return(ULINT_UNDEFINED); return(ULINT_UNDEFINED);
} }
extra_size = REC_N_NEW_EXTRA_BYTES
+ UT_BITS_IN_BYTES(index->n_nullable);
/* read the lengths of fields 0..n */ /* read the lengths of fields 0..n */
for (i = j = 0; i < n_fields; i++) { for (i = j = 0; i < n_fields; i++) {
dict_field_t* field; dict_field_t* field;
...@@ -755,12 +772,11 @@ rec_get_converted_size_comp( ...@@ -755,12 +772,11 @@ rec_get_converted_size_comp(
const dict_col_t* col; const dict_col_t* col;
field = dict_index_get_nth_field(index, i); field = dict_index_get_nth_field(index, i);
len = dtuple_get_nth_field(dtuple, i)->len; len = fields[i].len;
col = dict_field_get_col(field); col = dict_field_get_col(field);
ut_ad(dict_col_type_assert_equal( ut_ad(dict_col_type_assert_equal(col,
col, dfield_get_type(dtuple_get_nth_field( dfield_get_type(&fields[i])));
dtuple, i))));
if (len == UNIV_SQL_NULL) { if (len == UNIV_SQL_NULL) {
/* No length is stored for NULL fields. */ /* No length is stored for NULL fields. */
...@@ -777,23 +793,28 @@ rec_get_converted_size_comp( ...@@ -777,23 +793,28 @@ rec_get_converted_size_comp(
|| field->fixed_len == field->prefix_len); || field->fixed_len == field->prefix_len);
} else if (UNIV_UNLIKELY(j < n_ext) && i == ext[j]) { } else if (UNIV_UNLIKELY(j < n_ext) && i == ext[j]) {
j++; j++;
size += 2; extra_size += 2;
} else if (len < 128 } else if (len < 128
|| (col->len < 256 && col->mtype != DATA_BLOB)) { || (col->len < 256 && col->mtype != DATA_BLOB)) {
size++; extra_size++;
} else { } else {
/* For variable-length columns, we look up the /* For variable-length columns, we look up the
maximum length from the column itself. If this maximum length from the column itself. If this
is a prefix index column shorter than 256 bytes, is a prefix index column shorter than 256 bytes,
this will waste one byte. */ this will waste one byte. */
size += 2; extra_size += 2;
} }
size += len; data_size += len;
} }
ut_ad(j == n_ext); ut_ad(j == n_ext);
return(size); func_exit:
if (UNIV_LIKELY_NULL(extra)) {
*extra = extra_size;
}
return(extra_size + data_size);
} }
/*************************************************************** /***************************************************************
...@@ -980,23 +1001,23 @@ rec_convert_dtuple_to_rec_old( ...@@ -980,23 +1001,23 @@ rec_convert_dtuple_to_rec_old(
/************************************************************* /*************************************************************
Builds a ROW_FORMAT=COMPACT record out of a data tuple. */ Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
byte* void
rec_convert_dtuple_to_rec_comp( rec_convert_dtuple_to_rec_comp(
/*===========================*/ /*===========================*/
/* out: pointer to the start of data payload */ rec_t* rec, /* in: origin of record */
byte* buf, /* in: start address of the data area */
ulint extra, /* in: number of bytes to reserve between ulint extra, /* in: number of bytes to reserve between
the record header and the data payload the record header and the data payload
(usually REC_N_NEW_EXTRA_BYTES) */ (normally REC_N_NEW_EXTRA_BYTES) */
dict_index_t* index, /* in: record descriptor */ dict_index_t* index, /* in: record descriptor */
const dtuple_t* dtuple, /* in: data tuple */ ulint status, /* in: status bits of the record */
const dfield_t* fields, /* in: array of data fields */
ulint n_fields,/* in: number of data fields */
const ulint* ext, /* in: array of extern field numbers, const ulint* ext, /* in: array of extern field numbers,
in ascending order */ in ascending order */
ulint n_ext) /* in: number of elements in ext */ ulint n_ext) /* in: number of elements in ext */
{ {
const dfield_t* field; const dfield_t* field;
const dtype_t* type; const dtype_t* type;
rec_t* rec = buf + extra;
byte* end; byte* end;
byte* nulls; byte* nulls;
byte* lens; byte* lens;
...@@ -1006,18 +1027,10 @@ rec_convert_dtuple_to_rec_comp( ...@@ -1006,18 +1027,10 @@ rec_convert_dtuple_to_rec_comp(
ulint n_node_ptr_field; ulint n_node_ptr_field;
ulint fixed_len; ulint fixed_len;
ulint null_mask = 1; ulint null_mask = 1;
const ulint n_fields = dtuple_get_n_fields(dtuple);
ut_ad(dict_table_is_comp(index->table)); ut_ad(dict_table_is_comp(index->table));
ut_ad(n_fields > 0); ut_ad(n_fields > 0);
/* Try to ensure that the memset() between the for() loops switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
completes fast. The address is not exact, but UNIV_PREFETCH
should never generate a memory fault. */
UNIV_PREFETCH_RW(buf - n_fields);
UNIV_PREFETCH_RW(rec);
switch (UNIV_EXPECT(dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK,
REC_STATUS_ORDINARY)) {
case REC_STATUS_ORDINARY: case REC_STATUS_ORDINARY:
ut_ad(n_fields <= dict_index_get_n_fields(index)); ut_ad(n_fields <= dict_index_get_n_fields(index));
n_node_ptr_field = ULINT_UNDEFINED; n_node_ptr_field = ULINT_UNDEFINED;
...@@ -1030,62 +1043,12 @@ rec_convert_dtuple_to_rec_comp( ...@@ -1030,62 +1043,12 @@ rec_convert_dtuple_to_rec_comp(
case REC_STATUS_SUPREMUM: case REC_STATUS_SUPREMUM:
ut_ad(n_fields == 1); ut_ad(n_fields == 1);
n_node_ptr_field = ULINT_UNDEFINED; n_node_ptr_field = ULINT_UNDEFINED;
ut_d(j = 0); break;
goto init;
default: default:
ut_error; ut_error;
return(0); return;
}
/* Calculate the offset of the origin in the physical record.
We must loop over all fields to do this. */
rec += UT_BITS_IN_BYTES(index->n_nullable);
for (i = j = 0; i < n_fields; i++) {
if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
#ifdef UNIV_DEBUG
field = dtuple_get_nth_field(dtuple, i);
type = dfield_get_type(field);
ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
ut_ad(dfield_get_len(field) == 4);
#endif /* UNIV_DEBUG */
goto init;
}
field = dtuple_get_nth_field(dtuple, i);
type = dfield_get_type(field);
len = dfield_get_len(field);
fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
ut_ad(dict_col_type_assert_equal(
dict_field_get_col(dict_index_get_nth_field(
index, i)),
dfield_get_type(field)));
if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
if (len == UNIV_SQL_NULL)
continue;
}
/* only nullable fields can be null */
ut_ad(len != UNIV_SQL_NULL);
if (fixed_len) {
ut_ad(len == fixed_len);
} else {
ut_ad(len <= dtype_get_len(type)
|| dtype_get_mtype(type) == DATA_BLOB);
rec++;
if (len >= 128
&& (dtype_get_len(type) >= 256
|| dtype_get_mtype(type) == DATA_BLOB)) {
rec++;
} else if (UNIV_UNLIKELY(j < n_ext) && i == ext[j]) {
j++;
rec++;
}
}
} }
init:
ut_ad(j == n_ext);
end = rec; end = rec;
nulls = rec - (extra + 1); nulls = rec - (extra + 1);
lens = nulls - UT_BITS_IN_BYTES(index->n_nullable); lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
...@@ -1094,8 +1057,7 @@ init: ...@@ -1094,8 +1057,7 @@ init:
/* Store the data and the offsets */ /* Store the data and the offsets */
for (i = j = 0; i < n_fields; i++) { for (i = j = 0, field = fields; i < n_fields; i++, field++) {
field = dtuple_get_nth_field(dtuple, i);
type = dfield_get_type(field); type = dfield_get_type(field);
len = dfield_get_len(field); len = dfield_get_len(field);
...@@ -1106,7 +1068,6 @@ init: ...@@ -1106,7 +1068,6 @@ init:
end += 4; end += 4;
break; break;
} }
fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
/* nullable field */ /* nullable field */
...@@ -1130,6 +1091,9 @@ init: ...@@ -1130,6 +1091,9 @@ init:
} }
/* only nullable fields can be null */ /* only nullable fields can be null */
ut_ad(len != UNIV_SQL_NULL); ut_ad(len != UNIV_SQL_NULL);
fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
if (fixed_len) { if (fixed_len) {
ut_ad(len == fixed_len); ut_ad(len == fixed_len);
} else { } else {
...@@ -1157,8 +1121,6 @@ init: ...@@ -1157,8 +1121,6 @@ init:
} }
ut_ad(j == n_ext); ut_ad(j == n_ext);
return(rec);
} }
/************************************************************* /*************************************************************
...@@ -1177,8 +1139,19 @@ rec_convert_dtuple_to_rec_new( ...@@ -1177,8 +1139,19 @@ rec_convert_dtuple_to_rec_new(
in ascending order */ in ascending order */
ulint n_ext) /* in: number of elements in ext */ ulint n_ext) /* in: number of elements in ext */
{ {
rec_t* rec = rec_convert_dtuple_to_rec_comp( ulint extra_size;
buf, REC_N_NEW_EXTRA_BYTES, index, dtuple, ext, n_ext); ulint status;
rec_t* rec;
status = dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK;
rec_get_converted_size_comp(index, status,
dtuple->fields, dtuple->n_fields,
ext, n_ext, &extra_size);
rec = buf + extra_size;
rec_convert_dtuple_to_rec_comp(
rec, REC_N_NEW_EXTRA_BYTES, index, status,
dtuple->fields, dtuple->n_fields, ext, n_ext);
/* Set the info bits of the record */ /* Set the info bits of the record */
rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple)); rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple));
......
This diff is collapsed.
...@@ -33,7 +33,6 @@ Created 9/17/2000 Heikki Tuuri ...@@ -33,7 +33,6 @@ Created 9/17/2000 Heikki Tuuri
#include "btr0sea.h" #include "btr0sea.h"
#include "fil0fil.h" #include "fil0fil.h"
#include "ibuf0ibuf.h" #include "ibuf0ibuf.h"
#include "row0merge.h"
/* A dummy variable used to fool the compiler */ /* A dummy variable used to fool the compiler */
ibool row_mysql_identically_false = FALSE; ibool row_mysql_identically_false = FALSE;
...@@ -4492,93 +4491,6 @@ row_create_index_graph_for_mysql( ...@@ -4492,93 +4491,6 @@ row_create_index_graph_for_mysql(
return(err); return(err);
} }
/*************************************************************************
Build new indexes to a table by reading a clustered index,
creating a temporary file containing index entries, merge sorting
these index entries and inserting sorted index entries to indexes.
On failure in the sort or insert phase, trx->error_key_num is set to
the position (in the index array) of the index that failed. */

ulint
row_build_index_for_mysql(
/*======================*/
					/* out: DB_SUCCESS or error code */
	trx_t*		trx,		/* in: transaction */
	dict_table_t*	old_table,	/* in: Table where rows are
					read from */
	dict_table_t*	new_table,	/* in: Table where indexes are
					created. Note that old_table ==
					new_table if we are creating
					secondary keys. */
	dict_index_t**	index,		/* in: Indexes to be created */
	ulint		num_of_keys)	/* in: Number of indexes to be
					created */
{
	merge_file_t*	merge_files;
	ulint		index_num;
	ulint		error;
	int		sort_error;

	ut_ad(trx && old_table && new_table && index && num_of_keys);

	trx_start_if_not_started(trx);

	/* Allocate memory for merge file data structure and initialize
	fields */

	merge_files = mem_alloc(num_of_keys * sizeof *merge_files);

	for (index_num = 0; index_num < num_of_keys; index_num++) {

		row_merge_file_create(&merge_files[index_num]);
	}

	/* Read clustered index of the table and create files for
	secondary index entries for merge sort */

	error = row_merge_read_clustered_index(
		trx, old_table, index, merge_files, num_of_keys);

	if (error != DB_SUCCESS) {

		goto func_exit;
	}

	trx_start_if_not_started(trx);

	/* Now we have files containing index entries ready for
	sorting and inserting. */

	for (index_num = 0; index_num < num_of_keys; index_num++) {

		/* Do a merge sort and insert from those files
		which we have written at least one block */

		if (merge_files[index_num].num_of_blocks > 0) {

			/* Merge sort file using linked list merge
			sort for files.  The sort routine reports its
			status through an int*, so hand it a genuine
			int instead of casting the address of the ulint
			error variable: such a cast stores through an
			incompatible type and updates only part of the
			variable where ulint is wider than int.  The
			local starts out as DB_SUCCESS, which is the
			value error holds whenever this point is
			reached. */

			sort_error = DB_SUCCESS;

			row_merge_sort_linked_list_in_disk(
				index[index_num],
				merge_files[index_num].file,
				&sort_error);

			error = (ulint) sort_error;

			if (error == DB_SUCCESS) {
				error = row_merge_insert_index_tuples(
					trx, index[index_num], new_table,
					merge_files[index_num].file, 0);
			}

			if (error != DB_SUCCESS) {
				trx->error_key_num = index_num;
				goto func_exit;
			}
		}
	}

func_exit:
	/* NOTE(review): only the array of merge file descriptors is
	released here.  Whether the files set up by
	row_merge_file_create() need to be closed explicitly cannot be
	determined from this function; confirm against
	row_merge_file_create(). */

	mem_free(merge_files);

	return(error);
}
#endif /* !UNIV_HOTBACKUP */ #endif /* !UNIV_HOTBACKUP */
/************************************************************************* /*************************************************************************
......
...@@ -141,7 +141,7 @@ row_build_index_entry( ...@@ -141,7 +141,7 @@ row_build_index_entry(
} }
/*********************************************************************** /***********************************************************************
An inverse function to dict_row_build_index_entry. Builds a row from a An inverse function to row_build_index_entry. Builds a row from a
record in a clustered index. */ record in a clustered index. */
dtuple_t* dtuple_t*
...@@ -256,6 +256,53 @@ row_build( ...@@ -256,6 +256,53 @@ row_build(
return(row); return(row);
} }
/***********************************************************************
Builds a typed data tuple out of an index record.  The field data is
not copied: every field of the tuple refers to the bytes of rec. */

dtuple_t*
row_rec_to_index_entry_low(
/*=======================*/
				/* out: index entry built; info_bits
				are not set, and the data fields of
				the entry point directly to rec */
	const rec_t*	rec,	/* in: record in the index */
	dict_index_t*	index,	/* in: index */
	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
	mem_heap_t*	heap)	/* in: memory heap from which the memory
				needed is allocated */
{
	dtuple_t*	tuple;
	ulint		n_fields;
	ulint		pos;

	ut_ad(rec && heap && index);

	/* The tuple gets exactly as many fields as the record has. */
	n_fields = rec_offs_n_fields(offsets);

	tuple = dtuple_create(heap, n_fields);

	dtuple_set_n_fields_cmp(tuple,
				dict_index_get_n_unique_in_tree(index));

	ut_ad(n_fields == dict_index_get_n_fields(index));

	dict_index_copy_types(tuple, index, n_fields);

	/* Point each tuple field at the corresponding record field. */
	pos = 0;

	while (pos < n_fields) {
		dfield_t*	dst;
		const byte*	src;
		ulint		src_len;

		dst = dtuple_get_nth_field(tuple, pos);
		src = rec_get_nth_field(rec, offsets, pos, &src_len);

		dfield_set_data(dst, src, src_len);

		pos++;
	}

	ut_ad(dtuple_check_typed(tuple));

	return(tuple);
}
/*********************************************************************** /***********************************************************************
Converts an index record to a typed data tuple. NOTE that externally Converts an index record to a typed data tuple. NOTE that externally
stored (often big) fields are NOT copied to heap. */ stored (often big) fields are NOT copied to heap. */
...@@ -281,11 +328,6 @@ row_rec_to_index_entry( ...@@ -281,11 +328,6 @@ row_rec_to_index_entry(
needed is allocated */ needed is allocated */
{ {
dtuple_t* entry; dtuple_t* entry;
dfield_t* dfield;
ulint i;
const byte* field;
ulint len;
ulint rec_len;
byte* buf; byte* buf;
mem_heap_t* tmp_heap = NULL; mem_heap_t* tmp_heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint offsets_[REC_OFFS_NORMAL_SIZE];
...@@ -305,29 +347,12 @@ row_rec_to_index_entry( ...@@ -305,29 +347,12 @@ row_rec_to_index_entry(
rec_offs_make_valid(rec, index, offsets); rec_offs_make_valid(rec, index, offsets);
} }
rec_len = rec_offs_n_fields(offsets); entry = row_rec_to_index_entry_low(rec, index, offsets, heap);
entry = dtuple_create(heap, rec_len);
dtuple_set_n_fields_cmp(entry,
dict_index_get_n_unique_in_tree(index));
ut_ad(rec_len == dict_index_get_n_fields(index));
dict_index_copy_types(entry, index, rec_len);
dtuple_set_info_bits(entry, dtuple_set_info_bits(entry,
rec_get_info_bits(rec, rec_offs_comp(offsets))); rec_get_info_bits(rec, rec_offs_comp(offsets)));
for (i = 0; i < rec_len; i++) { if (UNIV_LIKELY_NULL(tmp_heap)) {
dfield = dtuple_get_nth_field(entry, i);
field = rec_get_nth_field(rec, offsets, i, &len);
dfield_set_data(dfield, field, len);
}
ut_ad(dtuple_check_typed(entry));
if (tmp_heap) {
mem_heap_free(tmp_heap); mem_heap_free(tmp_heap);
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment