Commit 1a8a63d0 authored by marko

branches/zip: Reimplement merge sort in fast index creation.

The creation of the primary key does not work yet.  We will have to flag
externally stored columns and copy the externally stored part from
the old table.

row_build_index_for_mysql(): Rename to row_merge_build_indexes().
Move from row0mysql.c to row0merge.c.

Remove private declarations from row0merge.h.  Make many functions static
in row0merge.c.

cmp_rec_rec_simple(): A new comparison function.

dict_index_get_min_size(): New function.

OS_FILE_FROM_FD(fd): A macro for converting from int to os_file_t.

rec_convert_dtuple_to_rec_comp(): Make the interface lower-level.

rec_get_converted_size_comp(): Also return extra_size.

UT_SORT_FUNCTION_BODY(): Remove reference to an obsolete test program.

row_rec_to_index_entry_low(): New function.

row0merge.c: Implement merge sort based on file streams instead of
fixed-size blocks.  Sort the small blocks as arrays of dfield_t*,
because it is faster than invoking rec_get_offsets() for every
comparison.
parent 673f836f
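To picture the approach of merging sorted runs held in files rather than in fixed-size blocks, here is a generic, self-contained sketch of a two-way merge of sorted record streams.  The record type, comparator and run layout below are invented for the illustration and are not InnoDB code from this commit.

#include <stdio.h>

typedef struct {
	int	key;
	char	payload[28];
} sketch_rec_t;

static int
sketch_cmp(const sketch_rec_t* a, const sketch_rec_t* b)
{
	/* 1, 0, -1 if a is greater, equal, less than b */
	return(a->key > b->key ? 1 : a->key < b->key ? -1 : 0);
}

static void
sketch_merge_runs(FILE* in1, FILE* in2, FILE* out)
{
	sketch_rec_t	r1;
	sketch_rec_t	r2;
	int		have1 = fread(&r1, sizeof r1, 1, in1) == 1;
	int		have2 = fread(&r2, sizeof r2, 1, in2) == 1;

	/* Repeatedly emit the smaller head record of the two sorted
	input runs; the output is again a sorted run. */
	while (have1 && have2) {
		if (sketch_cmp(&r1, &r2) <= 0) {
			fwrite(&r1, sizeof r1, 1, out);
			have1 = fread(&r1, sizeof r1, 1, in1) == 1;
		} else {
			fwrite(&r2, sizeof r2, 1, out);
			have2 = fread(&r2, sizeof r2, 1, in2) == 1;
		}
	}

	/* Copy whatever remains of the run that has not been exhausted. */
	while (have1) {
		fwrite(&r1, sizeof r1, 1, out);
		have1 = fread(&r1, sizeof r1, 1, in1) == 1;
	}

	while (have2) {
		fwrite(&r2, sizeof r2, 1, out);
		have2 = fread(&r2, sizeof r2, 1, in2) == 1;
	}
}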
......@@ -8283,7 +8283,7 @@ err_exit:
/* Read clustered index of the table and build indexes
based on this information using temporary files and merge
sort.*/
error = row_build_index_for_mysql(
error = row_merge_build_indexes(
trx, innodb_table, indexed_table, index,
num_of_idx);
......
......@@ -656,6 +656,14 @@ dict_table_get_sys_col_no(
const dict_table_t* table, /* in: table */
ulint sys); /* in: DATA_ROW_ID, ... */
/************************************************************************
Returns the minimum data size of an index record. */
UNIV_INLINE
ulint
dict_index_get_min_size(
/*====================*/
/* out: minimum data size in bytes */
const dict_index_t* index); /* in: index */
/************************************************************************
Check whether the table uses the compact page format. */
UNIV_INLINE
ibool
......
......@@ -507,6 +507,26 @@ dict_index_get_nth_col_no(
return(dict_col_get_no(dict_index_get_nth_col(index, pos)));
}
/************************************************************************
Returns the minimum data size of an index record. */
UNIV_INLINE
ulint
dict_index_get_min_size(
/*====================*/
/* out: minimum data size in bytes */
const dict_index_t* index) /* in: index */
{
ulint n = dict_index_get_n_fields(index);
ulint size = 0;
while (n--) {
size += dict_col_get_min_size(dict_index_get_nth_col(index,
n));
}
return(size);
}
/*************************************************************************
Gets the space id of the root of the index tree. */
UNIV_INLINE
......
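One plausible use of dict_index_get_min_size(), sketched here with an invented caller rather than code from this patch, is to bound how many index entries could possibly fit into a sort buffer of a given size:

ulint
merge_max_entries(
/*==============*/
					/* out: upper bound on the number
					of entries that fit in block_size
					bytes */
	const dict_index_t*	index,		/* in: index being built */
	ulint			block_size)	/* in: sort buffer size */
{
	ulint	min_rec = dict_index_get_min_size(index);

	/* Every record occupies at least its minimum data size,
	so no more than this many entries can fit in the buffer. */
	return(block_size / (min_rec ? min_rec : 1));
}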
......@@ -43,8 +43,10 @@ extern ulint os_n_pending_writes;
#ifdef __WIN__
#define os_file_t HANDLE
#define OS_FILE_FROM_FD(fd) _get_osfhandle(fd)
#else
typedef int os_file_t;
#define OS_FILE_FROM_FD(fd) fd
#endif
extern ulint os_innodb_umask;
......
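The macro lets a descriptor obtained from the C runtime be handed to the InnoDB file I/O layer on both Unix and Windows.  A hypothetical usage sketch (tmpfile() and fileno() are standard C/POSIX; the wrapper itself is not part of this patch):

#include <stdio.h>

os_file_t
merge_tmpfile_handle(
/*=================*/
			/* out: OS file handle of the temporary file */
	FILE*	file)	/* in: open temporary file, e.g. from tmpfile();
			must not be NULL */
{
	return(OS_FILE_FROM_FD(fileno(file)));
}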
......@@ -125,6 +125,22 @@ cmp_dtuple_is_prefix_of_rec(
const dtuple_t* dtuple, /* in: data tuple */
const rec_t* rec, /* in: physical record */
const ulint* offsets);/* in: array returned by rec_get_offsets() */
#ifndef UNIV_HOTBACKUP
/*****************************************************************
Compare two physical records that contain the same number of columns,
none of which are stored externally. */
int
cmp_rec_rec_simple(
/*===============*/
/* out: 1, 0 , -1 if rec1 is greater, equal,
less, respectively, than rec2 */
const rec_t* rec1, /* in: physical record */
const rec_t* rec2, /* in: physical record */
const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
dict_index_t* index); /* in: data dictionary index */
#endif /* !UNIV_HOTBACKUP */
/*****************************************************************
This function is used to compare two physical records. Only the common
first fields are compared, and if an externally stored field is
......
......@@ -607,16 +607,17 @@ rec_fold(
/*************************************************************
Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
byte*
void
rec_convert_dtuple_to_rec_comp(
/*===========================*/
/* out: pointer to the start of data payload */
byte* buf, /* in: start address of the data area */
rec_t* rec, /* in: origin of record */
ulint extra, /* in: number of bytes to reserve between
the record header and the data payload
(usually REC_N_NEW_EXTRA_BYTES) */
dict_index_t* index, /* in: record descriptor */
const dtuple_t* dtuple, /* in: data tuple */
ulint status, /* in: status bits of the record */
const dfield_t* fields, /* in: array of data fields */
ulint n_fields,/* in: number of data fields */
const ulint* ext, /* in: array of extern field numbers,
in ascending order */
ulint n_ext); /* in: number of elements in ext */
......@@ -657,9 +658,12 @@ rec_get_converted_size_comp(
/* out: size */
dict_index_t* index, /* in: record descriptor;
dict_table_is_comp() is assumed to hold */
const dtuple_t* dtuple, /* in: data tuple */
ulint status, /* in: status bits of the record */
const dfield_t* fields, /* in: array of data fields */
ulint n_fields,/* in: number of data fields */
const ulint* ext, /* in: array of extern field numbers */
ulint n_ext); /* in: number of elements in ext */
ulint n_ext, /* in: number of elements in ext */
ulint* extra); /* out: extra size */
/**************************************************************
The following function returns the size of a data tuple when converted to
a physical record. */
......
......@@ -1538,7 +1538,12 @@ rec_get_converted_size(
: dict_index_get_n_fields(index)));
if (dict_table_is_comp(index->table)) {
return(rec_get_converted_size_comp(index, dtuple, ext, n_ext));
return(rec_get_converted_size_comp(index,
dtuple_get_info_bits(dtuple)
& REC_NEW_STATUS_MASK,
dtuple->fields,
dtuple->n_fields,
ext, n_ext, NULL));
}
data_size = dtuple_get_data_size(dtuple);
......
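The new extra output lets a caller place the record origin inside a pre-allocated buffer: the first extra bytes hold the record header (extra bytes, null flags and length bytes), the remainder holds the data payload.  A hedged sketch of that pattern, modelled on rec_convert_dtuple_to_rec_new() further below; the wrapper name and buf_size are assumptions:

static rec_t*
convert_into_buf(
/*=============*/
				/* out: record origin, buf + extra size */
	byte*		buf,	/* in: conversion buffer */
	ulint		buf_size,/* in: size of buf, in bytes */
	dict_index_t*	index,	/* in: record descriptor */
	ulint		status,	/* in: status bits of the record */
	const dfield_t*	fields,	/* in: array of data fields */
	ulint		n_fields,/* in: number of data fields */
	const ulint*	ext,	/* in: array of extern field numbers */
	ulint		n_ext)	/* in: number of elements in ext */
{
	ulint	extra;
	ulint	size = rec_get_converted_size_comp(index, status,
						   fields, n_fields,
						   ext, n_ext, &extra);
	ut_a(size <= buf_size);

	/* The header grows downwards from the origin, the data
	payload upwards; hence the origin is buf + extra. */
	rec_convert_dtuple_to_rec_comp(buf + extra, REC_N_NEW_EXTRA_BYTES,
				       index, status,
				       fields, n_fields, ext, n_ext);

	return(buf + extra);
}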
......@@ -21,17 +21,6 @@ Created 13/06/2005 Jan Lindstrom
#include "btr0types.h"
#include "row0mysql.h"
/* Information about temporary files used in merge sort are stored
to this structure */
struct merge_file_struct {
os_file_t file; /* File descriptor */
ulint offset; /* File offset */
ulint num_of_blocks; /* Number of blocks */
};
typedef struct merge_file_struct merge_file_t;
/* This structure holds index field definitions */
struct merge_index_field_struct {
......@@ -53,48 +42,6 @@ struct merge_index_def_struct {
typedef struct merge_index_def_struct merge_index_def_t;
/************************************************************************
Reads clustered index of the table and create temporary files
containing index entries for indexes to be built. */
ulint
row_merge_read_clustered_index(
/*===========================*/
/* out: DB_SUCCESS if successfull,
or ERROR code */
trx_t* trx, /* in: transaction */
dict_table_t* table, /* in: table where index is created */
dict_index_t** index, /* in: indexes to be created */
merge_file_t* files, /* in: Files where to write index
entries */
ulint num_of_idx); /* in: number of indexes to be
created */
/************************************************************************
Read sorted file containing index data tuples and insert these data
data tuples to the index */
ulint
row_merge_insert_index_tuples(
/*==========================*/
/* out: 0 or error number */
trx_t* trx, /* in: transaction */
dict_index_t* index, /* in: index */
dict_table_t* table, /* in: table */
os_file_t file, /* in: file handle */
ulint offset); /* in: offset where to start
reading */
/*****************************************************************
Merge sort for linked list in the disk. */
ulint
row_merge_sort_linked_list_in_disk(
/*===============================*/
/* out: offset to first block in
the list or ULINT_UNDEFINED in
case of error */
dict_index_t* index, /* in: index to be created */
os_file_t file, /* in: File handle */
int* error); /* out: 0 or error */
/*************************************************************************
Drop an index from the InnoDB system tables. */
......@@ -116,13 +63,6 @@ row_merge_drop_indexes(
dict_table_t* table, /* in: table containing the indexes */
dict_index_t** index, /* in: indexes to drop */
ulint num_created); /* in: number of elements in index[] */
/*************************************************************************
Initialize memory for a merge file structure */
void
row_merge_file_create(
/*==================*/
merge_file_t* merge_file); /* out: merge file structure */
/*************************************************************************
Create a temporary table using a definition of the old table. You must
......@@ -136,16 +76,7 @@ row_merge_create_temporary_table(
dict_table_t* table, /* in: old table definition */
trx_t* trx); /* in/out: trx (sets error_state) */
/*************************************************************************
Update all prebuilts for this table */
void
row_merge_prebuilts_update(
/*=======================*/
trx_t* trx, /* in: trx */
dict_table_t* old_table); /* in: old table */
/*************************************************************************
Rename the indexes in the dicitionary. */
Rename the indexes in the dictionary. */
ulint
row_merge_rename_index(
......@@ -155,7 +86,7 @@ row_merge_rename_index(
dict_table_t* table, /* in: Table for index */
dict_index_t* index); /* in: Index to rename */
/*************************************************************************
Create the index and load in to the dicitionary. */
Create the index and load in to the dictionary. */
dict_index_t*
row_merge_create_index(
......@@ -166,7 +97,7 @@ row_merge_create_index(
const merge_index_def_t* /* in: the index definition */
index_def);
/*************************************************************************
Check if a transaction can use an index.*/
Check if a transaction can use an index. */
ibool
row_merge_is_index_usable(
......@@ -177,13 +108,31 @@ row_merge_is_index_usable(
const dict_index_t* index); /* in: index to check */
/*************************************************************************
If there are views that refer to the old table name then we "attach" to
the new instance of the table else we drop it immediately.*/
the new instance of the table else we drop it immediately. */
ulint
row_merge_drop_table(
/*=================*/
/* out: DB_SUCCESS if all OK else
error code.*/
/* out: DB_SUCCESS or error code */
trx_t* trx, /* in: transaction */
dict_table_t* table); /* in: table instance to drop */
/*************************************************************************
Build indexes on a table by reading a clustered index,
creating a temporary file containing index entries, merge sorting
these index entries and inserting sorted index entries to indexes. */
ulint
row_merge_build_indexes(
/*====================*/
/* out: DB_SUCCESS or error code */
trx_t* trx, /* in: transaction */
dict_table_t* old_table, /* in: Table where rows are
read from */
dict_table_t* new_table, /* in: Table where indexes are
created. Note that old_table ==
new_table if we are creating a
secondary keys. */
dict_index_t** indexes, /* in: indexes to be created */
ulint n_indexes); /* in: size of indexes[] */
#endif /* row0merge.h */
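As the ha_innodb.cc hunk at the top of this commit shows, the caller passes the transaction, the source and destination tables and the array of index objects.  A hypothetical wrapper for the secondary-index case, where the source and destination are the same table instance; the wrapper name is illustrative only:

ulint
build_secondary_indexes(
/*====================*/
					/* out: DB_SUCCESS or error code */
	trx_t*		trx,		/* in: transaction */
	dict_table_t*	table,		/* in: table to read rows from and
					to add the indexes to */
	dict_index_t**	indexes,	/* in: indexes to be created */
	ulint		n_indexes)	/* in: size of indexes[] */
{
	/* For secondary indexes, old_table == new_table. */
	return(row_merge_build_indexes(trx, table, table,
				       indexes, n_indexes));
}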
......@@ -503,25 +503,6 @@ row_check_table_for_mysql(
handle */
#endif /* !UNIV_HOTBACKUP */
/*************************************************************************
Build new indexes to a table by reading a clustered index,
creating a temporary file containing index entries, merge sorting
these index entries and inserting sorted index entries to indexes. */
ulint
row_build_index_for_mysql(
/*======================*/
/* out: 0 or error code */
trx_t* trx, /* in: transaction */
dict_table_t* old_table, /* in: Table where rows are
read from */
dict_table_t* new_table, /* in: Table where indexes are
created. Note that old_table ==
new_table if we are creating a
secondary keys. */
dict_index_t** index, /* in: Indexes to be created */
ulint num_of_keys); /* in: Number of indexes to be
created */
/*************************************************************************
Create query graph for a index creation */
ulint
......
......@@ -68,7 +68,7 @@ row_build_index_entry(
mem_heap_t* heap); /* in: memory heap from which the memory for
the index entry is allocated */
/***********************************************************************
An inverse function to dict_row_build_index_entry. Builds a row from a
An inverse function to row_build_index_entry. Builds a row from a
record in a clustered index. */
dtuple_t*
......@@ -98,6 +98,21 @@ row_build(
/***********************************************************************
Converts an index record to a typed data tuple. */
dtuple_t*
row_rec_to_index_entry_low(
/*=======================*/
/* out, index entry built; does not
set info_bits, and the data fields in
the entry will point directly to rec */
const rec_t* rec, /* in: record in the index */
dict_index_t* index, /* in: index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
mem_heap_t* heap); /* in: memory heap from which the memory
needed is allocated */
/***********************************************************************
Converts an index record to a typed data tuple. NOTE that externally
stored (often big) fields are NOT copied to heap. */
dtuple_t*
row_rec_to_index_entry(
/*===================*/
......
......@@ -30,8 +30,7 @@ and the low (LOW), inclusive, and high (HIGH), noninclusive,
limits for the sort interval as arguments.
CMP_FUN is the comparison function name. It takes as arguments
two elements from the array and returns 1, if the first is bigger,
0 if equal, and -1 if the second bigger. For an eaxmaple of use
see test program in tsut.c. */
0 if equal, and -1 if the second bigger. */
#define UT_SORT_FUNCTION_BODY(SORT_FUN, ARR, AUX_ARR, LOW, HIGH, CMP_FUN)\
{\
......
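To make the documented contract concrete, here is a hypothetical instantiation of the macro that generates a merge sort over an array of ints; the wrapper and comparison function names are invented for the example:

static int
sketch_cmp_int(int a, int b)
{
	/* 1 if a is bigger, 0 if equal, -1 if b is bigger */
	return(a > b ? 1 : a < b ? -1 : 0);
}

static void
sketch_sort_ints(
/*=============*/
	int*	arr,	/* in/out: array to be sorted */
	int*	aux_arr,/* in/out: auxiliary array of the same size */
	ulint	low,	/* in: low limit, inclusive */
	ulint	high)	/* in: high limit, noninclusive */
{
	UT_SORT_FUNCTION_BODY(sketch_sort_ints, arr, aux_arr, low, high,
			      sketch_cmp_int);
}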
......@@ -481,7 +481,7 @@ engine = innodb default charset=utf8;
insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
commit;
alter table t1 add unique index (b);
ERROR 23000: Duplicate entry '0' for key 'b'
ERROR 23000: Duplicate entry '' for key 'b'
insert into t1 values(8,9,'fff','fff');
select * from t1;
a b c d
......@@ -650,7 +650,7 @@ engine = innodb default charset=ucs2;
insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
commit;
alter table t1 add unique index (b);
ERROR 23000: Duplicate entry '0' for key 'b'
ERROR 23000: Duplicate entry '' for key 'b'
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
......
......@@ -132,6 +132,7 @@ create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a
engine = innodb default charset=utf8;
insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
commit;
--replace_regex /Duplicate entry '[0-9]*'/Duplicate entry ''/
--error 1582
alter table t1 add unique index (b);
insert into t1 values(8,9,'fff','fff');
......@@ -170,6 +171,7 @@ create table t1(a int not null, b int, c char(10), d varchar(20), primary key (a
engine = innodb default charset=ucs2;
insert into t1 values (1,1,'ab','ab'),(2,2,'ac','ac'),(3,2,'ad','ad'),(4,4,'afe','afe');
commit;
--replace_regex /Duplicate entry '[0-9]*'/Duplicate entry ''/
--error 1582
alter table t1 add unique index (b);
show create table t1;
......
......@@ -1995,7 +1995,7 @@ explain select count(*) from t1 where v between 'a' and 'a ' and v between 'a '
id select_type table type possible_keys key key_len ref rows Extra
1 SIMPLE t1 ref v v 13 const # Using where; Using index
alter table t1 add unique(v);
ERROR 23000: Duplicate entry '{ ' for key 'v_2'
ERROR 23000: Duplicate entry '' for key 'v_2'
alter table t1 add key(v);
select concat('*',v,'*',c,'*',t,'*') as qq from t1 where v='a';
qq
......
......@@ -704,6 +704,154 @@ cmp_dtuple_is_prefix_of_rec(
return(FALSE);
}
#ifndef UNIV_HOTBACKUP
/*****************************************************************
Compare two physical records that contain the same number of columns,
none of which are stored externally. */
int
cmp_rec_rec_simple(
/*===============*/
/* out: 1, 0 , -1 if rec1 is greater, equal,
less, respectively, than rec2 */
const rec_t* rec1, /* in: physical record */
const rec_t* rec2, /* in: physical record */
const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
dict_index_t* index) /* in: data dictionary index */
{
ulint rec1_f_len; /* length of current field in rec1 */
const byte* rec1_b_ptr; /* pointer to the current byte
in rec1 field */
ulint rec1_byte; /* value of current byte to be
compared in rec1 */
ulint rec2_f_len; /* length of current field in rec2 */
const byte* rec2_b_ptr; /* pointer to the current byte
in rec2 field */
ulint rec2_byte; /* value of current byte to be
compared in rec2 */
ulint cur_field; /* current field number */
ut_ad(!rec_offs_any_extern(offsets1));
ut_ad(!rec_offs_any_extern(offsets2));
ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
ut_ad(rec_offs_n_fields(offsets1) == rec_offs_n_fields(offsets2));
for (cur_field = 0; cur_field < rec_offs_n_fields(offsets1);
cur_field++) {
ulint cur_bytes;
ulint mtype;
ulint prtype;
{
const dict_col_t* col
= dict_index_get_nth_col(index, cur_field);
mtype = col->mtype;
prtype = col->prtype;
}
rec1_b_ptr = rec_get_nth_field(rec1, offsets1,
cur_field, &rec1_f_len);
rec2_b_ptr = rec_get_nth_field(rec2, offsets2,
cur_field, &rec2_f_len);
if (rec1_f_len == UNIV_SQL_NULL
|| rec2_f_len == UNIV_SQL_NULL) {
if (rec1_f_len == rec2_f_len) {
goto next_field;
} else if (rec2_f_len == UNIV_SQL_NULL) {
/* We define the SQL null to be the
smallest possible value of a field
in the alphabetical order */
return(1);
} else {
return(-1);
}
}
if (mtype >= DATA_FLOAT
|| (mtype == DATA_BLOB
&& 0 == (prtype & DATA_BINARY_TYPE)
&& dtype_get_charset_coll(prtype)
!= DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
int ret = cmp_whole_field(mtype, prtype,
rec1_b_ptr,
(unsigned) rec1_f_len,
rec2_b_ptr,
(unsigned) rec2_f_len);
if (ret) {
return(ret);
}
goto next_field;
}
/* Compare the fields */
for (cur_bytes = 0;; cur_bytes++, rec1_b_ptr++, rec2_b_ptr++) {
if (rec2_f_len <= cur_bytes) {
if (rec1_f_len <= cur_bytes) {
goto next_field;
}
rec2_byte = dtype_get_pad_char(mtype, prtype);
if (rec2_byte == ULINT_UNDEFINED) {
return(1);
}
} else {
rec2_byte = *rec2_b_ptr;
}
if (rec1_f_len <= cur_bytes) {
rec1_byte = dtype_get_pad_char(mtype, prtype);
if (rec1_byte == ULINT_UNDEFINED) {
return(-1);
}
} else {
rec1_byte = *rec1_b_ptr;
}
if (rec1_byte == rec2_byte) {
/* If the bytes are equal, they will remain
such even after the collation transformation
below */
continue;
}
if (mtype <= DATA_CHAR
|| (mtype == DATA_BLOB
&& !(prtype & DATA_BINARY_TYPE))) {
rec1_byte = cmp_collate(rec1_byte);
rec2_byte = cmp_collate(rec2_byte);
}
if (rec1_byte < rec2_byte) {
return(-1);
} else if (rec1_byte > rec2_byte) {
return(1);
}
}
next_field:
continue;
}
/* If we ran out of fields, rec1 was equal to rec2. */
return(0);
}
#endif /* !UNIV_HOTBACKUP */
/*****************************************************************
This function is used to compare two physical records. Only the common
first fields are compared, and if an externally stored field is
......
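In a merge pass, this comparator can order two records of the index being built once their offsets have been computed.  A hypothetical use, assuming the offsets come from rec_get_offsets() for the same index and neither record contains externally stored columns:

static ibool
merge_rec_le(
/*=========*/
				/* out: TRUE if rec1 sorts before rec2
				or is equal to it */
	const rec_t*	rec1,	/* in: physical record */
	const rec_t*	rec2,	/* in: physical record */
	const ulint*	offsets1,/* in: rec_get_offsets(rec1, index) */
	const ulint*	offsets2,/* in: rec_get_offsets(rec2, index) */
	dict_index_t*	index)	/* in: index being built */
{
	return(cmp_rec_rec_simple(rec1, rec2, offsets1, offsets2, index)
	       <= 0);
}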
......@@ -236,6 +236,14 @@ rec_init_offsets_comp_ordinary(
dict_field_t* field;
ulint null_mask = 1;
#ifdef UNIV_DEBUG
/* We cannot invoke rec_offs_make_valid() here, because it can hold
that extra != REC_N_NEW_EXTRA_BYTES. Similarly, rec_offs_validate()
will fail in that case, because it invokes rec_get_status(). */
offsets[2] = (ulint) rec;
offsets[3] = (ulint) index;
#endif /* UNIV_DEBUG */
/* read the lengths of fields 0..n */
do {
ulint len;
......@@ -713,41 +721,50 @@ Determines the size of a data tuple in ROW_FORMAT=COMPACT. */
ulint
rec_get_converted_size_comp(
/*========================*/
/* out: size */
/* out: total size */
dict_index_t* index, /* in: record descriptor;
dict_table_is_comp() is assumed to hold */
const dtuple_t* dtuple, /* in: data tuple */
ulint status, /* in: status bits of the record */
const dfield_t* fields, /* in: array of data fields */
ulint n_fields,/* in: number of data fields */
const ulint* ext, /* in: array of extern field numbers */
ulint n_ext) /* in: number of elements in ext */
ulint n_ext, /* in: number of elements in ext */
ulint* extra) /* out: extra size */
{
ulint size = REC_N_NEW_EXTRA_BYTES
+ UT_BITS_IN_BYTES(index->n_nullable);
ulint extra_size;
ulint data_size;
ulint i;
ulint j;
ulint n_fields;
ut_ad(index && dtuple);
ut_ad(dtuple_validate(dtuple));
ut_ad(index);
ut_ad(fields);
ut_ad(n_fields > 0);
switch (dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) {
switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
case REC_STATUS_ORDINARY:
n_fields = dict_index_get_n_fields(index);
ut_ad(n_fields == dtuple_get_n_fields(dtuple));
ut_ad(n_fields == dict_index_get_n_fields(index));
data_size = 0;
break;
case REC_STATUS_NODE_PTR:
n_fields = dict_index_get_n_unique_in_tree(index);
ut_ad(n_fields + 1 == dtuple_get_n_fields(dtuple));
ut_ad(dtuple_get_nth_field(dtuple, n_fields)->len == 4);
size += 4; /* child page number */
n_fields--;
ut_ad(n_fields == dict_index_get_n_unique_in_tree(index));
ut_ad(fields[n_fields].len == 4);
ut_ad(!n_ext);
data_size = 4; /* child page number */
break;
case REC_STATUS_INFIMUM:
case REC_STATUS_SUPREMUM:
/* infimum or supremum record, 8 data bytes */
return(REC_N_NEW_EXTRA_BYTES + 8);
extra_size = REC_N_NEW_EXTRA_BYTES;
data_size = 8;
goto func_exit;
default:
ut_error;
return(ULINT_UNDEFINED);
}
extra_size = REC_N_NEW_EXTRA_BYTES
+ UT_BITS_IN_BYTES(index->n_nullable);
/* read the lengths of fields 0..n */
for (i = j = 0; i < n_fields; i++) {
dict_field_t* field;
......@@ -755,12 +772,11 @@ rec_get_converted_size_comp(
const dict_col_t* col;
field = dict_index_get_nth_field(index, i);
len = dtuple_get_nth_field(dtuple, i)->len;
len = fields[i].len;
col = dict_field_get_col(field);
ut_ad(dict_col_type_assert_equal(
col, dfield_get_type(dtuple_get_nth_field(
dtuple, i))));
ut_ad(dict_col_type_assert_equal(col,
dfield_get_type(&fields[i])));
if (len == UNIV_SQL_NULL) {
/* No length is stored for NULL fields. */
......@@ -777,23 +793,28 @@ rec_get_converted_size_comp(
|| field->fixed_len == field->prefix_len);
} else if (UNIV_UNLIKELY(j < n_ext) && i == ext[j]) {
j++;
size += 2;
extra_size += 2;
} else if (len < 128
|| (col->len < 256 && col->mtype != DATA_BLOB)) {
size++;
extra_size++;
} else {
/* For variable-length columns, we look up the
maximum length from the column itself. If this
is a prefix index column shorter than 256 bytes,
this will waste one byte. */
size += 2;
extra_size += 2;
}
size += len;
data_size += len;
}
ut_ad(j == n_ext);
return(size);
func_exit:
if (UNIV_LIKELY_NULL(extra)) {
*extra = extra_size;
}
return(extra_size + data_size);
}
/***************************************************************
......@@ -980,23 +1001,23 @@ rec_convert_dtuple_to_rec_old(
/*************************************************************
Builds a ROW_FORMAT=COMPACT record out of a data tuple. */
byte*
void
rec_convert_dtuple_to_rec_comp(
/*===========================*/
/* out: pointer to the start of data payload */
byte* buf, /* in: start address of the data area */
rec_t* rec, /* in: origin of record */
ulint extra, /* in: number of bytes to reserve between
the record header and the data payload
(usually REC_N_NEW_EXTRA_BYTES) */
(normally REC_N_NEW_EXTRA_BYTES) */
dict_index_t* index, /* in: record descriptor */
const dtuple_t* dtuple, /* in: data tuple */
ulint status, /* in: status bits of the record */
const dfield_t* fields, /* in: array of data fields */
ulint n_fields,/* in: number of data fields */
const ulint* ext, /* in: array of extern field numbers,
in ascending order */
ulint n_ext) /* in: number of elements in ext */
{
const dfield_t* field;
const dtype_t* type;
rec_t* rec = buf + extra;
byte* end;
byte* nulls;
byte* lens;
......@@ -1006,18 +1027,10 @@ rec_convert_dtuple_to_rec_comp(
ulint n_node_ptr_field;
ulint fixed_len;
ulint null_mask = 1;
const ulint n_fields = dtuple_get_n_fields(dtuple);
ut_ad(dict_table_is_comp(index->table));
ut_ad(n_fields > 0);
/* Try to ensure that the memset() between the for() loops
completes fast. The address is not exact, but UNIV_PREFETCH
should never generate a memory fault. */
UNIV_PREFETCH_RW(buf - n_fields);
UNIV_PREFETCH_RW(rec);
switch (UNIV_EXPECT(dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK,
REC_STATUS_ORDINARY)) {
switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
case REC_STATUS_ORDINARY:
ut_ad(n_fields <= dict_index_get_n_fields(index));
n_node_ptr_field = ULINT_UNDEFINED;
......@@ -1030,62 +1043,12 @@ rec_convert_dtuple_to_rec_comp(
case REC_STATUS_SUPREMUM:
ut_ad(n_fields == 1);
n_node_ptr_field = ULINT_UNDEFINED;
ut_d(j = 0);
goto init;
break;
default:
ut_error;
return(0);
}
/* Calculate the offset of the origin in the physical record.
We must loop over all fields to do this. */
rec += UT_BITS_IN_BYTES(index->n_nullable);
for (i = j = 0; i < n_fields; i++) {
if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
#ifdef UNIV_DEBUG
field = dtuple_get_nth_field(dtuple, i);
type = dfield_get_type(field);
ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL);
ut_ad(dfield_get_len(field) == 4);
#endif /* UNIV_DEBUG */
goto init;
}
field = dtuple_get_nth_field(dtuple, i);
type = dfield_get_type(field);
len = dfield_get_len(field);
fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
ut_ad(dict_col_type_assert_equal(
dict_field_get_col(dict_index_get_nth_field(
index, i)),
dfield_get_type(field)));
if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
if (len == UNIV_SQL_NULL)
continue;
}
/* only nullable fields can be null */
ut_ad(len != UNIV_SQL_NULL);
if (fixed_len) {
ut_ad(len == fixed_len);
} else {
ut_ad(len <= dtype_get_len(type)
|| dtype_get_mtype(type) == DATA_BLOB);
rec++;
if (len >= 128
&& (dtype_get_len(type) >= 256
|| dtype_get_mtype(type) == DATA_BLOB)) {
rec++;
} else if (UNIV_UNLIKELY(j < n_ext) && i == ext[j]) {
j++;
rec++;
}
}
return;
}
init:
ut_ad(j == n_ext);
end = rec;
nulls = rec - (extra + 1);
lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
......@@ -1094,8 +1057,7 @@ init:
/* Store the data and the offsets */
for (i = j = 0; i < n_fields; i++) {
field = dtuple_get_nth_field(dtuple, i);
for (i = j = 0, field = fields; i < n_fields; i++, field++) {
type = dfield_get_type(field);
len = dfield_get_len(field);
......@@ -1106,7 +1068,6 @@ init:
end += 4;
break;
}
fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
/* nullable field */
......@@ -1130,6 +1091,9 @@ init:
}
/* only nullable fields can be null */
ut_ad(len != UNIV_SQL_NULL);
fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
if (fixed_len) {
ut_ad(len == fixed_len);
} else {
......@@ -1157,8 +1121,6 @@ init:
}
ut_ad(j == n_ext);
return(rec);
}
/*************************************************************
......@@ -1177,8 +1139,19 @@ rec_convert_dtuple_to_rec_new(
in ascending order */
ulint n_ext) /* in: number of elements in ext */
{
rec_t* rec = rec_convert_dtuple_to_rec_comp(
buf, REC_N_NEW_EXTRA_BYTES, index, dtuple, ext, n_ext);
ulint extra_size;
ulint status;
rec_t* rec;
status = dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK;
rec_get_converted_size_comp(index, status,
dtuple->fields, dtuple->n_fields,
ext, n_ext, &extra_size);
rec = buf + extra_size;
rec_convert_dtuple_to_rec_comp(
rec, REC_N_NEW_EXTRA_BYTES, index, status,
dtuple->fields, dtuple->n_fields, ext, n_ext);
/* Set the info bits of the record */
rec_set_info_and_status_bits(rec, dtuple_get_info_bits(dtuple));
......
......@@ -33,7 +33,6 @@ Created 9/17/2000 Heikki Tuuri
#include "btr0sea.h"
#include "fil0fil.h"
#include "ibuf0ibuf.h"
#include "row0merge.h"
/* A dummy variable used to fool the compiler */
ibool row_mysql_identically_false = FALSE;
......@@ -4492,93 +4491,6 @@ row_create_index_graph_for_mysql(
return(err);
}
/*************************************************************************
Build new indexes to a table by reading a clustered index,
creating a temporary file containing index entries, merge sorting
these index entries and inserting sorted index entries to indexes. */
ulint
row_build_index_for_mysql(
/*======================*/
/* out: 0 or error code */
trx_t* trx, /* in: transaction */
dict_table_t* old_table, /* in: Table where rows are
read from */
dict_table_t* new_table, /* in: Table where indexes are
created. Note that old_table ==
new_table if we are creating a
secondary keys. */
dict_index_t** index, /* in: Indexes to be created */
ulint num_of_keys) /* in: Number of indexes to be
created */
{
merge_file_t* merge_files;
ulint index_num;
ulint error;
ut_ad(trx && old_table && new_table && index && num_of_keys);
trx_start_if_not_started(trx);
/* Allocate memory for merge file data structure and initialize
fields */
merge_files = mem_alloc(num_of_keys * sizeof *merge_files);
for (index_num = 0; index_num < num_of_keys; index_num++) {
row_merge_file_create(&merge_files[index_num]);
}
/* Read clustered index of the table and create files for
secondary index entries for merge sort */
error = row_merge_read_clustered_index(
trx, old_table, index, merge_files, num_of_keys);
if (error != DB_SUCCESS) {
goto func_exit;
}
trx_start_if_not_started(trx);
/* Now we have files containing index entries ready for
sorting and inserting. */
for (index_num = 0; index_num < num_of_keys; index_num++) {
/* Do a merge sort and insert from those files
which we have written at least one block */
if (merge_files[index_num].num_of_blocks > 0) {
/* Merge sort file using linked list merge
sort for files. */
row_merge_sort_linked_list_in_disk(
index[index_num],
merge_files[index_num].file,
(int *)&error);
if (error == DB_SUCCESS) {
error = row_merge_insert_index_tuples(
trx, index[index_num], new_table,
merge_files[index_num].file, 0);
}
if (error != DB_SUCCESS) {
trx->error_key_num = index_num;
goto func_exit;
}
}
}
func_exit:
mem_free(merge_files);
return(error);
}
#endif /* !UNIV_HOTBACKUP */
/*************************************************************************
......
......@@ -141,7 +141,7 @@ row_build_index_entry(
}
/***********************************************************************
An inverse function to dict_row_build_index_entry. Builds a row from a
An inverse function to row_build_index_entry. Builds a row from a
record in a clustered index. */
dtuple_t*
......@@ -256,6 +256,53 @@ row_build(
return(row);
}
/***********************************************************************
Converts an index record to a typed data tuple. */
dtuple_t*
row_rec_to_index_entry_low(
/*=======================*/
/* out, index entry built; does not
set info_bits, and the data fields in
the entry will point directly to rec */
const rec_t* rec, /* in: record in the index */
dict_index_t* index, /* in: index */
const ulint* offsets,/* in: rec_get_offsets(rec, index) */
mem_heap_t* heap) /* in: memory heap from which the memory
needed is allocated */
{
dtuple_t* entry;
dfield_t* dfield;
ulint i;
const byte* field;
ulint len;
ulint rec_len;
ut_ad(rec && heap && index);
rec_len = rec_offs_n_fields(offsets);
entry = dtuple_create(heap, rec_len);
dtuple_set_n_fields_cmp(entry,
dict_index_get_n_unique_in_tree(index));
ut_ad(rec_len == dict_index_get_n_fields(index));
dict_index_copy_types(entry, index, rec_len);
for (i = 0; i < rec_len; i++) {
dfield = dtuple_get_nth_field(entry, i);
field = rec_get_nth_field(rec, offsets, i, &len);
dfield_set_data(dfield, field, len);
}
ut_ad(dtuple_check_typed(entry));
return(entry);
}
/***********************************************************************
Converts an index record to a typed data tuple. NOTE that externally
stored (often big) fields are NOT copied to heap. */
......@@ -281,11 +328,6 @@ row_rec_to_index_entry(
needed is allocated */
{
dtuple_t* entry;
dfield_t* dfield;
ulint i;
const byte* field;
ulint len;
ulint rec_len;
byte* buf;
mem_heap_t* tmp_heap = NULL;
ulint offsets_[REC_OFFS_NORMAL_SIZE];
......@@ -305,29 +347,12 @@ row_rec_to_index_entry(
rec_offs_make_valid(rec, index, offsets);
}
rec_len = rec_offs_n_fields(offsets);
entry = dtuple_create(heap, rec_len);
dtuple_set_n_fields_cmp(entry,
dict_index_get_n_unique_in_tree(index));
ut_ad(rec_len == dict_index_get_n_fields(index));
dict_index_copy_types(entry, index, rec_len);
entry = row_rec_to_index_entry_low(rec, index, offsets, heap);
dtuple_set_info_bits(entry,
rec_get_info_bits(rec, rec_offs_comp(offsets)));
for (i = 0; i < rec_len; i++) {
dfield = dtuple_get_nth_field(entry, i);
field = rec_get_nth_field(rec, offsets, i, &len);
dfield_set_data(dfield, field, len);
}
ut_ad(dtuple_check_typed(entry));
if (tmp_heap) {
if (UNIV_LIKELY_NULL(tmp_heap)) {
mem_heap_free(tmp_heap);
}
......