MDEV-26131 SEGV in ha_innobase::discard_or_import_tablespace

An import operation without a .cfg file fails when there is a mismatch of indexes between the metadata table and the .ibd file. Moreover, MDEV-19022 shows that InnoDB can end up with an index tree where a non-leaf page has only one child page, so it is unsafe to identify a secondary index root page by scanning the tablespace for index pages that have no siblings. This patch does the following when importing a table without a .cfg file: 1) If the metadata contains more than one index, then InnoDB stops the import operation and reports to the user that all secondary indexes must be dropped before doing the import operation. 2) When the metadata contains only the clustered index, then InnoDB finds the index id by reading page 0 & page 3 instead of traversing the whole tablespace.

MDEV-26131 SEGV in ha_innobase::discard_or_import_tablespace
An import operation without a .cfg file fails when there is a mismatch of indexes between the metadata table and the .ibd file. Moreover, MDEV-19022 shows that InnoDB can end up with an index tree where a non-leaf page has only one child page, so it is unsafe to identify a secondary index root page by scanning the tablespace for index pages that have no siblings. This patch does the following when importing a table without a .cfg file: 1) If the metadata contains more than one index, then InnoDB stops the import operation and reports to the user that all secondary indexes must be dropped before doing the import operation. 2) When the metadata contains only the clustered index, then InnoDB finds the index id by reading page 0 & page 3 instead of traversing the whole tablespace.
89445b64 · Thirunarayanan Balathandayuthapani · 4cd063b9 · 89445b64 · 89445b64 · 89445b64
Commit 89445b64 authored Jul 28, 2021 by Thirunarayanan Balathandayuthapani
6 changed files
--- a/mysql-test/suite/encryption/r/innodb_import.result
+++ b/mysql-test/suite/encryption/r/innodb_import.result
+#
+# MDEV-26131 SEGV in ha_innobase::discard_or_import_tablespace
+#
+CREATE TABLE t1(f1 int,f2 text)ENGINE=InnoDB;
+INSERT INTO t1 VALUES(1, "InnoDB");
+CREATE TABLE t2 LIKE t1;
+ALTER TABLE t2 ADD KEY idx (f2(13));
+ALTER TABLE t2 DISCARD TABLESPACE;
+FLUSH TABLES t1 FOR EXPORT;
+UNLOCK TABLES;
+ALTER TABLE t2 IMPORT TABLESPACE;
+ERROR HY000: Internal error: Drop all secondary indexes before importing table test/t2 when .cfg file is missing.
+ALTER TABLE t2 DROP KEY idx;
+Warnings:
+Warning	1814	Tablespace has been discarded for table `t2`
+ALTER TABLE t2 IMPORT TABLESPACE;
+Warnings:
+Warning	1810	IO Read error: (2, No such file or directory) Error opening './test/t2.cfg', will attempt to import without schema verification
+SELECT * FROM t2;
+f1	f2
+1	InnoDB
+DROP TABLE t1, t2;
--- a/mysql-test/suite/encryption/t/innodb_import.combinations
+++ b/mysql-test/suite/encryption/t/innodb_import.combinations
+[page_compressed]
+innodb-compression-default=1
+[encryption]
+innodb-encrypt-tables=1
+[page_compressed_encryption]
+innodb-compression-default=1
+innodb-encrypt-tables=1
--- a/mysql-test/suite/encryption/t/innodb_import.test
+++ b/mysql-test/suite/encryption/t/innodb_import.test
+--source include/have_innodb.inc
+-- source include/have_example_key_management_plugin.inc
+--echo #
+--echo # MDEV-26131 SEGV in ha_innobase::discard_or_import_tablespace
+--echo #
+let $MYSQLD_DATADIR = `SELECT @@datadir`;
+CREATE TABLE t1(f1 int,f2 text)ENGINE=InnoDB;
+INSERT INTO t1 VALUES(1, "InnoDB");
+CREATE TABLE t2 LIKE t1;
+ALTER TABLE t2 ADD KEY idx (f2(13));
+ALTER TABLE t2 DISCARD TABLESPACE;
+FLUSH TABLES t1 FOR EXPORT;
+--copy_file $MYSQLD_DATADIR/test/t1.ibd $MYSQLD_DATADIR/test/t2.ibd
+UNLOCK TABLES;
+--error ER_INTERNAL_ERROR
+ALTER TABLE t2 IMPORT TABLESPACE;
+
+ALTER TABLE t2 DROP KEY idx;
+ALTER TABLE t2 IMPORT TABLESPACE;
+SELECT * FROM t2;
+DROP TABLE t1, t2;
--- a/mysql-test/suite/innodb_gis/r/alter_spatial_index.result
+++ b/mysql-test/suite/innodb_gis/r/alter_spatial_index.result
@@ -252,6 +252,16 @@ UNLOCK TABLES;
 ALTER TABLE tab DISCARD TABLESPACE;
 SELECT c1,ST_Astext(c2),ST_Astext(c4) FROM tab;
 ERROR HY000: Tablespace has been discarded for table `tab`
+ERROR HY000: Internal error: Drop all secondary indexes before importing table test/tab when .cfg file is missing.
+Table	Create Table
+tab	CREATE TABLE `tab` (
+  `c1` int(11) NOT NULL,
+  `c2` point NOT NULL,
+  `c3` linestring NOT NULL,
+  `c4` polygon NOT NULL,
+  `c5` geometry NOT NULL,
+  PRIMARY KEY (`c2`(25))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
 CHECK TABLE tab;
 Table	Op	Msg_type	Msg_text
 test.tab	check	status	OK
@@ -282,9 +292,6 @@ INSERT INTO tab SELECT * FROM tab1;
 ALTER TABLE tab DROP PRIMARY KEY;
 affected rows: 1
 info: Records: 1  Duplicates: 0  Warnings: 0
-ALTER TABLE tab DROP INDEX idx2;
-affected rows: 0
-info: Records: 0  Duplicates: 0  Warnings: 0
 SET STATEMENT sql_mode = 'NO_ENGINE_SUBSTITUTION' FOR
 CREATE TEMPORARY TABLE temp_tab AS SELECT * FROM tab where c1 = c2;
 INSERT INTO temp_tab SELECT * FROM tab;
@@ -322,18 +329,10 @@ tab	CREATE TABLE `tab` (
  `c2` point NOT NULL,
  `c3` linestring NOT NULL,
  `c4` polygon NOT NULL,
-  `c5` geometry NOT NULL,
-  SPATIAL KEY `idx3` (`c3`),
-  SPATIAL KEY `idx4` (`c4`) COMMENT 'testing spatial index on Polygon',
-  SPATIAL KEY `idx5` (`c5`) COMMENT 'testing spatial index on Geometry',
-  KEY `idx6` (`c4`(10)) USING BTREE
+  `c5` geometry NOT NULL
 ) ENGINE=InnoDB DEFAULT CHARSET=latin1
 SHOW INDEX FROM tab;
 Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment	Index_comment
-tab	1	idx3	1	c3	A	#	32	NULL		SPATIAL		
-tab	1	idx4	1	c4	A	#	32	NULL		SPATIAL		testing spatial index on Polygon
-tab	1	idx5	1	c5	A	#	32	NULL		SPATIAL		testing spatial index on Geometry
-tab	1	idx6	1	c4	A	#	10	NULL		BTREE		
 DELETE FROM tab;
 ALTER TABLE tab ADD PRIMARY KEY(c2);
 affected rows: 0
@@ -354,20 +353,12 @@ tab	CREATE TABLE `tab` (
  `c5` geometry NOT NULL,
  PRIMARY KEY (`c2`(25)),
  UNIQUE KEY `const_1` (`c2`(25)),
-  SPATIAL KEY `idx3` (`c3`),
-  SPATIAL KEY `idx4` (`c4`) COMMENT 'testing spatial index on Polygon',
-  SPATIAL KEY `idx5` (`c5`) COMMENT 'testing spatial index on Geometry',
-  KEY `idx6` (`c4`(10)) USING BTREE,
  SPATIAL KEY `idx2` (`c2`)
 ) ENGINE=InnoDB DEFAULT CHARSET=latin1
 SHOW INDEX FROM tab;
 Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment	Index_comment
 tab	0	PRIMARY	1	c2	A	#	25	NULL		BTREE		
 tab	0	const_1	1	c2	A	#	25	NULL		BTREE		
-tab	1	idx3	1	c3	A	#	32	NULL		SPATIAL		
-tab	1	idx4	1	c4	A	#	32	NULL		SPATIAL		testing spatial index on Polygon
-tab	1	idx5	1	c5	A	#	32	NULL		SPATIAL		testing spatial index on Geometry
-tab	1	idx6	1	c4	A	#	10	NULL		BTREE		
 tab	1	idx2	1	c2	A	#	32	NULL		SPATIAL		
 INSERT INTO tab(c1,c2,c3,c4,c5)
 VALUES(1,ST_GeomFromText('POINT(10 10)'),ST_GeomFromText('LINESTRING(5 5,20 20,30 30)'),
@@ -396,20 +387,12 @@ tab	CREATE TABLE `tab` (
  `c5` geometry NOT NULL,
  PRIMARY KEY (`c5`(10)),
  UNIQUE KEY `const_1` (`c5`(10)),
-  SPATIAL KEY `idx3` (`c3`),
-  SPATIAL KEY `idx4` (`c4`) COMMENT 'testing spatial index on Polygon',
-  SPATIAL KEY `idx5` (`c5`) COMMENT 'testing spatial index on Geometry',
-  KEY `idx6` (`c4`(10)) USING BTREE,
  SPATIAL KEY `idx2` (`c2`)
 ) ENGINE=InnoDB DEFAULT CHARSET=latin1
 SHOW INDEX FROM tab;
 Table	Non_unique	Key_name	Seq_in_index	Column_name	Collation	Cardinality	Sub_part	Packed	Null	Index_type	Comment	Index_comment
 tab	0	PRIMARY	1	c5	A	#	10	NULL		BTREE		
 tab	0	const_1	1	c5	A	#	10	NULL		BTREE		
-tab	1	idx3	1	c3	A	#	32	NULL		SPATIAL		
-tab	1	idx4	1	c4	A	#	32	NULL		SPATIAL		testing spatial index on Polygon
-tab	1	idx5	1	c5	A	#	32	NULL		SPATIAL		testing spatial index on Geometry
-tab	1	idx6	1	c4	A	#	10	NULL		BTREE		
 tab	1	idx2	1	c2	A	#	32	NULL		SPATIAL		
 INSERT INTO tab(c1,c2,c3,c4,c5)
 VALUES(1,ST_GeomFromText('POINT(10 10)'),ST_GeomFromText('LINESTRING(5 5,20 20,30 30)'),

--- a/mysql-test/suite/innodb_gis/t/alter_spatial_index.test
+++ b/mysql-test/suite/innodb_gis/t/alter_spatial_index.test
@@ -277,8 +277,17 @@ SELECT c1,ST_Astext(c2),ST_Astext(c4) FROM tab;

 --disable_query_log

+--error ER_INTERNAL_ERROR
 ALTER TABLE tab IMPORT TABLESPACE;

+ALTER TABLE tab DROP INDEX idx2;
+ALTER TABLE tab DROP INDEX idx3;
+ALTER TABLE tab DROP INDEX idx4;
+ALTER TABLE tab DROP INDEX idx5;
+ALTER TABLE tab DROP INDEX idx6;
+
+SHOW CREATE TABLE tab;
+ALTER TABLE tab IMPORT TABLESPACE;
 --enable_query_log

 CHECK TABLE tab;
@@ -308,7 +317,6 @@ INSERT INTO tab SELECT * FROM tab1;
 --enable_info
 ALTER TABLE tab DROP PRIMARY KEY;

-ALTER TABLE tab DROP INDEX idx2;
 --disable_info

 # Check spatial index on temp tables

--- a/storage/innobase/row/row0import.cc
+++ b/storage/innobase/row/row0import.cc
@@ -222,6 +222,19 @@ struct row_import {
 						found and was readable */
 };

+struct fil_iterator_t {
+	pfs_os_file_t	file;			/*!< File handle */
+	const char*	filepath;		/*!< File path name */
+	os_offset_t	start;			/*!< From where to start */
+	os_offset_t	end;			/*!< Where to stop */
+	os_offset_t	file_size;		/*!< File size in bytes */
+	ulint		n_io_buffers;		/*!< Number of pages to use
+						for IO */
+	byte*		io_buffer;		/*!< Buffer to use for IO */
+	fil_space_crypt_t *crypt_data;		/*!< Crypt data (if encrypted) */
+	byte*           crypt_io_buffer;        /*!< IO buffer when encrypted */
+};
+
 /** Use the page cursor to iterate over records in a block. */
 class RecIterator {
 public:
@@ -432,6 +445,10 @@ class AbstractCallback
 			? block->page.zip.data : block->frame;
 	}

+	/** Invoke the functionality for the callback */
+	virtual dberr_t run(const fil_iterator_t& iter,
+			    buf_block_t* block) UNIV_NOTHROW = 0;
+
 protected:
 	/** Get the physical offset of the extent descriptor within the page.
 	@param page_no page number of the extent descriptor
@@ -591,6 +608,24 @@ AbstractCallback::init(
 	return set_current_xdes(0, page);
 }

+/**
+TODO: This can be made parallel trivially by chunking up the file
+and creating a callback per thread.. Main benefit will be to use
+multiple CPUs for checksums and compressed tables. We have to do
+compressed tables block by block right now. Secondly we need to
+decompress/compress and copy too much of data. These are
+CPU intensive.
+
+Iterate over all the pages in the tablespace.
+@param iter - Tablespace iterator
+@param block - block to use for IO
+@param callback - Callback to inspect and update page contents
+@retval DB_SUCCESS or error code */
+static dberr_t fil_iterate(
+	const fil_iterator_t&	iter,
+	buf_block_t*		block,
+	AbstractCallback&	callback);
+
 /**
 Try and determine the index root pages by checking if the next/prev
 pointers are both FIL_NULL. We need to ensure that skip deleted pages. */
@@ -608,15 +643,13 @@ struct FetchIndexRootPages : public AbstractCallback {
 		ulint		m_page_no;	/*!< Root page number */
 	};

-	typedef std::vector<Index, ut_allocator<Index> >	Indexes;
-
 	/** Constructor
 	@param trx covering (user) transaction
 	@param table table definition in server .*/
 	FetchIndexRootPages(const dict_table_t* table, trx_t* trx)
 		:
 		AbstractCallback(trx),
-		m_table(table) UNIV_NOTHROW { }
+		m_table(table), m_index(0, 0) UNIV_NOTHROW { }

 	/** Destructor */
 	virtual ~FetchIndexRootPages() UNIV_NOTHROW { }
@@ -628,6 +661,13 @@ struct FetchIndexRootPages : public AbstractCallback {
 		return(m_space);
 	}

+	/** Fetch the clustered index root page in the tablespace
+	@param iter	Tablespace iterator
+	@param block	Block to use for IO
+	@retval DB_SUCCESS or error code */
+	dberr_t run(const fil_iterator_t& iter,
+		    buf_block_t* block) UNIV_NOTHROW;
+
 	/** Called for each block as it is read from the file.
 	@param block block to convert, it is not from the buffer pool.
 	@retval DB_SUCCESS or error code. */
@@ -641,7 +681,7 @@ struct FetchIndexRootPages : public AbstractCallback {
 	const dict_table_t*	m_table;

 	/** Index information */
-	Indexes			m_indexes;
+	Index			m_index;
 };

 /** Called for each block as it is read from the file. Check index pages to
@@ -656,19 +696,11 @@ dberr_t FetchIndexRootPages::operator()(buf_block_t* block) UNIV_NOTHROW

 	const page_t*	page = get_frame(block);

-	ulint	page_type = fil_page_get_type(page);
-
-	if (page_type == FIL_PAGE_TYPE_XDES) {
-		return set_current_xdes(block->page.id.page_no(), page);
-	} else if (fil_page_index_page_check(page)
-		   && !is_free(block->page.id.page_no())
-		   && !page_has_siblings(page)) {
-
 	index_id_t	id = btr_page_get_index_id(page);

-		m_indexes.push_back(Index(id, block->page.id.page_no()));
+	m_index.m_id = id;
+	m_index.m_page_no = block->page.id.page_no();

-		if (m_indexes.size() == 1) {
 	/* Check that the tablespace flags match the table flags. */
 	ulint expected = dict_tf_to_fsp_flags(m_table->flags);
 	if (!fsp_flags_match(expected, m_space_flags)) {
@@ -680,8 +712,6 @@ dberr_t FetchIndexRootPages::operator()(buf_block_t* block) UNIV_NOTHROW
 			unsigned(m_space_flags));
 		return(DB_CORRUPTION);
 	}
-		}
-	}

 	return DB_SUCCESS;
 }
@@ -692,11 +722,9 @@ Update the import configuration that will be used to import the tablespace.
 dberr_t
 FetchIndexRootPages::build_row_import(row_import* cfg) const UNIV_NOTHROW
 {
-	Indexes::const_iterator end = m_indexes.end();
-
 	ut_a(cfg->m_table == m_table);
 	cfg->m_page_size.copy_from(m_page_size);
-	cfg->m_n_indexes = m_indexes.size();
+	cfg->m_n_indexes = 1;

 	if (cfg->m_n_indexes == 0) {

@@ -722,13 +750,9 @@ FetchIndexRootPages::build_row_import(row_import* cfg) const UNIV_NOTHROW

 	row_index_t*	cfg_index = cfg->m_indexes;

-	for (Indexes::const_iterator it = m_indexes.begin();
-	     it != end;
-	     ++it, ++cfg_index) {
-
 	char	name[BUFSIZ];

-		snprintf(name, sizeof(name), "index" IB_ID_FMT, it->m_id);
+	snprintf(name, sizeof(name), "index" IB_ID_FMT, m_index.m_id);

 	ulint	len = strlen(name) + 1;

@@ -747,12 +771,11 @@ FetchIndexRootPages::build_row_import(row_import* cfg) const UNIV_NOTHROW

 	memcpy(cfg_index->m_name, name, len);

-		cfg_index->m_id = it->m_id;
+	cfg_index->m_id = m_index.m_id;

 	cfg_index->m_space = m_space;

-		cfg_index->m_page_no = it->m_page_no;
-	}
+	cfg_index->m_page_no = m_index.m_page_no;

 	return(DB_SUCCESS);
 }
@@ -803,6 +826,11 @@ class PageConverter : public AbstractCallback {
 		return(m_cfg->m_table->space);
 	}

+	dberr_t run(const fil_iterator_t& iter, buf_block_t* block) UNIV_NOTHROW
+	{
+		return fil_iterate(iter, block, *this);
+	}
+
 	/** Called for each block as it is read from the file.
 	@param block block to convert, it is not from the buffer pool.
 	@retval DB_SUCCESS or error code. */
@@ -1872,7 +1900,7 @@ PageConverter::update_index_page(

 	if (is_free(block->page.id.page_no())) {
 		return(DB_SUCCESS);
-	} else if ((id = btr_page_get_index_id(page)) != m_index->m_id) {
+	} else if ((id = btr_page_get_index_id(page)) != m_index->m_id && !m_cfg->m_missing) {

 		row_index_t*	index = find_index(id);

@@ -3362,20 +3390,6 @@ row_import_update_discarded_flag(
 	return(err);
 }

-struct fil_iterator_t {
-	pfs_os_file_t	file;			/*!< File handle */
-	const char*	filepath;		/*!< File path name */
-	os_offset_t	start;			/*!< From where to start */
-	os_offset_t	end;			/*!< Where to stop */
-	os_offset_t	file_size;		/*!< File size in bytes */
-	ulint		n_io_buffers;		/*!< Number of pages to use
-						for IO */
-	byte*		io_buffer;		/*!< Buffer to use for IO */
-	fil_space_crypt_t *crypt_data;		/*!< Crypt data (if encrypted) */
-	byte*           crypt_io_buffer;        /*!< IO buffer when encrypted */
-};
-
-
 /** InnoDB writes page by page when there is page compressed
 tablespace involved. It does help to save the disk space when
 punch hole is enabled
@@ -3426,22 +3440,91 @@ dberr_t fil_import_compress_fwrite(const fil_iterator_t &iter,
  return err;
 }

-/********************************************************************//**
-TODO: This can be made parallel trivially by chunking up the file and creating
-a callback per thread. . Main benefit will be to use multiple CPUs for
-checksums and compressed tables. We have to do compressed tables block by
-block right now. Secondly we need to decompress/compress and copy too much
-of data. These are CPU intensive.
+dberr_t FetchIndexRootPages::run(const fil_iterator_t& iter,
+				 buf_block_t* block) UNIV_NOTHROW
+{
+  const ulint size= get_page_size().physical();
+  const ulint buf_size = srv_page_size
+#ifdef HAVE_LZO
+		+ LZO1X_1_15_MEM_COMPRESS
+#elif defined HAVE_SNAPPY
+		+ snappy_max_compressed_length(srv_page_size)
+#endif
+		;
+  byte* page_compress_buf = static_cast<byte*>(malloc(buf_size));
+  ut_ad(!srv_read_only_mode);

-Iterate over all the pages in the tablespace.
-@param iter - Tablespace iterator
-@param block - block to use for IO
-@param callback - Callback to inspect and update page contents
-@retval DB_SUCCESS or error code */
-static
-dberr_t
-fil_iterate(
-/*========*/
+  if (!page_compress_buf)
+    return DB_OUT_OF_MEMORY;
+
+  const bool encrypted= iter.crypt_data != NULL &&
+    iter.crypt_data->should_encrypt();
+  byte* const readptr= iter.io_buffer;
+  block->frame= readptr;
+
+  if (block->page.zip.data)
+    block->page.zip.data= readptr;
+
+  IORequest read_request(IORequest::READ);
+  read_request.disable_partial_io_warnings();
+  ulint page_no= 0;
+  bool page_compressed= false;
+
+  dberr_t err= os_file_read_no_error_handling(
+    read_request, iter.file, readptr, 3 * size, size, 0);
+  if (err != DB_SUCCESS)
+  {
+    ib::error() << iter.filepath << ": os_file_read() failed";
+    goto func_exit;
+  }
+
+  block->page.id.set_page_no(3);
+  page_no= page_get_page_no(readptr);
+
+  if (page_no != 3)
+  {
+page_corrupted:
+    ib::warn() << filename() << ": Page 3 at offset "
+               << 3 * size << " looks corrupted.";
+    err= DB_CORRUPTION;
+    goto func_exit;
+  }
+
+  page_compressed= fil_page_is_compressed_encrypted(readptr) ||
+    fil_page_is_compressed(readptr);
+
+  if (page_compressed && block->page.zip.data)
+    goto page_corrupted;
+
+  if (encrypted)
+  {
+    if (!fil_space_verify_crypt_checksum(readptr, get_page_size()))
+      goto page_corrupted;
+
+    if (!fil_space_decrypt(iter.crypt_data, readptr,
+                           get_page_size(), readptr, &err) ||
+        err != DB_SUCCESS)
+      goto func_exit;
+  }
+
+  if (page_compressed)
+  {
+    ulint compress_length = fil_page_decompress(page_compress_buf, readptr);
+    ut_ad(compress_length != srv_page_size);
+    if (compress_length == 0)
+      goto page_corrupted;
+  }
+  else if (buf_page_is_corrupted(
+            false, readptr, get_page_size(), NULL))
+    goto page_corrupted;
+
+  err = this->operator()(block);
+func_exit:
+  free(page_compress_buf);
+  return err;
+}
+
+static dberr_t fil_iterate(
 	const fil_iterator_t&	iter,
 	buf_block_t*		block,
 	AbstractCallback&	callback)
@@ -3877,7 +3960,7 @@ fil_tablespace_iterate(
 			block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
 		}

-		err = fil_iterate(iter, block, callback);
+		err = callback.run(iter, block);

 		if (iter.crypt_data) {
 			fil_space_destroy_crypt_data(&iter.crypt_data);
@@ -4022,6 +4105,16 @@ row_import_for_mysql(

 		cfg.m_page_size.copy_from(univ_page_size);

+		if (UT_LIST_GET_LEN(table->indexes) > 1) {
+			ib_errf(trx->mysql_thd, IB_LOG_LEVEL_ERROR,
+				ER_INTERNAL_ERROR,
+				"Drop all secondary indexes before importing "
+				"table %s when .cfg file is missing.",
+				table->name.m_name);
+			err = DB_ERROR;
+			return row_import_error(prebuilt, trx, err);
+		}
+
 		FetchIndexRootPages	fetchIndexRootPages(table, trx);

 		err = fil_tablespace_iterate(