Commit 2c8c1548 authored by Monty's avatar Monty

MDEV-23730 s3.replication_partition 'innodb,mix' segv

This failure was caused because of several bugs:
- Someone had removed s3-slave-ignore-updates=1 from slave.cnf, which
  caused the slave to remove files that the master was working on.
- Bug in ha_partition::change_partitions() that didn't reset m_new_file
  in case of errors. This caused crashes in ha_maria::extra() as the
  maria handler was called on files that was already closed.
- In ma_pagecache there was a bug that when one got a read error one a
  big block (s3 block), it left the flag PCBLOCK_BIG_READ on for the page
  which cased an assert when the page where flushed.
- Flush all cached tables in case of ignored ALTER TABLE

Note that when merging code from 10.3, that fixes the partition bug, use
the code from this patch instead.

Changes to ma_pagecache.cc written or reviewed by Sanja
parent 71d263a1
#
# MDEV-23248 Server crashes in mi_extra /
# ha_partition::loop_extra_alter upon REORGANIZE
#
CREATE TABLE t1 (a INT, b INT) ENGINE=MyISAM PARTITION BY RANGE (a) SUBPARTITION BY HASH (a) SUBPARTITIONS 70 (PARTITION p1 VALUES LESS THAN (100), PARTITION p2 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 SELECT 4, 6 FROM seq_1_to_131072;
UPDATE t1 SET a = 7;
set @org_debug=@@debug_dbug;
set @@debug_dbug="+d,debug_abort_copy_partitions";
ALTER TABLE t1 REORGANIZE PARTITION p1,p2 INTO (PARTITION p1 VALUES LESS THAN (5), PARTITION p2 VALUES LESS THAN MAXVALUE);
ERROR 42000: Table 't1' uses an extension that doesn't exist in this MariaDB version
set @@debug_dbug=@org_debug;
DROP TABLE t1;
--source include/have_sequence.inc
--source include/have_partition.inc
--source include/have_debug.inc
--echo #
--echo # MDEV-23248 Server crashes in mi_extra /
--echo # ha_partition::loop_extra_alter upon REORGANIZE
--echo #
CREATE TABLE t1 (a INT, b INT) ENGINE=MyISAM PARTITION BY RANGE (a) SUBPARTITION BY HASH (a) SUBPARTITIONS 70 (PARTITION p1 VALUES LESS THAN (100), PARTITION p2 VALUES LESS THAN MAXVALUE);
INSERT INTO t1 SELECT 4, 6 FROM seq_1_to_131072;
UPDATE t1 SET a = 7;
set @org_debug=@@debug_dbug;
set @@debug_dbug="+d,debug_abort_copy_partitions";
--error ER_UNSUPPORTED_EXTENSION
ALTER TABLE t1 REORGANIZE PARTITION p1,p2 INTO (PARTITION p1 VALUES LESS THAN (5), PARTITION p2 VALUES LESS THAN MAXVALUE);
set @@debug_dbug=@org_debug;
DROP TABLE t1;
replication_partition : MDEV-23730: Server crashes in ha_maria::extra
replication_mixed : MDEV-23770: Replication failure
replication_stmt : MDEV-23770: Replication failure
......@@ -89,10 +89,18 @@ select sum(c1)+0 from t1;
sum(c1)+0
600
stop slave;
flush tables;
select sum(c1)+0 from t1;
sum(c1)+0
600
show create table t1;
Table Create Table
t1 CREATE TABLE `t1` (
`c1` int(11) NOT NULL,
`c2` int(11) DEFAULT NULL,
PRIMARY KEY (`c1`)
) ENGINE=S3 DEFAULT CHARSET=latin1
PARTITION BY RANGE (`c1`)
(PARTITION `p1` VALUES LESS THAN (200) ENGINE = S3,
PARTITION `p2` VALUES LESS THAN (300) ENGINE = S3,
PARTITION `p3` VALUES LESS THAN (400) ENGINE = S3,
PARTITION `p4` VALUES LESS THAN (500) ENGINE = S3)
connection master;
drop table t1;
connection slave;
......@@ -204,8 +212,6 @@ slave-bin.000001 # Query # # use `database`; alter table t1 engine=S3
slave-bin.000001 # Gtid # # GTID #-#-#
slave-bin.000001 # Query # # use `database`; set @@sql_if_exists=1; ALTER TABLE t1 ADD PARTITION (PARTITION p4 VALUES LESS THAN (500))
slave-bin.000001 # Gtid # # GTID #-#-#
slave-bin.000001 # Query # # use `database`; flush tables
slave-bin.000001 # Gtid # # GTID #-#-#
slave-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t1` /* generated by server */
slave-bin.000001 # Gtid # # GTID #-#-#
slave-bin.000001 # Query # # use `database`; CREATE TABLE t2 (
......
......@@ -7,6 +7,12 @@
--source create_database.inc
sync_slave_with_master;
if (`select @@s3_slave_ignore_updates <> 1`)
{
die "Slave is not configured with s3-slave-ignore-updates=1";
}
let $MYSQLD_DATADIR= `select @@datadir`;
--replace_result $database database
--eval use $database
......@@ -65,20 +71,8 @@ sync_slave_with_master;
select sum(c1)+0 from t1;
stop slave;
# .frm amd .par files should not exists on the salve as it has just seen the
# ALTER TABLE which cased the removal of the .frm and .par files. The table
# from the above "select sum()" came from table cache and was used as it's
# id matches the one in S3
--error 1
--file_exists $MYSQLD_DATADIR/$database/t1.frm
--error 1
--file_exists $MYSQLD_DATADIR/$database/t1.par
# Flushing the table cache will force the .frm and .par files to be
# re-generated
flush tables;
select sum(c1)+0 from t1;
--file_exists $MYSQLD_DATADIR/$database/t1.frm
--file_exists $MYSQLD_DATADIR/$database/t1.par
# Ensure the slave is using the new table
show create table t1;
connection master;
drop table t1;
......
......@@ -2,6 +2,11 @@
plugin-maturity = alpha
plugin-load-add=@ENV.HA_S3_SO
s3=ON
s3-slave-ignore-updates=1
# You can change the following when running the tests against
# your own S3 setup
#s3-host-name=s3.amazonaws.com
#s3-protocol-version=Amazon
#s3-bucket=MariaDB
......
......@@ -2069,6 +2069,7 @@ int ha_partition::change_partitions(HA_CREATE_INFO *create_info,
DBUG_ASSERT(part_elem->part_state == PART_TO_BE_REORGED);
part_elem->part_state= PART_TO_BE_DROPPED;
}
DBUG_ASSERT(m_new_file == 0);
m_new_file= new_file_array;
if (unlikely((error= copy_partitions(copied, deleted))))
{
......@@ -2077,6 +2078,7 @@ int ha_partition::change_partitions(HA_CREATE_INFO *create_info,
They will later be deleted through the ddl-log.
*/
cleanup_new_partition(part_count);
m_new_file= 0;
}
DBUG_RETURN(error);
}
......@@ -2166,6 +2168,8 @@ int ha_partition::copy_partitions(ulonglong * const copied,
file->ha_rnd_end();
reorg_part++;
}
DBUG_EXECUTE_IF("debug_abort_copy_partitions",
DBUG_RETURN(HA_ERR_UNSUPPORTED); );
DBUG_RETURN(FALSE);
error:
m_reorged_file[reorg_part]->ha_rnd_end();
......
......@@ -9739,6 +9739,22 @@ static int create_table_for_inplace_alter(THD *thd,
}
/*
log query if slave thread and send my_ok()
Help function for mysql_alter_table()
*/
static bool log_and_ok(THD *thd)
{
if (thd->slave_thread &&
write_bin_log(thd, true, thd->query(), thd->query_length()))
return(true);
my_ok(thd);
return(0);
}
/**
Alter table
......@@ -9873,10 +9889,7 @@ bool mysql_alter_table(THD *thd, const LEX_CSTRING *new_db,
table_list->mdl_request.type= MDL_EXCLUSIVE;
/* This will only drop the .frm file and local tables, not shared ones */
error= mysql_rm_table(thd, table_list, 1, 0, 0, 1);
if (write_bin_log(thd, true, thd->query(), thd->query_length()) || error)
DBUG_RETURN(true);
my_ok(thd);
DBUG_RETURN(0);
DBUG_RETURN(log_and_ok(thd));
}
/*
......@@ -9908,16 +9921,14 @@ bool mysql_alter_table(THD *thd, const LEX_CSTRING *new_db,
the statement as this slave may not have the table shared
*/
thd->clear_error();
if (thd->slave_thread &&
write_bin_log(thd, true, thd->query(), thd->query_length()))
DBUG_RETURN(true);
my_ok(thd);
DBUG_RETURN(0);
DBUG_RETURN(log_and_ok(thd));
}
}
DBUG_RETURN(true);
}
table= table_list->table;
#ifdef WITH_WSREP
if (WSREP(thd) &&
(thd->lex->sql_command == SQLCOM_ALTER_TABLE ||
......@@ -9929,7 +9940,6 @@ bool mysql_alter_table(THD *thd, const LEX_CSTRING *new_db,
DEBUG_SYNC(thd, "alter_table_after_open_tables");
table= table_list->table;
if (table->versioned())
{
if (handlerton *hton1= create_info->db_type)
......@@ -9968,12 +9978,17 @@ bool mysql_alter_table(THD *thd, const LEX_CSTRING *new_db,
Alter_table_ctx alter_ctx(thd, table_list, tables_opened, new_db, new_name);
mdl_ticket= table->mdl_ticket;
/*
We have to do a check also after table is opened as there could be no
ENGINE= on the command line or the table could a partitioned S3 table.
*/
if (table->file->check_if_updates_are_ignored("ALTER"))
{
/*
Table is a shared table. Remove the .frm file. Discovery will create
a new one if needed.
*/
table->s->tdc->flushed= 1; // Force close of all instances
if (thd->mdl_context.upgrade_shared_lock(mdl_ticket,
MDL_EXCLUSIVE,
thd->variables.lock_wait_timeout))
......
......@@ -2810,7 +2810,7 @@ static void read_big_block(PAGECACHE *pagecache,
if (block_to_read->status & PCBLOCK_ERROR)
{
/* We get first block with an error so all operation failed */
goto error;
DBUG_VOID_RETURN;
}
if (block_to_read->status & PCBLOCK_BIG_READ)
{
......@@ -2842,7 +2842,11 @@ static void read_big_block(PAGECACHE *pagecache,
// page should be read by other thread
DBUG_ASSERT(block->status & PCBLOCK_READ ||
block->status & PCBLOCK_ERROR);
DBUG_ASSERT(block->status & PCBLOCK_BIG_READ);
/*
It is possible that other thread already removed the flag (in
case of two threads waiting) but it will not make harm to try to
remove it even in that case.
*/
block->status&= ~PCBLOCK_BIG_READ;
// all is read => lets finish nice
DBUG_ASSERT(block_to_read != block);
......@@ -2960,9 +2964,8 @@ static void read_big_block(PAGECACHE *pagecache,
}
pagecache->big_block_free(&data);
block_to_read->status&= ~PCBLOCK_BIG_READ;
end:
block_to_read->status&= ~PCBLOCK_BIG_READ;
if (block_to_read != block)
{
remove_reader(block_to_read);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment