Commit ffc5d064 authored by Monty's avatar Monty

MDEV-24087 s3.replication_partition fails in buildbot with replication failure

A few of the failures were because of missing sync_slave_with_master in
the test suite.

However, the biggest reason for most failures was that in case of
ALTER PARTITION the master writes the query to the binary log before
it has updated the .frm and .par files. This causes a problem for an
S3 slave as it will start executing the ALTER PARTITION but get old .frm and
.par files from S3, which causes "open table" to fail, either with an error
or in some cases with a crash.
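Fixed by writing the query to the binary log only after the .frm and .par
files have been updated and the ddl log entries have been marked as completed.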
parent bd5ac038
......@@ -81,6 +81,8 @@ show create table t2;
connection master;
drop table t2;
sync_slave_with_master;
connection master;
--echo #
--echo # Test RENAME
......
......@@ -105,6 +105,8 @@ t2 CREATE TABLE `t2` (
) ENGINE=Aria DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1
connection master;
drop table t2;
connection slave;
connection master;
#
# Test RENAME
#
......
......@@ -13,10 +13,14 @@ PARTITION BY HASH (c1)
PARTITIONS 3;
INSERT INTO t1 VALUE (1), (2), (101), (102), (201), (202);
ALTER TABLE t1 ENGINE=S3;
connection slave;
connection master;
ALTER TABLE t1 ADD PARTITION PARTITIONS 6;
select sum(c1) from t1;
sum(c1)
609
connection slave;
connection master;
ALTER TABLE t1 ADD COLUMN c INT;
select sum(c1) from t1;
sum(c1)
......@@ -108,6 +112,8 @@ select sum(c1) from t1;
ERROR 42S02: Table 'database.t1' doesn't exist
start slave;
connection master;
connection slave;
connection master;
#
# Check altering partitioned table to S3 and back
# Checks also rename partitoned table and drop partition
......
......@@ -29,8 +29,12 @@ CREATE TABLE t1 (
PARTITIONS 3;
INSERT INTO t1 VALUE (1), (2), (101), (102), (201), (202);
ALTER TABLE t1 ENGINE=S3;
sync_slave_with_master;
connection master;
ALTER TABLE t1 ADD PARTITION PARTITIONS 6;
select sum(c1) from t1;
sync_slave_with_master;
connection master;
ALTER TABLE t1 ADD COLUMN c INT;
select sum(c1) from t1;
sync_slave_with_master;
......@@ -85,6 +89,8 @@ select sum(c1) from t1;
--file_exists $MYSQLD_DATADIR/$database/t1.par
start slave;
connection master;
sync_slave_with_master;
connection master;
--echo #
--echo # Check altering partitioned table to S3 and back
......
......@@ -105,6 +105,8 @@ t2 CREATE TABLE `t2` (
) ENGINE=Aria DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1
connection master;
drop table t2;
connection slave;
connection master;
#
# Test RENAME
#
......
......@@ -7174,23 +7174,24 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
4) Close the table that have already been opened but didn't stumble on
the abort locked previously. This is done as part of the
alter_close_table call.
5) Write the bin log
Unfortunately the writing of the binlog is not synchronised with
other logging activities. So no matter in which order the binlog
is written compared to other activities there will always be cases
where crashes make strange things occur. In this placement it can
happen that the ALTER TABLE DROP PARTITION gets performed in the
master but not in the slaves if we have a crash, after writing the
ddl log but before writing the binlog. A solution to this would
require writing the statement first in the ddl log and then
when recovering from the crash read the binlog and insert it into
the binlog if not written already.
5) Old place for binary logging
6) Install the previously written shadow frm file
7) Prepare handlers for drop of partitions
8) Drop the partitions
9) Remove entries from ddl log
10) Reopen table if under lock tables
11) Complete query
11) Write the bin log
Unfortunately the writing of the binlog is not synchronised with
other logging activities. So no matter in which order the binlog
is written compared to other activities there will always be cases
where crashes make strange things occur. In this placement it can
happen that the ALTER TABLE DROP PARTITION gets performed in the
master but not in the slaves if we have a crash, after writing the
ddl log but before writing the binlog. A solution to this would
require writing the statement first in the ddl log and then
when recovering from the crash read the binlog and insert it into
the binlog if not written already.
12) Complete query
We insert Error injections at all places where it could be interesting
to test if recovery is properly done.
......@@ -7211,9 +7212,6 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
alter_close_table(lpt) ||
ERROR_INJECT_CRASH("crash_drop_partition_5") ||
ERROR_INJECT_ERROR("fail_drop_partition_5") ||
((!thd->lex->no_write_to_binlog) &&
(write_bin_log(thd, FALSE,
thd->query(), thd->query_length()), FALSE)) ||
ERROR_INJECT_CRASH("crash_drop_partition_6") ||
ERROR_INJECT_ERROR("fail_drop_partition_6") ||
(frm_install= TRUE, FALSE) ||
......@@ -7225,6 +7223,9 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
ERROR_INJECT_CRASH("crash_drop_partition_8") ||
ERROR_INJECT_ERROR("fail_drop_partition_8") ||
(write_log_completed(lpt, FALSE), FALSE) ||
((!thd->lex->no_write_to_binlog) &&
(write_bin_log(thd, FALSE,
thd->query(), thd->query_length()), FALSE)) ||
ERROR_INJECT_CRASH("crash_drop_partition_9") ||
ERROR_INJECT_ERROR("fail_drop_partition_9"))
{
......@@ -7257,7 +7258,7 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
3) Write an entry to remove the new parttions if crash occurs
4) Add the new partitions.
5) Close all instances of the table and remove them from the table cache.
6) Write binlog
6) Old place for write binlog
7) Now the change is completed except for the installation of the
new frm file. We thus write an action in the log to change to
the shadow frm file
......@@ -7265,7 +7266,8 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
added to the table.
9) Remove entries from ddl log
10)Reopen tables if under lock tables
11)Complete query
11)Write to binlog
12)Complete query
*/
if (write_log_drop_shadow_frm(lpt) ||
ERROR_INJECT_CRASH("crash_add_partition_1") ||
......@@ -7285,9 +7287,6 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
alter_close_table(lpt) ||
ERROR_INJECT_CRASH("crash_add_partition_6") ||
ERROR_INJECT_ERROR("fail_add_partition_6") ||
((!thd->lex->no_write_to_binlog) &&
(write_bin_log(thd, FALSE,
thd->query(), thd->query_length()), FALSE)) ||
ERROR_INJECT_CRASH("crash_add_partition_7") ||
ERROR_INJECT_ERROR("fail_add_partition_7") ||
write_log_rename_frm(lpt) ||
......@@ -7300,6 +7299,9 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
ERROR_INJECT_CRASH("crash_add_partition_9") ||
ERROR_INJECT_ERROR("fail_add_partition_9") ||
(write_log_completed(lpt, FALSE), FALSE) ||
((!thd->lex->no_write_to_binlog) &&
(write_bin_log(thd, FALSE,
thd->query(), thd->query_length()), FALSE)) ||
ERROR_INJECT_CRASH("crash_add_partition_10") ||
ERROR_INJECT_ERROR("fail_add_partition_10"))
{
......@@ -7356,13 +7358,14 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
5) Close the table.
6) Log that operation is completed and log all complete actions
needed to complete operation from here.
7) Write bin log.
7) Old place for write bin log.
8) Prepare handlers for rename and delete of partitions.
9) Rename and drop the reorged partitions such that they are no
longer used and rename those added to their real new names.
10) Install the shadow frm file.
11) Reopen the table if under lock tables.
12) Complete query.
12) Write to binlog
13) Complete query.
*/
if (write_log_drop_shadow_frm(lpt) ||
ERROR_INJECT_CRASH("crash_change_partition_1") ||
......@@ -7386,9 +7389,6 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
(action_completed= TRUE, FALSE) ||
ERROR_INJECT_CRASH("crash_change_partition_7") ||
ERROR_INJECT_ERROR("fail_change_partition_7") ||
((!thd->lex->no_write_to_binlog) &&
(write_bin_log(thd, FALSE,
thd->query(), thd->query_length()), FALSE)) ||
ERROR_INJECT_CRASH("crash_change_partition_8") ||
ERROR_INJECT_ERROR("fail_change_partition_8") ||
((frm_install= TRUE), FALSE) ||
......@@ -7403,6 +7403,9 @@ uint fast_alter_partition_table(THD *thd, TABLE *table,
ERROR_INJECT_CRASH("crash_change_partition_11") ||
ERROR_INJECT_ERROR("fail_change_partition_11") ||
(write_log_completed(lpt, FALSE), FALSE) ||
((!thd->lex->no_write_to_binlog) &&
(write_bin_log(thd, FALSE,
thd->query(), thd->query_length()), FALSE)) ||
ERROR_INJECT_CRASH("crash_change_partition_12") ||
ERROR_INJECT_ERROR("fail_change_partition_12"))
{
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment