Commit 7bbe324f authored by Andrei Elkin's avatar Andrei Elkin

MDEV-13577 slave_parallel_mode=optimistic should not report the mode's

           specific temporary errors

The optimistic parallel slave's worker thread could face a run-time error due to
the algorithm's specifics which allows for conflicts like the reported
"Can't find record in 'table'".
A typical stack is like

{noformat}
#0  handler::print_error (this=0x61c00008f8a0, error=149, errflag=0) at handler.cc:3650
#1  0x0000555555e95361 in write_record (thd=thd@entry=0x62a0000a2208, table=table@entry=0x61f00008ce88, info=info@entry=0x7fffdee356d0) at sql_insert.cc:1944
#2  0x0000555555ea7767 in mysql_insert (thd=thd@entry=0x62a0000a2208, table_list=0x61b00012ada0, fields=..., values_list=..., update_fields=..., update_values=..., duplic=<optimized out>, ignore=<optimized out>) at sql_insert.cc:1039
#3  0x0000555555efda90 in mysql_execute_command (thd=thd@entry=0x62a0000a2208) at sql_parse.cc:3927
#4  0x0000555555f0cc50 in mysql_parse (thd=0x62a0000a2208, rawbuf=<optimized out>, length=<optimized out>, parser_state=<optimized out>) at sql_parse.cc:7449
#5  0x00005555566d4444 in Query_log_event::do_apply_event (this=0x61200005b9c8, rgi=<optimized out>, query_arg=<optimized out>, q_len_arg=<optimized out>) at log_event.cc:4508
#6  0x00005555566d639e in Query_log_event::do_apply_event (this=<optimized out>, rgi=<optimized out>) at log_event.cc:4185
#7  0x0000555555d738cf in Log_event::apply_event (rgi=0x61d0001ea080, this=0x61200005b9c8) at log_event.h:1343
#8  apply_event_and_update_pos_apply (ev=ev@entry=0x61200005b9c8, thd=thd@entry=0x62a0000a2208, rgi=rgi@entry=0x61d0001ea080, reason=<optimized out>) at slave.cc:3479
#9  0x0000555555d8596b in apply_event_and_update_pos_for_parallel (ev=ev@entry=0x61200005b9c8, thd=thd@entry=0x62a0000a2208, rgi=rgi@entry=0x61d0001ea080) at slave.cc:3623
#10 0x00005555562aca83 in rpt_handle_event (qev=qev@entry=0x6190000fa088, rpt=rpt@entry=0x62200002bd68) at rpl_parallel.cc:50
#11 0x00005555562bd04e in handle_rpl_parallel_thread (arg=arg@entry=0x62200002bd68) at rpl_parallel.cc:1258
{noformat}

Here {{handler::print_error}} computes whether to error log the
current error when --log-warnings > 1. The decision flag is consulted
bu {{my_message_sql()}} which can be eventually called.
In the bug case the decision is to log.
However in the optimistic mode slave applier case any conflict is
attempted to resolve with rollback and retry to success. Hence the
logging is at least extraneous.

The case is fixed with adding a new flag {{ME_LOG_AS_WARN}} which
{{handler::print_error}} may propagate further on through {{my_error}}
when the error comes from an optimistically running slave worker thread.

The new flag effectively requests the warning level for the errlog record,
while the thread's DA records the actual error (which is regarded as temporary one
by the parallel slave error handler).
parent 26be5072
...@@ -112,6 +112,7 @@ typedef struct my_aio_result { ...@@ -112,6 +112,7 @@ typedef struct my_aio_result {
#define ME_JUST_INFO 1024 /**< not error but just info */ #define ME_JUST_INFO 1024 /**< not error but just info */
#define ME_JUST_WARNING 2048 /**< not error but just warning */ #define ME_JUST_WARNING 2048 /**< not error but just warning */
#define ME_FATALERROR 4096 /* Fatal statement error */ #define ME_FATALERROR 4096 /* Fatal statement error */
#define ME_LOG_AS_WARN 8192 /* is error but error-logged as warning */
/* Bits in last argument to fn_format */ /* Bits in last argument to fn_format */
#define MY_REPLACE_DIR 1 /* replace dir in name with 'dir' */ #define MY_REPLACE_DIR 1 /* replace dir in name with 'dir' */
......
include/rpl_init.inc [topology=1->2] include/rpl_init.inc [topology=1->2]
call mtr.add_suppression("Warning.*Deadlock found when trying to get lock; try restarting transaction");
call mtr.add_suppression("Warning.*mysqld: Can't find record in 't2'");
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB; CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads; SET @old_parallel_threads=@@GLOBAL.slave_parallel_threads;
...@@ -543,6 +545,27 @@ a b ...@@ -543,6 +545,27 @@ a b
57 7 57 7
58 8 58 8
59 9 59 9
DELETE FROM t1;
DELETE FROM t2;
include/save_master_gtid.inc
include/sync_with_master_gtid.inc
BEGIN;
INSERT INTO t1 SET a=1;
SET @save.binlog_format=@@session.binlog_format;
SET @@SESSION.binlog_format=row;
BEGIN;
INSERT INTO t1 SET a=1;
INSERT INTO t2 SET a=1;
COMMIT;
BEGIN;
DELETE FROM t2;
COMMIT;
ROLLBACK;
SET @@SESSION.binlog_format= @save.binlog_format;
DELETE FROM t1;
DELETE FROM t2;
include/save_master_gtid.inc
include/sync_with_master_gtid.inc
include/stop_slave.inc include/stop_slave.inc
SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_mode=@old_parallel_mode;
SET GLOBAL slave_parallel_threads=@old_parallel_threads; SET GLOBAL slave_parallel_threads=@old_parallel_threads;
......
...@@ -4,6 +4,11 @@ ...@@ -4,6 +4,11 @@
--let $rpl_topology=1->2 --let $rpl_topology=1->2
--source include/rpl_init.inc --source include/rpl_init.inc
--connection server_2
call mtr.add_suppression("Warning.*Deadlock found when trying to get lock; try restarting transaction");
# The following instruction is a part of the proof of MDEV-13577 fixes, below.
call mtr.add_suppression("Warning.*mysqld: Can't find record in 't2'");
--connection server_1 --connection server_1
ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB; ALTER TABLE mysql.gtid_slave_pos ENGINE=InnoDB;
CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB; CREATE TABLE t1 (a int PRIMARY KEY, b INT) ENGINE=InnoDB;
...@@ -480,8 +485,65 @@ SELECT * FROM t2 WHERE a >= 40 ORDER BY a; ...@@ -480,8 +485,65 @@ SELECT * FROM t2 WHERE a >= 40 ORDER BY a;
SELECT * FROM t1 WHERE a >= 40 ORDER BY a; SELECT * FROM t1 WHERE a >= 40 ORDER BY a;
SELECT * FROM t2 WHERE a >= 40 ORDER BY a; SELECT * FROM t2 WHERE a >= 40 ORDER BY a;
# Clean up. # partial cleanup to reuse the tables by following tests
--connection server_1
DELETE FROM t1;
DELETE FROM t2;
--source include/save_master_gtid.inc
--connection server_2
--source include/sync_with_master_gtid.inc
#
# MDEV-13577 optimistic parallel slave errors out to error log unnecessary
#
# The 1st of the following two trx:s a blocker on slave
--connection server_2
BEGIN;
INSERT INTO t1 SET a=1;
--connection server_1
SET @save.binlog_format=@@session.binlog_format;
SET @@SESSION.binlog_format=row;
BEGIN;
INSERT INTO t1 SET a=1;
INSERT INTO t2 SET a=1;
COMMIT;
# This transaction is going to win optimistical race with above INSERT
# on slave while being depend on it. That means it will face a kind of temporary error
# and then will retry to succeed.
BEGIN;
DELETE FROM t2;
COMMIT;
# First make sure DELETE raced indeed to get stuck at retrying stage
# where it runs "realistically" now. There is nomore optimistic error
# in the errorlog, which is downgraded to the warning level (when
# --log-warnings > 1), see above suppression.
--connection server_2
--let $wait_condition= SELECT COUNT(*) = 1 FROM information_schema.processlist WHERE state = "Waiting for prior transaction to commit"
--source include/wait_condition.inc
# Next release the 1st trx to commit.
--connection server_2
ROLLBACK;
# MDEV-13577 local cleanup:
--connection server_1
SET @@SESSION.binlog_format= @save.binlog_format;
DELETE FROM t1;
DELETE FROM t2;
--source include/save_master_gtid.inc
--connection server_2
--source include/sync_with_master_gtid.inc
#
# Clean up.
#
--connection server_2 --connection server_2
--source include/stop_slave.inc --source include/stop_slave.inc
SET GLOBAL slave_parallel_mode=@old_parallel_mode; SET GLOBAL slave_parallel_mode=@old_parallel_mode;
......
...@@ -3639,12 +3639,15 @@ void handler::print_error(int error, myf errflag) ...@@ -3639,12 +3639,15 @@ void handler::print_error(int error, myf errflag)
if ((debug_assert_if_crashed_table || if ((debug_assert_if_crashed_table ||
global_system_variables.log_warnings > 1)) global_system_variables.log_warnings > 1))
{ {
THD *thd= ha_thd();
/* /*
Log error to log before we crash or if extended warnings are requested Log error to log before we crash or if extended warnings are requested
*/ */
errflag|= ME_NOREFRESH; errflag|= ME_NOREFRESH;
if (thd && thd->is_optimistic_slave_worker())
errflag|= ME_LOG_AS_WARN;
} }
} }
/* if we got an OS error from a file-based engine, specify a path of error */ /* if we got an OS error from a file-based engine, specify a path of error */
if (error < HA_ERR_FIRST && bas_ext()[0]) if (error < HA_ERR_FIRST && bas_ext()[0])
......
...@@ -3517,6 +3517,16 @@ void my_message_sql(uint error, const char *str, myf MyFlags) ...@@ -3517,6 +3517,16 @@ void my_message_sql(uint error, const char *str, myf MyFlags)
level= Sql_condition::WARN_LEVEL_WARN; level= Sql_condition::WARN_LEVEL_WARN;
func= sql_print_warning; func= sql_print_warning;
} }
else if (MyFlags & ME_LOG_AS_WARN)
{
/*
Typical use case is optimistic parallel slave where DA needs to hold
an error condition caused by the current error, but the error-log
level is relaxed to the warning one.
*/
level= Sql_condition::WARN_LEVEL_ERROR;
func= sql_print_warning;
}
else else
{ {
level= Sql_condition::WARN_LEVEL_ERROR; level= Sql_condition::WARN_LEVEL_ERROR;
......
...@@ -7100,6 +7100,13 @@ bool THD::rgi_have_temporary_tables() ...@@ -7100,6 +7100,13 @@ bool THD::rgi_have_temporary_tables()
return rgi_slave->rli->save_temporary_tables != 0; return rgi_slave->rli->save_temporary_tables != 0;
} }
bool THD::is_optimistic_slave_worker()
{
DBUG_ASSERT(system_thread != SYSTEM_THREAD_SLAVE_SQL || rgi_slave);
return system_thread == SYSTEM_THREAD_SLAVE_SQL && rgi_slave &&
rgi_slave->speculation == rpl_group_info::SPECULATE_OPTIMISTIC;
}
void void
wait_for_commit::reinit() wait_for_commit::reinit()
......
...@@ -4167,6 +4167,12 @@ class THD :public Statement, ...@@ -4167,6 +4167,12 @@ class THD :public Statement,
(THD_TRANS::DID_WAIT | THD_TRANS::CREATED_TEMP_TABLE | (THD_TRANS::DID_WAIT | THD_TRANS::CREATED_TEMP_TABLE |
THD_TRANS::DROPPED_TEMP_TABLE | THD_TRANS::DID_DDL)); THD_TRANS::DROPPED_TEMP_TABLE | THD_TRANS::DID_DDL));
} }
/*
Returns true when the thread handle belongs to a slave worker thread
running in the optimistic execution mode.
*/
bool is_optimistic_slave_worker();
}; };
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment