Commit 190a8312 authored by Marko Mäkelä

Merge 10.4 into 10.5

parents 5dbea46c 12672542
connection node_2;
connection node_1;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int, unique key keyj (f2));
INSERT INTO t1 VALUES (1, 1, 0);
INSERT INTO t1 VALUES (3, 3, 0);
......@@ -23,14 +25,14 @@ connection node_1a;
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_enter_sync';
SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync';
connection node_1;
COMMIT;
connection node_1a;
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_enter_sync';
SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync';
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL DEBUG_DBUG = "";
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb";
......@@ -39,13 +41,13 @@ SET debug_sync='RESET';
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb";
SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync';
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached";
SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_enter_sync';
SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_slave_enter_sync';
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL DEBUG_DBUG = "";
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb";
SET GLOBAL debug_dbug = NULL;
SET debug_sync='RESET';
SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_enter_sync';
SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_slave_enter_sync';
SET GLOBAL wsrep_provider_options = 'dbug=';
connection node_1;
SELECT * FROM t1;
......@@ -55,8 +57,6 @@ f1 f2 f3
4 4 2
5 5 2
10 10 0
wsrep_local_replays
1
SET GLOBAL wsrep_slave_threads = DEFAULT;
connection node_2;
SELECT * FROM t1;
......@@ -68,22 +68,64 @@ f1 f2 f3
10 10 0
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
SELECT * FROM t1;
f1 f2 f3
1 1 0
3 3 1
4 4 2
5 5 2
7 7 7
8 8 8
10 10 0
DROP TABLE t1;
test scenario 2
connection node_1;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int, unique key keyj (f2));
INSERT INTO t1 VALUES (1, 1, 0);
INSERT INTO t1 VALUES (3, 3, 0);
INSERT INTO t1 VALUES (10, 10, 0);
SET GLOBAL wsrep_slave_threads = 3;
SET GLOBAL wsrep_provider_options = 'dbug=d,apply_monitor_slave_enter_sync';
connection node_1;
SET SESSION wsrep_sync_wait=0;
START TRANSACTION;
DELETE FROM t1 WHERE f2 = 3;
INSERT INTO t1 VALUES (3, 3, 1);
connection node_1a;
SET SESSION wsrep_sync_wait=0;
connection node_2;
INSERT INTO t1 VALUES (5, 5, 2);
connection node_1a;
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync';
connection node_1;
COMMIT;
connection node_1a;
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_replay_cb";
SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync';
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'signal=apply_monitor_slave_enter_sync';
SET GLOBAL wsrep_provider_options = 'dbug=';
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_replay_cb_reached";
SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_slave_enter_sync';
connection node_2;
INSERT INTO t1 VALUES (4, 4, 2);
connection node_1a;
SET SESSION wsrep_on = 0;
SET SESSION wsrep_on = 1;
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_slave_enter_sync';
SET GLOBAL wsrep_provider_options = 'dbug=';
SET GLOBAL DEBUG_DBUG = "";
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_replay_cb";
SET GLOBAL debug_dbug = NULL;
SET debug_sync='RESET';
connection node_1;
SET GLOBAL wsrep_slave_threads = DEFAULT;
connection node_2;
SELECT * FROM t1;
f1 f2 f3
1 1 0
3 3 1
4 4 2
5 5 2
7 7 7
10 10 0
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
DROP TABLE t1;
#
# This test tests the operation of transaction replay with a scenario
# where two subsequent write sets in applying conflict with local transaction
# where two subsequent write sets being applied conflict with local transaction
# in commit phase. The conflict is "false positive" confict on GAP lock in
# secondary unique index.
# The first applier will cause BF abort for the local committer, which
# starts replaying because of positive certification.
# In buggy version, scenatio continues so that ehile the local transaction
# In buggy version, the test scenario continues so that while the local transaction
# is replaying, the latter applier experiences similar UK GAP lock conflict
# and forces the replayer to abort second time.
# In fixed version, this latter BF abort should not happen.
# In fixed version, this latter replayer BF abort should not happen.
#
--source include/galera_cluster.inc
......@@ -16,15 +16,16 @@
--source include/have_debug_sync.inc
--source include/galera_have_debug_sync.inc
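For orientation, the galera_set_sync_point.inc, galera_wait_sync_point.inc and galera_signal_sync_point.inc helpers used throughout this test drive provider-side sync points through wsrep_provider_options, as the recorded .result above shows; a minimal sketch of the pattern, using the commit_monitor_master_enter_sync point from this test:

# arm the provider sync point before the statement that should block on it
SET GLOBAL wsrep_provider_options = 'dbug=d,commit_monitor_master_enter_sync';
# ... run the blocking statement from another connection ...
# release the blocked thread and clear the dbug option again
SET GLOBAL wsrep_provider_options = 'signal=commit_monitor_master_enter_sync';
SET GLOBAL wsrep_provider_options = 'dbug=';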
--let $wsrep_local_replays_old = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'`
--let $expected_wsrep_local_replays = `SELECT VARIABLE_VALUE+1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'`
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int, unique key keyj (f2));
INSERT INTO t1 VALUES (1, 1, 0);
INSERT INTO t1 VALUES (3, 3, 0);
INSERT INTO t1 VALUES (10, 10, 0);
# we will need 2 appliers threads for applyin two write sets in parallel in node1
# and 1 applier thread for handling replaying
# we will need 2 appliers threads for applyin two writes ets in parallel in node1
# and 1 applier thread for handling replaying
SET GLOBAL wsrep_slave_threads = 3;
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb";
......@@ -42,7 +43,7 @@ INSERT INTO t1 VALUES (3, 3, 1);
--connection node_1a
SET SESSION wsrep_sync_wait=0;
# send from node 2 first INSERT transaction, which will conflict on GAP lock in node 1
# send from node 2 first an INSERT transaction, which will conflict on GAP lock in node 1
--connection node_2
INSERT INTO t1 VALUES (5, 5, 2);
......@@ -67,7 +68,7 @@ INSERT INTO t1 VALUES (4, 4, 2);
# both appliers are now waiting in separate sync points
# Block the local commit, send the COMMIT and wait until it gets blocked
--let $galera_sync_point = commit_monitor_enter_sync
--let $galera_sync_point = commit_monitor_master_enter_sync
--source include/galera_set_sync_point.inc
--connection node_1
......@@ -75,12 +76,12 @@ INSERT INTO t1 VALUES (4, 4, 2);
--connection node_1a
# wait for the local commit to enter in commit monitor wait state
--let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_enter_sync
--let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_master_enter_sync
--source include/galera_wait_sync_point.inc
--source include/galera_clear_sync_point.inc
# release the local transaction to continue with commit
--let $galera_sync_point = commit_monitor_enter_sync
--let $galera_sync_point = commit_monitor_master_enter_sync
--source include/galera_signal_sync_point.inc
--source include/galera_clear_sync_point.inc
......@@ -90,6 +91,11 @@ SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb";
SET GLOBAL debug_dbug = NULL;
SET debug_sync='RESET';
# wait for BF abort to happen and replaying begin
--let $wait_condition = SELECT VARIABLE_VALUE= $expected_wsrep_local_replays FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays';
--let $wait_condition_on_error_output= SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays';
--source include/wait_condition_with_debug.inc
# set another sync point for second applier
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb";
......@@ -97,11 +103,11 @@ SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_apply_cb";
--let $galera_sync_point = apply_monitor_slave_enter_sync
--source include/galera_signal_sync_point.inc
# waiting until second applier is in wait
# waiting until second applier is in wait state
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_apply_cb_reached";
# stopping second applier before commit
--let $galera_sync_point = commit_monitor_enter_sync
--let $galera_sync_point = commit_monitor_slave_enter_sync
--source include/galera_set_sync_point.inc
--source include/galera_clear_sync_point.inc
......@@ -112,9 +118,9 @@ SET DEBUG_SYNC = "now SIGNAL signal.wsrep_apply_cb";
SET GLOBAL debug_dbug = NULL;
SET debug_sync='RESET';
# with fixed version, second applier has reached commit monitor, and we can
# with fixed version, second applier has reached comit monitor, and we can
# release it to complete
--let $galera_sync_point = commit_monitor_enter_sync
--let $galera_sync_point = commit_monitor_slave_enter_sync
--source include/galera_signal_sync_point.inc
--source include/galera_clear_sync_point.inc
......@@ -124,12 +130,6 @@ SET debug_sync='RESET';
SELECT * FROM t1;
# wsrep_local_replays has increased by 1
--let $wsrep_local_replays_new = `SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'`
--disable_query_log
--eval SELECT $wsrep_local_replays_new - $wsrep_local_replays_old = 1 AS wsrep_local_replays;
--enable_query_log
# returning original slave thread count
SET GLOBAL wsrep_slave_threads = DEFAULT;
......@@ -140,9 +140,137 @@ SELECT * FROM t1;
# original state in node 1
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
SELECT * FROM t1;
DROP TABLE t1;
##################################################################################
# test scenario 2
#
# commit order is now: INSERT-1, local COMMIT, INSERT-2
# while local trx is replaying, the latter applier has applied and is waiting
# for commit.
# The point in this scenario is to verify that the replayer does not try to abort
# the latter applier
#################################################################################
--echo test scenario 2
--connection node_1
--let $expected_wsrep_local_replays = `SELECT VARIABLE_VALUE+1 FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays'`
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY, f2 int, f3 int, unique key keyj (f2));
INSERT INTO t1 VALUES (1, 1, 0);
INSERT INTO t1 VALUES (3, 3, 0);
INSERT INTO t1 VALUES (10, 10, 0);
# we will need 2 applier threads for applying two write sets in parallel in node1
# and 1 applier thread for handling replaying
SET GLOBAL wsrep_slave_threads = 3;
# set sync point for the first INSERT applier
--let $galera_sync_point = apply_monitor_slave_enter_sync
--source include/galera_set_sync_point.inc
--connection node_1
# start a transaction which deletes and re-inserts the middle row of the test table
# this will be the victim of a false positive conflict with the appliers
SET SESSION wsrep_sync_wait=0;
START TRANSACTION;
DELETE FROM t1 WHERE f2 = 3;
INSERT INTO t1 VALUES (3, 3, 1);
# Control connection to manage sync points for appliers
--connection node_1a
SET SESSION wsrep_sync_wait=0;
# send from node 2 first an INSERT transaction, which will conflict on GAP lock in node 1
--connection node_2
INSERT INTO t1 VALUES (5, 5, 2);
--connection node_1a
# wait to see the INSERT in apply_cb sync point
--let $galera_sync_point = apply_monitor_slave_enter_sync
--source include/galera_wait_sync_point.inc
--source include/galera_clear_sync_point.inc
# Block the local commit, send the COMMIT and wait until it gets blocked
--let $galera_sync_point = commit_monitor_master_enter_sync
--source include/galera_set_sync_point.inc
--connection node_1
--send COMMIT
--connection node_1a
# wait for the local commit to enter in commit monitor wait state
--let $galera_sync_point = apply_monitor_slave_enter_sync commit_monitor_master_enter_sync
--source include/galera_wait_sync_point.inc
--source include/galera_clear_sync_point.inc
# set sync point before replaying
SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_replay_cb";
# release the local transaction to continue with commit
# it should advance and end up waiting in the commit monitor for its turn
--let $galera_sync_point = commit_monitor_master_enter_sync
--source include/galera_signal_sync_point.inc
--source include/galera_clear_sync_point.inc
# and now release the first applier, it should force local trx to abort
--let $galera_sync_point = apply_monitor_slave_enter_sync
--source include/galera_signal_sync_point.inc
--source include/galera_clear_sync_point.inc
# wait for BF abort to happen and replaying begin
--let $wait_condition = SELECT VARIABLE_VALUE= $expected_wsrep_local_replays FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays';
--let $wait_condition_on_error_output= SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_STATUS WHERE VARIABLE_NAME = 'wsrep_local_replays';
--source include/wait_condition_with_debug.inc
# the replayer should now be stopped in the sync point
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_replay_cb_reached";
# set sync point for the second INSERT
--let $galera_sync_point = commit_monitor_slave_enter_sync
--source include/galera_set_sync_point.inc
--connection node_2
# send second insert into same GAP in test table
INSERT INTO t1 VALUES (4, 4, 2);
--connection node_1a
# wait for the second applier to enter in commit monitor wait state
--let $galera_sync_point = commit_monitor_slave_enter_sync
--source include/galera_wait_sync_point.inc
--source include/galera_clear_sync_point.inc
# and, letting the second applier move forward, it will stop naturally
# to wait for commit order after replayer's commit
--let $galera_sync_point = commit_monitor_slave_enter_sync
--source include/galera_signal_sync_point.inc
--source include/galera_clear_sync_point.inc
# and now release the replayer; if all is good, it will commit before the second applier
SET GLOBAL DEBUG_DBUG = "";
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_replay_cb";
SET GLOBAL debug_dbug = NULL;
SET debug_sync='RESET';
# local commit should succeed
--connection node_1
--reap
--let $wait_condition = SELECT COUNT(*)=5 FROM t1;
--source include/wait_condition.inc
# returning original slave thread count
SET GLOBAL wsrep_slave_threads = DEFAULT;
--connection node_2
SELECT * FROM t1;
# replicate some transactions, so that wsrep slave thread count can reach
# original state in node 1
INSERT INTO t1 VALUES (7,7,7);
INSERT INTO t1 VALUES (8,8,8);
DROP TABLE t1;
......@@ -193,6 +193,12 @@ SET DEBUG_SYNC='RESET';
CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t1 SET a=0;
ALTER TABLE t1 ADD COLUMN b INT NOT NULL DEFAULT 2, ADD COLUMN c INT;
BEGIN NOT ATOMIC
DECLARE c TEXT DEFAULT(SELECT CONCAT('ALTER TABLE t1 ADD (c',
GROUP_CONCAT(seq SEPARATOR ' INT, c'), ' INT), ALGORITHM=INSTANT;') FROM seq_1_to_130);
EXECUTE IMMEDIATE c;
END;
$$
connection stop_purge;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
connection default;
......@@ -211,7 +217,7 @@ SET DEBUG_SYNC = 'now SIGNAL logged';
connection ddl;
connection default;
SET DEBUG_SYNC = RESET;
SELECT * FROM t1;
SELECT a, b, c FROM t1;
a b c
1 2 NULL
2 3 4
......@@ -234,7 +240,7 @@ connection ddl;
UPDATE t1 SET b = b + 1 WHERE a = 2;
connection default;
SET DEBUG_SYNC = RESET;
SELECT * FROM t1;
SELECT a, b, c FROM t1;
a b c
1 2 NULL
2 3 4
......@@ -258,7 +264,7 @@ ERROR 22004: Invalid use of NULL value
disconnect ddl;
connection default;
SET DEBUG_SYNC = RESET;
SELECT * FROM t1;
SELECT a, b, c, d FROM t1;
a b c d
1 2 NULL 1
2 3 4 1
......@@ -440,4 +446,4 @@ SELECT variable_value-@old_instant instants
FROM information_schema.global_status
WHERE variable_name = 'innodb_instant_alter_column';
instants
32
33
[redundant]
innodb_default_row_format=redundant
[dynamic]
innodb_default_row_format=dynamic
--source include/have_innodb.inc
--source include/have_debug.inc
--source include/have_debug_sync.inc
--source include/have_sequence.inc
SET @save_frequency= @@GLOBAL.innodb_purge_rseg_truncate_frequency;
SET GLOBAL innodb_purge_rseg_truncate_frequency=1;
......@@ -216,6 +217,15 @@ CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB;
INSERT INTO t1 SET a=0;
ALTER TABLE t1 ADD COLUMN b INT NOT NULL DEFAULT 2, ADD COLUMN c INT;
DELIMITER $$;
BEGIN NOT ATOMIC
DECLARE c TEXT DEFAULT(SELECT CONCAT('ALTER TABLE t1 ADD (c',
GROUP_CONCAT(seq SEPARATOR ' INT, c'), ' INT), ALGORITHM=INSTANT;') FROM seq_1_to_130);
EXECUTE IMMEDIATE c;
END;
$$
DELIMITER ;$$
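For reference, the CONCAT/GROUP_CONCAT expression above builds one instant ALTER statement covering columns c1..c130; with a smaller sequence table the same expression expands to, for example:

-- what the DECLARE'd statement would contain for seq_1_to_3 instead of seq_1_to_130
ALTER TABLE t1 ADD (c1 INT, c2 INT, c3 INT), ALGORITHM=INSTANT;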
connection stop_purge;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
......@@ -243,7 +253,7 @@ reap;
connection default;
SET DEBUG_SYNC = RESET;
SELECT * FROM t1;
SELECT a, b, c FROM t1;
ALTER TABLE t1 DROP b, ALGORITHM=INSTANT;
connection stop_purge;
START TRANSACTION WITH CONSISTENT SNAPSHOT;
......@@ -272,7 +282,7 @@ UPDATE t1 SET b = b + 1 WHERE a = 2;
connection default;
SET DEBUG_SYNC = RESET;
SELECT * FROM t1;
SELECT a, b, c FROM t1;
--echo #
--echo # MDEV-15872 Crash in online ALTER TABLE...ADD PRIMARY KEY
......@@ -299,7 +309,7 @@ disconnect ddl;
connection default;
SET DEBUG_SYNC = RESET;
SELECT * FROM t1;
SELECT a, b, c, d FROM t1;
DROP TABLE t1;
--echo #
......
......@@ -3714,6 +3714,11 @@ mysql_execute_command(THD *thd)
Json_writer_object trace_command(thd);
Json_writer_array trace_command_steps(thd, "steps");
/* store old value of binlog format */
enum_binlog_format orig_binlog_format,orig_current_stmt_binlog_format;
thd->get_binlog_format(&orig_binlog_format,
&orig_current_stmt_binlog_format);
#ifdef WITH_WSREP
if (WSREP(thd))
{
......@@ -3765,12 +3770,6 @@ mysql_execute_command(THD *thd)
DBUG_ASSERT(thd->transaction->stmt.modified_non_trans_table == FALSE);
/* store old value of binlog format */
enum_binlog_format orig_binlog_format,orig_current_stmt_binlog_format;
thd->get_binlog_format(&orig_binlog_format,
&orig_current_stmt_binlog_format);
/*
Assign system variables with values specified by the clause
SET STATEMENT var1=value1 [, var2=value2, ...] FOR <statement>
......@@ -7658,9 +7657,14 @@ void THD::reset_for_next_command(bool do_clear_error)
save_prep_leaf_list= false;
DBUG_PRINT("debug",
("is_current_stmt_binlog_format_row(): %d",
is_current_stmt_binlog_format_row()));
#ifdef WITH_WSREP
#if !defined(DBUG_OFF)
if (mysql_bin_log.is_open())
#endif
#endif
DBUG_PRINT("debug",
("is_current_stmt_binlog_format_row(): %d",
is_current_stmt_binlog_format_row()));
DBUG_VOID_RETURN;
}
......
......@@ -695,6 +695,17 @@ int Wsrep_replayer_service::apply_write_set(const wsrep::ws_meta& ws_meta,
DBUG_ASSERT(thd->wsrep_trx().active());
DBUG_ASSERT(thd->wsrep_trx().state() == wsrep::transaction::s_replaying);
/* Allow tests to block the replayer thread using the DBUG facilities */
DBUG_EXECUTE_IF("sync.wsrep_replay_cb",
{
const char act[]=
"now "
"SIGNAL sync.wsrep_replay_cb_reached "
"WAIT_FOR signal.wsrep_replay_cb";
DBUG_ASSERT(!debug_sync_set_action(thd,
STRING_WITH_LEN(act)));
};);
wsrep_setup_uk_and_fk_checks(thd);
int ret= 0;
......
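The DBUG_EXECUTE_IF block above is what lets test scenario 2 park the replayer; a minimal sketch of the handshake as driven from the controlling connection (node_1a in the test above):

SET GLOBAL DEBUG_DBUG = "d,sync.wsrep_replay_cb";                      # arm before replaying starts
SET SESSION DEBUG_SYNC = "now WAIT_FOR sync.wsrep_replay_cb_reached";  # replayer is now parked
# ... arrange the second applier while the replayer is held ...
SET GLOBAL DEBUG_DBUG = "";
SET DEBUG_SYNC = "now SIGNAL signal.wsrep_replay_cb";                  # let the replayer continue
SET GLOBAL debug_dbug = NULL;
SET debug_sync = 'RESET';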
/*****************************************************************************
Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2014, 2020, MariaDB Corporation.
Copyright (c) 2014, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -355,12 +355,16 @@ static bool fil_node_open_file_low(fil_node_t *node)
ut_ad(!node->is_open());
ut_ad(node->space->is_closing());
ut_ad(mutex_own(&fil_system.mutex));
const auto flags= node->space->flags;
bool o_direct_possible= !FSP_FLAGS_HAS_PAGE_COMPRESSION(flags);
ulint type;
static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096, "compatibility");
if (const auto ssize= FSP_FLAGS_GET_ZIP_SSIZE(flags))
if (ssize < 3)
o_direct_possible= false;
switch (FSP_FLAGS_GET_ZIP_SSIZE(node->space->flags)) {
case 1:
case 2:
type= OS_DATA_FILE_NO_O_DIRECT;
break;
default:
type= OS_DATA_FILE;
}
for (;;)
{
......@@ -369,8 +373,7 @@ static bool fil_node_open_file_low(fil_node_t *node)
node->is_raw_disk
? OS_FILE_OPEN_RAW | OS_FILE_ON_ERROR_NO_EXIT
: OS_FILE_OPEN | OS_FILE_ON_ERROR_NO_EXIT,
OS_FILE_AIO, o_direct_possible
? OS_DATA_FILE : OS_DATA_FILE_NO_O_DIRECT,
OS_FILE_AIO, type,
srv_read_only_mode, &success);
if (success)
break;
......@@ -2224,10 +2227,22 @@ fil_ibd_create(
return NULL;
}
ulint type;
static_assert(((UNIV_ZIP_SIZE_MIN >> 1) << 3) == 4096,
"compatibility");
switch (FSP_FLAGS_GET_ZIP_SSIZE(flags)) {
case 1:
case 2:
type = OS_DATA_FILE_NO_O_DIRECT;
break;
default:
type = OS_DATA_FILE;
}
file = os_file_create(
innodb_data_file_key, path,
OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
OS_FILE_AIO, OS_DATA_FILE, srv_read_only_mode, &success);
OS_FILE_AIO, type, srv_read_only_mode, &success);
if (!success) {
/* The following call will print an error message */
......
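Both hunks above choose OS_DATA_FILE_NO_O_DIRECT when FSP_FLAGS_GET_ZIP_SSIZE(flags) is 1 or 2, i.e. for ROW_FORMAT=COMPRESSED tablespaces whose physical page size (1 KiB or 2 KiB) is smaller than the 4096-byte size asserted by the static_assert; a small illustrative SQL example (table names are invented, not taken from this commit):

-- zip page size 2 KiB: this data file would be opened without O_DIRECT
CREATE TABLE t_zip2k (a INT PRIMARY KEY) ENGINE=InnoDB
  ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=2;
-- zip page size 4 KiB (ssize 3) or larger: the regular OS_DATA_FILE path still applies
CREATE TABLE t_zip4k (a INT PRIMARY KEY) ENGINE=InnoDB
  ROW_FORMAT=COMPRESSED KEY_BLOCK_SIZE=4;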
......@@ -1050,7 +1050,7 @@ struct fil_node_t final
/** Determine some file metadata when creating or reading the file.
@param file the file that is being created, or OS_FILE_CLOSED */
void find_metadata(os_file_t file = OS_FILE_CLOSED
#ifdef UNIV_LINUX
#ifndef _WIN32
, struct stat* statbuf = NULL
#endif
);
......
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2020, MariaDB Corporation.
Copyright (c) 2017, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -937,12 +937,14 @@ rec_copy(
const rec_offs* offsets);
/** Determine the size of a data tuple prefix in a temporary file.
@tparam redundant_temp whether to use the ROW_FORMAT=REDUNDANT format
@param[in] index clustered or secondary index
@param[in] fields data fields
@param[in] n_fields number of data fields
@param[out] extra record header size
@param[in] status REC_STATUS_ORDINARY or REC_STATUS_INSTANT
@return total size, in bytes */
template<bool redundant_temp>
ulint
rec_get_converted_size_temp(
const dict_index_t* index,
......@@ -981,11 +983,13 @@ rec_init_offsets_temp(
MY_ATTRIBUTE((nonnull));
/** Convert a data tuple prefix to the temporary file format.
@tparam redundant_temp whether to use the ROW_FORMAT=REDUNDANT format
@param[out] rec record in temporary file format
@param[in] index clustered or secondary index
@param[in] fields data fields
@param[in] n_fields number of data fields
@param[in] status REC_STATUS_ORDINARY or REC_STATUS_INSTANT */
template<bool redundant_temp>
void
rec_convert_dtuple_to_temp(
rec_t* rec,
......
/*****************************************************************************
Copyright (c) 2011, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2020, MariaDB Corporation.
Copyright (c) 2017, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -247,6 +247,11 @@ row_log_apply(
ut_stage_alter_t* stage)
MY_ATTRIBUTE((warn_unused_result));
/** Get the n_core_fields of online log for the index
@param index index whose n_core_fields of log to be accessed
@return number of n_core_fields */
unsigned row_log_get_n_core_fields(const dict_index_t *index);
#ifdef HAVE_PSI_STAGE_INTERFACE
/** Estimate how much work is to be done by the log apply phase
of an ALTER TABLE for this index.
......
......@@ -2,7 +2,7 @@
Copyright (c) 1995, 2019, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2009, Percona Inc.
Copyright (c) 2013, 2020, MariaDB Corporation.
Copyright (c) 2013, 2021, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted
by Percona Inc.. Those modifications are
......@@ -2522,7 +2522,9 @@ os_file_create_func(
/* fall through */
case SRV_ALL_O_DIRECT_FSYNC:
/*Traditional Windows behavior, no buffering for any files.*/
attributes |= FILE_FLAG_NO_BUFFERING;
if (type != OS_DATA_FILE_NO_O_DIRECT) {
attributes |= FILE_FLAG_NO_BUFFERING;
}
break;
case SRV_FSYNC:
......@@ -4393,7 +4395,7 @@ static bool is_file_on_ssd(char *file_path)
/** Determine some file metadata when creating or reading the file.
@param file the file that is being created, or OS_FILE_CLOSED */
void fil_node_t::find_metadata(os_file_t file
#ifdef UNIV_LINUX
#ifndef _WIN32
, struct stat* statbuf
#endif
)
......@@ -4433,18 +4435,18 @@ void fil_node_t::find_metadata(os_file_t file
block_size = 512;
}
#else
on_ssd = space->atomic_write_supported;
# ifdef UNIV_LINUX
if (!on_ssd) {
struct stat sbuf;
if (!statbuf && !fstat(file, &sbuf)) {
statbuf = &sbuf;
}
if (statbuf && fil_system.is_ssd(statbuf->st_dev)) {
on_ssd = true;
}
struct stat sbuf;
if (!statbuf && !fstat(file, &sbuf)) {
statbuf = &sbuf;
}
if (statbuf) {
block_size = statbuf->st_blksize;
}
on_ssd = space->atomic_write_supported
# ifdef UNIV_LINUX
|| (statbuf && fil_system.is_ssd(statbuf->st_dev))
# endif
;
#endif
if (!space->atomic_write_supported) {
space->atomic_write_supported = atomic_write
......@@ -4478,7 +4480,6 @@ bool fil_node_t::read_page0()
if (fstat(handle, &statbuf)) {
return false;
}
block_size = statbuf.st_blksize;
os_offset_t size_bytes = statbuf.st_size;
#else
os_offset_t size_bytes = os_file_get_size(handle);
......
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2020, MariaDB Corporation.
Copyright (c) 2017, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -29,6 +29,7 @@ Created 5/30/1994 Heikki Tuuri
#include "mtr0log.h"
#include "fts0fts.h"
#include "trx0sys.h"
#include "row0log.h"
/* PHYSICAL RECORD (OLD STYLE)
===========================
......@@ -1106,7 +1107,8 @@ rec_get_nth_field_offs_old(
}
/** Determine the size of a data tuple prefix in ROW_FORMAT=COMPACT.
@tparam mblob whether the record includes a metadata BLOB
@tparam mblob whether the record includes a metadata BLOB
@tparam redundant_temp whether to use the ROW_FORMAT=REDUNDANT format
@param[in] index record descriptor; dict_table_is_comp()
is assumed to hold, even if it doesn't
@param[in] dfield array of data fields
......@@ -1115,7 +1117,7 @@ rec_get_nth_field_offs_old(
@param[in] status status flags
@param[in] temp whether this is a temporary file record
@return total size */
template<bool mblob = false>
template<bool mblob = false, bool redundant_temp = false>
static inline
ulint
rec_get_converted_size_comp_prefix_low(
......@@ -1132,25 +1134,27 @@ rec_get_converted_size_comp_prefix_low(
ut_d(ulint n_null = index->n_nullable);
ut_ad(status == REC_STATUS_ORDINARY || status == REC_STATUS_NODE_PTR
|| status == REC_STATUS_INSTANT);
unsigned n_core_fields = redundant_temp
? row_log_get_n_core_fields(index)
: index->n_core_fields;
if (mblob) {
ut_ad(!temp);
ut_ad(index->table->instant);
ut_ad(index->is_instant());
ut_ad(!redundant_temp && index->is_instant());
ut_ad(status == REC_STATUS_INSTANT);
ut_ad(n_fields == ulint(index->n_fields) + 1);
extra_size += UT_BITS_IN_BYTES(index->n_nullable)
+ rec_get_n_add_field_len(n_fields - 1
- index->n_core_fields);
- n_core_fields);
} else if (status == REC_STATUS_INSTANT
&& (!temp || n_fields > index->n_core_fields)) {
ut_ad(index->is_instant());
&& (!temp || n_fields > n_core_fields)) {
if (!redundant_temp) { ut_ad(index->is_instant()); }
ut_ad(UT_BITS_IN_BYTES(n_null) >= index->n_core_null_bytes);
extra_size += UT_BITS_IN_BYTES(index->get_n_nullable(n_fields))
+ rec_get_n_add_field_len(n_fields - 1
- index->n_core_fields);
- n_core_fields);
} else {
ut_ad(n_fields <= index->n_core_fields);
ut_ad(n_fields <= n_core_fields);
extra_size += index->n_core_null_bytes;
}
......@@ -1449,7 +1453,8 @@ rec_convert_dtuple_to_rec_old(
}
/** Convert a data tuple into a ROW_FORMAT=COMPACT record.
@tparam mblob whether the record includes a metadata BLOB
@tparam mblob whether the record includes a metadata BLOB
@tparam redundant_temp whether to use the ROW_FORMAT=REDUNDANT format
@param[out] rec converted record
@param[in] index index
@param[in] field data fields to convert
......@@ -1457,7 +1462,7 @@ rec_convert_dtuple_to_rec_old(
@param[in] status rec_get_status(rec)
@param[in] temp whether to use the format for temporary files
in index creation */
template<bool mblob = false>
template<bool mblob = false, bool redundant_temp = false>
static inline
void
rec_convert_dtuple_to_rec_comp(
......@@ -1474,7 +1479,9 @@ rec_convert_dtuple_to_rec_comp(
byte* UNINIT_VAR(lens);
ulint UNINIT_VAR(n_node_ptr_field);
ulint null_mask = 1;
const ulint n_core_fields = redundant_temp
? row_log_get_n_core_fields(index)
: index->n_core_fields;
ut_ad(n_fields > 0);
ut_ad(temp || dict_table_is_comp(index->table));
ut_ad(index->n_core_null_bytes <= UT_BITS_IN_BYTES(index->n_nullable));
......@@ -1484,11 +1491,10 @@ rec_convert_dtuple_to_rec_comp(
if (mblob) {
ut_ad(!temp);
ut_ad(index->table->instant);
ut_ad(index->is_instant());
ut_ad(!redundant_temp && index->is_instant());
ut_ad(status == REC_STATUS_INSTANT);
ut_ad(n_fields == ulint(index->n_fields) + 1);
rec_set_n_add_field(nulls, n_fields - 1
- index->n_core_fields);
rec_set_n_add_field(nulls, n_fields - 1 - n_core_fields);
rec_set_bit_field_2(rec, PAGE_HEAP_NO_USER_LOW,
REC_NEW_HEAP_NO, REC_HEAP_NO_MASK,
REC_HEAP_NO_SHIFT);
......@@ -1499,10 +1505,9 @@ rec_convert_dtuple_to_rec_comp(
}
switch (status) {
case REC_STATUS_INSTANT:
ut_ad(index->is_instant());
ut_ad(n_fields > index->n_core_fields);
rec_set_n_add_field(nulls, n_fields - 1
- index->n_core_fields);
if (!redundant_temp) { ut_ad(index->is_instant()); }
ut_ad(n_fields > n_core_fields);
rec_set_n_add_field(nulls, n_fields - 1 - n_core_fields);
/* fall through */
case REC_STATUS_ORDINARY:
ut_ad(n_fields <= dict_index_get_n_fields(index));
......@@ -1510,10 +1515,9 @@ rec_convert_dtuple_to_rec_comp(
rec_set_bit_field_2(rec, PAGE_HEAP_NO_USER_LOW,
REC_NEW_HEAP_NO, REC_HEAP_NO_MASK,
REC_HEAP_NO_SHIFT);
rec_set_status(
rec, n_fields == index->n_core_fields
? REC_STATUS_ORDINARY
: REC_STATUS_INSTANT);
rec_set_status(rec, n_fields == n_core_fields
? REC_STATUS_ORDINARY
: REC_STATUS_INSTANT);
}
if (dict_table_is_comp(index->table)) {
......@@ -1741,12 +1745,14 @@ rec_convert_dtuple_to_rec(
}
/** Determine the size of a data tuple prefix in a temporary file.
@tparam redundant_temp whether to use the ROW_FORMAT=REDUNDANT format
@param[in] index clustered or secondary index
@param[in] fields data fields
@param[in] n_fields number of data fields
@param[out] extra record header size
@param[in] status REC_STATUS_ORDINARY or REC_STATUS_INSTANT
@return total size, in bytes */
template<bool redundant_temp>
ulint
rec_get_converted_size_temp(
const dict_index_t* index,
......@@ -1755,10 +1761,18 @@ rec_get_converted_size_temp(
ulint* extra,
rec_comp_status_t status)
{
return rec_get_converted_size_comp_prefix_low(
return rec_get_converted_size_comp_prefix_low<false,redundant_temp>(
index, fields, n_fields, extra, status, true);
}
template ulint rec_get_converted_size_temp<false>(
const dict_index_t*, const dfield_t*, ulint, ulint*,
rec_comp_status_t);
template ulint rec_get_converted_size_temp<true>(
const dict_index_t*, const dfield_t*, ulint, ulint*,
rec_comp_status_t);
/** Determine the offset to each field in temporary file.
@param[in] rec temporary file record
@param[in] index index of that the record belongs to
......@@ -1811,6 +1825,7 @@ rec_init_offsets_temp(
@param[in] n_fields number of data fields
@param[in] status REC_STATUS_ORDINARY or REC_STATUS_INSTANT
*/
template<bool redundant_temp>
void
rec_convert_dtuple_to_temp(
rec_t* rec,
......@@ -1819,10 +1834,18 @@ rec_convert_dtuple_to_temp(
ulint n_fields,
rec_comp_status_t status)
{
rec_convert_dtuple_to_rec_comp(rec, index, fields, n_fields,
status, true);
rec_convert_dtuple_to_rec_comp<false,redundant_temp>(
rec, index, fields, n_fields, status, true);
}
template void rec_convert_dtuple_to_temp<false>(
rec_t*, const dict_index_t*, const dfield_t*,
ulint, rec_comp_status_t);
template void rec_convert_dtuple_to_temp<true>(
rec_t*, const dict_index_t*, const dfield_t*,
ulint, rec_comp_status_t);
/** Copy the first n fields of a (copy of a) physical record to a data tuple.
The fields are copied into the memory heap.
@param[out] tuple data tuple
......
/*****************************************************************************
Copyright (c) 2011, 2018, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2017, 2020, MariaDB Corporation.
Copyright (c) 2017, 2021, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
......@@ -354,7 +354,7 @@ row_log_online_op(
row_merge_buf_encode(), because here we do not encode
extra_size+1 (and reserve 0 as the end-of-chunk marker). */
size = rec_get_converted_size_temp(
size = rec_get_converted_size_temp<false>(
index, tuple->fields, tuple->n_fields, &extra_size);
ut_ad(size >= extra_size);
ut_ad(size <= sizeof log->tail.buf);
......@@ -402,7 +402,7 @@ row_log_online_op(
*b++ = (byte) extra_size;
}
rec_convert_dtuple_to_temp(
rec_convert_dtuple_to_temp<false>(
b + extra_size, index, tuple->fields, tuple->n_fields);
b += size;
......@@ -742,7 +742,7 @@ row_log_table_delete(
old_pk, old_pk->n_fields - 2)->len);
ut_ad(DATA_ROLL_PTR_LEN == dtuple_get_nth_field(
old_pk, old_pk->n_fields - 1)->len);
old_pk_size = rec_get_converted_size_temp(
old_pk_size = rec_get_converted_size_temp<false>(
new_index, old_pk->fields, old_pk->n_fields,
&old_pk_extra_size);
ut_ad(old_pk_extra_size < 0x100);
......@@ -755,7 +755,7 @@ row_log_table_delete(
*b++ = ROW_T_DELETE;
*b++ = static_cast<byte>(old_pk_extra_size);
rec_convert_dtuple_to_temp(
rec_convert_dtuple_to_temp<false>(
b + old_pk_extra_size, new_index,
old_pk->fields, old_pk->n_fields);
......@@ -855,7 +855,7 @@ row_log_table_low_redundant(
rec_comp_status_t status = is_instant
? REC_STATUS_INSTANT : REC_STATUS_ORDINARY;
size = rec_get_converted_size_temp(
size = rec_get_converted_size_temp<true>(
index, tuple->fields, tuple->n_fields, &extra_size, status);
if (is_instant) {
size++;
......@@ -875,7 +875,7 @@ row_log_table_low_redundant(
ut_ad(DATA_ROLL_PTR_LEN == dtuple_get_nth_field(
old_pk, old_pk->n_fields - 1)->len);
old_pk_size = rec_get_converted_size_temp(
old_pk_size = rec_get_converted_size_temp<false>(
new_index, old_pk->fields, old_pk->n_fields,
&old_pk_extra_size);
ut_ad(old_pk_extra_size < 0x100);
......@@ -892,7 +892,7 @@ row_log_table_low_redundant(
if (old_pk_size) {
*b++ = static_cast<byte>(old_pk_extra_size);
rec_convert_dtuple_to_temp(
rec_convert_dtuple_to_temp<false>(
b + old_pk_extra_size, new_index,
old_pk->fields, old_pk->n_fields);
b += old_pk_size;
......@@ -915,7 +915,7 @@ row_log_table_low_redundant(
*b = status;
}
rec_convert_dtuple_to_temp(
rec_convert_dtuple_to_temp<true>(
b + extra_size, index, tuple->fields, tuple->n_fields,
status);
b += size;
......@@ -1037,7 +1037,7 @@ row_log_table_low(
ut_ad(DATA_ROLL_PTR_LEN == dtuple_get_nth_field(
old_pk, old_pk->n_fields - 1)->len);
old_pk_size = rec_get_converted_size_temp(
old_pk_size = rec_get_converted_size_temp<false>(
new_index, old_pk->fields, old_pk->n_fields,
&old_pk_extra_size);
ut_ad(old_pk_extra_size < 0x100);
......@@ -1053,7 +1053,7 @@ row_log_table_low(
if (old_pk_size) {
*b++ = static_cast<byte>(old_pk_extra_size);
rec_convert_dtuple_to_temp(
rec_convert_dtuple_to_temp<false>(
b + old_pk_extra_size, new_index,
old_pk->fields, old_pk->n_fields);
b += old_pk_size;
......@@ -4042,3 +4042,9 @@ row_log_apply(
DBUG_RETURN(error);
}
unsigned row_log_get_n_core_fields(const dict_index_t *index)
{
ut_ad(index->online_log);
return index->online_log->n_core_fields;
}
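The redundant_temp=true instantiations are invoked from row_log_table_low_redundant() further below, i.e. presumably when concurrent DML against a ROW_FORMAT=REDUNDANT table with instant-added columns is logged during an online table rebuild; a rough, hedged SQL sketch of the kind of workload that would reach this path (table name invented, not from this commit):

CREATE TABLE t_red (a INT PRIMARY KEY) ENGINE=InnoDB ROW_FORMAT=REDUNDANT;
INSERT INTO t_red VALUES (1);
ALTER TABLE t_red ADD COLUMN b INT, ALGORITHM=INSTANT;
-- connection 1: online rebuild that applies the row log
ALTER TABLE t_red FORCE, ALGORITHM=INPLACE, LOCK=NONE;
-- connection 2, concurrently: DML that must be written to the online log
INSERT INTO t_red VALUES (2, 2);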
......@@ -302,7 +302,7 @@ row_merge_buf_encode(
ulint size;
ulint extra_size;
size = rec_get_converted_size_temp(
size = rec_get_converted_size_temp<false>(
index, entry->fields, n_fields, &extra_size);
ut_ad(size >= extra_size);
......@@ -315,7 +315,7 @@ row_merge_buf_encode(
*(*b)++ = (byte) (extra_size + 1);
}
rec_convert_dtuple_to_temp(*b + extra_size, index,
rec_convert_dtuple_to_temp<false>(*b + extra_size, index,
entry->fields, n_fields);
*b += size;
......@@ -790,7 +790,7 @@ row_merge_buf_add(
ulint size;
ulint extra;
size = rec_get_converted_size_temp(
size = rec_get_converted_size_temp<false>(
index, entry->fields, n_fields, &extra);
ut_ad(data_size + extra_size == size);
......