Commit ca821692 authored by Marko Mäkelä

Merge 10.5 into 10.6

parents e9f33b77 5eb53955
CREATE TABLE t1 (id INT AUTO_INCREMENT PRIMARY KEY)ENGINE=INNODB;
# SETTING auto_increment_increment IN CONNECTION DEFAULT
SET AUTO_INCREMENT_INCREMENT = 1;
# MDEV-24348 InnoDB shutdown hang with innodb_flush_sync=0
SET GLOBAL innodb_flush_sync=OFF;
# For the server to hang, we must have pages for temporary tables
# (and the bug depended on MDEV-12227 not being fixed).
CREATE TEMPORARY TABLE t (id SERIAL) ENGINE=InnoDB;
SET debug_dbug= '+d,ib_log_flush_ahead';
INSERT INTO t1 VALUES(NULL);
SELECT * FROM t1;
id
......
......@@ -8,6 +8,14 @@ CREATE TABLE t1 (id INT AUTO_INCREMENT PRIMARY KEY)ENGINE=INNODB;
--echo # SETTING auto_increment_increment IN CONNECTION DEFAULT
SET AUTO_INCREMENT_INCREMENT = 1;
--echo # MDEV-24348 InnoDB shutdown hang with innodb_flush_sync=0
SET GLOBAL innodb_flush_sync=OFF;
--echo # For the server to hang, we must have pages for temporary tables
--echo # (and the bug depended on MDEV-12227 not being fixed).
CREATE TEMPORARY TABLE t (id SERIAL) ENGINE=InnoDB;
SET debug_dbug= '+d,ib_log_flush_ahead';
INSERT INTO t1 VALUES(NULL);
SELECT * FROM t1;
SHOW CREATE TABLE t1;
......
......@@ -179,6 +179,26 @@ sync_slave_with_master;
--source include/show_binlog_events.inc
connection master;
--echo #
--echo # MDEV-24351: S3, same-backend replication: Dropping a table on master
--echo # causes error on slave
--echo #
show variables like 's3_replicate_alter_as_create_select';
connection slave;
create table t3 (a int, b int) engine=aria;
insert into t3 values (1,1),(2,2),(3,3);
alter table t3 engine=s3;
connection master;
let $binlog_start= query_get_value("SHOW MASTER STATUS", Position, 1);
drop table t3;
--echo # Must show "DROP TABLE IF EXISTS t3", not just "DROP TABLE t3"
--source include/show_binlog_events.inc
sync_slave_with_master;
connection master;
--echo #
--echo # clean up
--echo #
......
......@@ -272,6 +272,26 @@ slave-bin.000001 # Gtid # # GTID #-#-#
slave-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t1`,`t2` /* generated by server */
connection master;
#
# MDEV-24351: S3, same-backend replication: Dropping a table on master
# causes error on slave
#
show variables like 's3_replicate_alter_as_create_select';
Variable_name Value
s3_replicate_alter_as_create_select ON
connection slave;
create table t3 (a int, b int) engine=aria;
insert into t3 values (1,1),(2,2),(3,3);
alter table t3 engine=s3;
connection master;
drop table t3;
# Must show "DROP TABLE IF EXISTS t3", not just "DROP TABLE t3"
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t3` /* generated by server */
connection slave;
connection master;
#
# clean up
#
connection slave;
......
......@@ -272,6 +272,26 @@ slave-bin.000001 # Gtid # # GTID #-#-#
slave-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t1`,`t2` /* generated by server */
connection master;
#
# MDEV-24351: S3, same-backend replication: Dropping a table on master
# causes error on slave
#
show variables like 's3_replicate_alter_as_create_select';
Variable_name Value
s3_replicate_alter_as_create_select ON
connection slave;
create table t3 (a int, b int) engine=aria;
insert into t3 values (1,1),(2,2),(3,3);
alter table t3 engine=s3;
connection master;
drop table t3;
# Must show "DROP TABLE IF EXISTS t3", not just "DROP TABLE t3"
include/show_binlog_events.inc
Log_name Pos Event_type Server_id End_log_pos Info
master-bin.000001 # Gtid # # GTID #-#-#
master-bin.000001 # Query # # use `database`; DROP TABLE IF EXISTS `t3` /* generated by server */
connection slave;
connection master;
#
# clean up
#
connection slave;
......
......@@ -5020,6 +5020,8 @@ static my_bool delete_table_force(THD *thd, plugin_ref plugin, void *arg)
param->error= error;
if (error == 0)
{
if (hton && hton->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE)
thd->replication_flags |= OPTION_IF_EXISTS;
param->error= 0;
return TRUE; // Table was deleted
}
......
......@@ -5613,6 +5613,61 @@ void Item_func_like::print(String *str, enum_query_type query_type)
}
/**
  Evaluate the ESCAPE expression of LIKE and store the escape character.

  @param thd                     current session; its sql_mode is consulted
  @param escape_item             the ESCAPE expression to evaluate
  @param tmp_str                 buffer handed to escape_item->val_str()
  @param escape_used_in_parsing  when set, the number of characters in the
                                 escape string is validated
  @param cmp_cs                  character set used for the comparison
  @param[out] escape             receives the escape character code

  @retval FALSE  *escape has been assigned
  @retval TRUE   ER_WRONG_ARGUMENTS was reported for "ESCAPE"
*/
static bool fix_escape_item_now(THD *thd, Item *escape_item, String *tmp_str,
                                bool escape_used_in_parsing, CHARSET_INFO *cmp_cs,
                                int *escape)
{
  String *escape_str= escape_item->val_str(tmp_str);
  if (!escape_str)
  {
    /* No string value is available: fall back to the backslash. */
    *escape= '\\';
    return FALSE;
  }

  const char *escape_str_ptr= escape_str->ptr();

  if (escape_used_in_parsing)
  {
    /*
      With MODE_NO_BACKSLASH_ESCAPES exactly one character is required;
      otherwise at most one character is accepted.
    */
    const bool wrong_length=
      (thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES)
      ? escape_str->numchars() != 1
      : escape_str->numchars() > 1;
    if (wrong_length)
    {
      my_error(ER_WRONG_ARGUMENTS,MYF(0),"ESCAPE");
      return TRUE;
    }
  }

  if (cmp_cs->use_mb())
  {
    /* Decode the first character of the escape string with mb_wc(). */
    my_wc_t wc;
    const uchar *first= (const uchar*) escape_str_ptr;
    int rc= escape_str->charset()->mb_wc(&wc, first,
                                         first + escape_str->length());
    *escape= (int) (rc > 0 ? wc : '\\');
    return FALSE;
  }

  /*
    For an 8bit character set the native code is passed as the "escape"
    argument rather than the Unicode code. The escape string is converted
    to cmp_cs first when its own character set requires conversion.
  */
  uint32 unused;
  if (!escape_str->needs_conversion(escape_str->length(),
                                    escape_str->charset(),cmp_cs,&unused))
  {
    *escape= escape_str_ptr ? *escape_str_ptr : '\\';
    return FALSE;
  }

  char ch;
  uint errors;
  uint32 cnvlen= copy_and_convert(&ch, 1, cmp_cs, escape_str_ptr,
                                  escape_str->length(),
                                  escape_str->charset(), &errors);
  *escape= cnvlen ? ch : '\\';
  return FALSE;
}
longlong Item_func_like::val_int()
{
DBUG_ASSERT(fixed == 1);
......@@ -5631,6 +5686,17 @@ longlong Item_func_like::val_int()
null_value=0;
if (canDoTurboBM)
return turboBM_matches(res->ptr(), res->length()) ? !negated : negated;
if (unlikely(!escape_item_evaluated))
{
if (fix_escape_item_now(current_thd, escape_item, &cmp_value1,
escape_used_in_parsing,
cmp_collation.collation, &escape))
{
null_value= 1;
return 0;
}
escape_item_evaluated= 1;
}
return cmp_collation.collation->wildcmp(
res->ptr(),res->ptr()+res->length(),
res2->ptr(),res2->ptr()+res2->length(),
......@@ -5711,58 +5777,13 @@ bool fix_escape_item(THD *thd, Item *escape_item, String *tmp_str,
if (escape_item->const_item())
{
/* If we are on execution stage */
String *escape_str= escape_item->val_str(tmp_str);
if (escape_str)
{
const char *escape_str_ptr= escape_str->ptr();
if (escape_used_in_parsing && (
(((thd->variables.sql_mode & MODE_NO_BACKSLASH_ESCAPES) &&
escape_str->numchars() != 1) ||
escape_str->numchars() > 1)))
{
my_error(ER_WRONG_ARGUMENTS,MYF(0),"ESCAPE");
return TRUE;
}
if (cmp_cs->use_mb())
{
CHARSET_INFO *cs= escape_str->charset();
my_wc_t wc;
int rc= cs->mb_wc(&wc,
(const uchar*) escape_str_ptr,
(const uchar*) escape_str_ptr +
escape_str->length());
*escape= (int) (rc > 0 ? wc : '\\');
}
else
{
/*
In the case of 8bit character set, we pass native
code instead of Unicode code as "escape" argument.
Convert to "cs" if charset of escape differs.
*/
uint32 unused;
if (escape_str->needs_conversion(escape_str->length(),
escape_str->charset(),cmp_cs,&unused))
{
char ch;
uint errors;
uint32 cnvlen= copy_and_convert(&ch, 1, cmp_cs, escape_str_ptr,
escape_str->length(),
escape_str->charset(), &errors);
*escape= cnvlen ? ch : '\\';
}
else
*escape= escape_str_ptr ? *escape_str_ptr : '\\';
}
}
else
*escape= '\\';
return fix_escape_item_now(thd, escape_item, tmp_str, escape_used_in_parsing,
cmp_cs, escape);
}
return FALSE;
}
bool Item_func_like::fix_fields(THD *thd, Item **ref)
{
DBUG_ASSERT(fixed == 0);
......@@ -5772,8 +5793,10 @@ bool Item_func_like::fix_fields(THD *thd, Item **ref)
cmp_collation.collation, &escape))
return TRUE;
escape_item_evaluated= 0;
if (escape_item->const_item())
{
escape_item_evaluated= 1;
/*
We could also do Boyer-Moore for non-const items, but as we would have to
recompute the tables for each row it's not worth it.
......
......@@ -2672,14 +2672,24 @@ class Item_func_isnotnull :public Item_func_null_predicate
class Item_func_like :public Item_bool_func2
{
// Turbo Boyer-Moore data
bool canDoTurboBM; // pattern is '%abcd%' case
const char* pattern;
int pattern_len;
Item *escape_item;
DTCollation cmp_collation;
String cmp_value1, cmp_value2;
// Turbo Boyer-Moore data
// TurboBM buffers, *this is owner
int* bmGs; // good suffix shift table, size is pattern_len + 1
int* bmBc; // bad character shift table, size is alphabet_size
int *bmGs; // good suffix shift table, size is pattern_len + 1
int *bmBc; // bad character shift table, size is alphabet_size
int pattern_len;
public:
int escape;
bool negated;
private:
bool canDoTurboBM; // pattern is '%abcd%' case
bool escape_item_evaluated;
bool escape_used_in_parsing;
bool use_sampling;
void turboBM_compute_suffixes(int* suff);
void turboBM_compute_good_suffix_shifts(int* suff);
......@@ -2687,13 +2697,6 @@ class Item_func_like :public Item_bool_func2
bool turboBM_matches(const char* text, int text_len) const;
enum { alphabet_size = 256 };
Item *escape_item;
bool escape_used_in_parsing;
bool use_sampling;
DTCollation cmp_collation;
String cmp_value1, cmp_value2;
bool with_sargable_pattern() const;
protected:
SEL_TREE *get_func_mm_tree(RANGE_OPT_PARAM *param,
......@@ -2706,13 +2709,13 @@ class Item_func_like :public Item_bool_func2
KEY_PART *key_part,
Item_func::Functype type, Item *value);
public:
int escape;
bool negated;
Item_func_like(THD *thd, Item *a, Item *b, Item *escape_arg, bool escape_used):
Item_bool_func2(thd, a, b), canDoTurboBM(FALSE), pattern(0), pattern_len(0),
bmGs(0), bmBc(0), escape_item(escape_arg),
escape_used_in_parsing(escape_used), use_sampling(0), negated(0) {}
Item_bool_func2(thd, a, b), pattern(0), escape_item(escape_arg),
bmGs(0), bmBc(0), pattern_len(0), negated(0), canDoTurboBM(FALSE),
escape_item_evaluated(0), escape_used_in_parsing(escape_used),
use_sampling(0)
{}
bool get_negated() const { return negated; } // Used by ColumnStore
......
......@@ -2424,6 +2424,7 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists,
// note that for TABLE_TYPE_VIEW and TABLE_TYPE_UNKNOWN hton == NULL
}
thd->replication_flags= 0;
was_view= table_type == TABLE_TYPE_VIEW;
if ((table_type == TABLE_TYPE_UNKNOWN) || (was_view && !drop_view) ||
(table_type != TABLE_TYPE_SEQUENCE && drop_sequence))
......@@ -2473,7 +2474,6 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists,
if (hton && hton->flags & HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE)
log_if_exists= 1;
thd->replication_flags= 0;
bool enoent_warning= !dont_log_query && !(hton && hton->discover_table);
error= ha_delete_table(thd, hton, path, &db, &table_name, enoent_warning);
......@@ -2489,9 +2489,6 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists,
goto err;
}
}
/* This may be set by the storage engine in handler::delete_table() */
if (thd->replication_flags & OPTION_IF_EXISTS)
log_if_exists= 1;
/*
Delete the .frm file if we managed to delete the table from the
......@@ -2569,6 +2566,16 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST *tables, bool if_exists,
error= ferror;
}
/*
This may be set
- by the storage engine in handler::delete_table()
- when deleting a table without .frm file: delete_table_force() will
check if the storage engine that had the table had
HTON_TABLE_MAY_NOT_EXIST_ON_SLAVE flag
*/
if (thd->replication_flags & OPTION_IF_EXISTS)
log_if_exists= 1;
if (likely(!error) || non_existing_table_error(error))
{
if (Table_triggers_list::drop_all_triggers(thd, &db, &table_name,
......
......@@ -206,7 +206,7 @@ the common LRU list. That is, each manipulation of the common LRU
list will result in the same manipulation of the unzip_LRU list.
The chain of modified blocks (buf_pool.flush_list) contains the blocks
holding file pages that have been modified in the memory
holding persistent file pages that have been modified in the memory
but not written to disk yet. The block with the oldest modification
which has not yet been written to disk is at the end of the chain.
The access to this list is protected by buf_pool.flush_list_mutex.
......@@ -1339,6 +1339,12 @@ inline const buf_block_t *buf_pool_t::chunk_t::not_freed() const
break;
}
if (fsp_is_system_temporary(block->page.id().space()))
{
ut_ad(block->page.oldest_modification() <= 1);
break;
}
if (!block->page.ready_for_replace())
return block;
......@@ -1491,8 +1497,10 @@ void buf_pool_t::close()
/* The buffer pool must be clean during normal shutdown.
Only on aborted startup (with recovery) or with innodb_fast_shutdown=2
we may discard changes. */
ut_ad(!bpage->oldest_modification() || srv_is_being_started ||
srv_fast_shutdown == 2);
ut_d(const lsn_t oldest= bpage->oldest_modification();)
ut_ad(!oldest || srv_is_being_started ||
srv_fast_shutdown == 2 ||
(oldest == 1 && fsp_is_system_temporary(bpage->id().space())));
if (bpage->state() != BUF_BLOCK_FILE_PAGE)
buf_page_free_descriptor(bpage);
......@@ -4195,6 +4203,7 @@ void buf_pool_t::validate()
for (buf_page_t* b = UT_LIST_GET_FIRST(flush_list); b;
b = UT_LIST_GET_NEXT(list, b)) {
ut_ad(b->oldest_modification());
ut_ad(!fsp_is_system_temporary(b->id().space()));
n_flushing++;
switch (b->state()) {
......
......@@ -577,6 +577,7 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size)
const bool multi_batch= block1 + static_cast<uint32_t>(size) != block2 &&
old_first_free > size;
flushing_buffered_writes= 1 + multi_batch;
pages_submitted+= old_first_free;
/* Now safe to release the mutex. */
mysql_mutex_unlock(&mutex);
#ifdef UNIV_DEBUG
......@@ -611,7 +612,6 @@ bool buf_dblwr_t::flush_buffered_writes(const ulint size)
os_aio(request, write_buf,
os_offset_t{block1.page_no()} << srv_page_size_shift,
old_first_free << srv_page_size_shift);
srv_stats.data_written.add(old_first_free);
return true;
}
......@@ -628,17 +628,18 @@ void buf_dblwr_t::flush_buffered_writes_completed(const IORequest &request)
ut_ad(batch_running);
ut_ad(flushing_buffered_writes);
ut_ad(flushing_buffered_writes <= 2);
const bool completed= !--flushing_buffered_writes;
mysql_mutex_unlock(&mutex);
if (!completed)
writes_completed++;
if (UNIV_UNLIKELY(--flushing_buffered_writes))
{
mysql_mutex_unlock(&mutex);
return;
}
slot *const flush_slot= active_slot == &slots[0] ? &slots[1] : &slots[0];
ut_ad(flush_slot->reserved == flush_slot->first_free);
/* increment the doublewrite flushed pages counter */
srv_stats.dblwr_pages_written.add(flush_slot->first_free);
srv_stats.dblwr_writes.inc();
pages_written+= flush_slot->first_free;
mysql_mutex_unlock(&mutex);
/* Now flush the doublewrite buffer data to disk */
fil_system.sys_space->flush();
......
......@@ -148,6 +148,7 @@ void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
mysql_mutex_assert_not_owner(&buf_pool.mutex);
mysql_mutex_assert_owner(&log_sys.flush_order_mutex);
ut_ad(lsn);
ut_ad(!fsp_is_system_temporary(block->page.id().space()));
mysql_mutex_lock(&buf_pool.flush_list_mutex);
block->page.set_oldest_modification(lsn);
......@@ -163,24 +164,27 @@ void buf_flush_insert_into_flush_list(buf_block_t* block, lsn_t lsn)
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
}
/** Remove a block from buf_pool.flush_list.
The caller must hold both buf_pool.mutex and buf_pool.flush_list_mutex,
and must already have cleared the oldest_modification of the page.
@param[in,out] bpage block to be removed from buf_pool.flush_list;
must not belong to the temporary tablespace */
static void buf_flush_remove_low(buf_page_t *bpage)
{
/* Pages of the temporary tablespace must never be passed here. */
ut_ad(!fsp_is_system_temporary(bpage->id().space()));
mysql_mutex_assert_owner(&buf_pool.mutex);
mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
/* The caller is expected to have reset oldest_modification already. */
ut_ad(!bpage->oldest_modification());
/* Adjust the hazard pointer before unlinking the page from the list. */
buf_pool.flush_hp.adjust(bpage);
UT_LIST_REMOVE(buf_pool.flush_list, bpage);
buf_pool.stat.flush_list_bytes -= bpage->physical_size();
#ifdef UNIV_DEBUG
buf_flush_validate_skip();
#endif /* UNIV_DEBUG */
}
/** Remove a block from the flush list of modified blocks.
@param[in,out] bpage block to be removed from the flush list */
static void buf_flush_remove(buf_page_t *bpage)
{
mysql_mutex_assert_owner(&buf_pool.mutex);
mysql_mutex_assert_owner(&buf_pool.flush_list_mutex);
/* Important that we adjust the hazard pointer before removing
the bpage from flush list. */
buf_pool.flush_hp.adjust(bpage);
UT_LIST_REMOVE(buf_pool.flush_list, bpage);
bpage->clear_oldest_modification();
buf_pool.stat.flush_list_bytes -= bpage->physical_size();
#ifdef UNIV_DEBUG
buf_flush_validate_skip();
#endif /* UNIV_DEBUG */
bpage->clear_oldest_modification();
buf_flush_remove_low(bpage);
}
/** Remove all dirty pages belonging to a given tablespace when we are
......@@ -280,6 +284,7 @@ buf_flush_relocate_on_flush_list(
buf_page_t* prev;
mysql_mutex_assert_owner(&buf_pool.mutex);
ut_ad(!fsp_is_system_temporary(bpage->id().space()));
if (!bpage->oldest_modification()) {
return;
......@@ -356,11 +361,19 @@ void buf_page_write_complete(const IORequest &request)
DBUG_PRINT("ib_buf", ("write page %u:%u",
bpage->id().space(), bpage->id().page_no()));
ut_ad(request.is_LRU() ? buf_pool.n_flush_LRU : buf_pool.n_flush_list);
const bool temp= fsp_is_system_temporary(bpage->id().space());
mysql_mutex_lock(&buf_pool.mutex);
bpage->set_io_fix(BUF_IO_NONE);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_flush_remove(bpage);
ut_ad(!temp || bpage->oldest_modification() == 1);
bpage->clear_oldest_modification();
if (!temp)
buf_flush_remove_low(bpage);
else
ut_ad(request.is_LRU());
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (dblwr)
......@@ -784,8 +797,13 @@ static void buf_release_freed_page(buf_page_t *bpage)
mysql_mutex_lock(&buf_pool.mutex);
bpage->set_io_fix(BUF_IO_NONE);
bpage->status= buf_page_t::NORMAL;
const bool temp= fsp_is_system_temporary(bpage->id().space());
ut_ad(!temp || uncompressed);
ut_ad(!temp || bpage->oldest_modification() == 1);
mysql_mutex_lock(&buf_pool.flush_list_mutex);
buf_flush_remove(bpage);
bpage->clear_oldest_modification();
if (!temp)
buf_flush_remove_low(bpage);
mysql_mutex_unlock(&buf_pool.flush_list_mutex);
if (uncompressed)
......@@ -879,14 +897,20 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
{
space->reacquire();
ut_ad(status == buf_page_t::NORMAL || status == buf_page_t::INIT_ON_FLUSH);
size_t size, orig_size;
size_t size;
#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32
size_t orig_size;
#endif
IORequest::Type type= lru ? IORequest::WRITE_LRU : IORequest::WRITE_ASYNC;
if (UNIV_UNLIKELY(!rw_lock)) /* ROW_FORMAT=COMPRESSED */
{
ut_ad(!space->full_crc32());
ut_ad(!space->is_compressed()); /* not page_compressed */
orig_size= size= bpage->zip_size();
size= bpage->zip_size();
#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32
orig_size= size;
#endif
buf_flush_update_zip_checksum(frame, size);
frame= buf_page_encrypt(space, bpage, frame, &size);
ut_ad(size == bpage->zip_size());
......@@ -894,7 +918,10 @@ static bool buf_flush_page(buf_page_t *bpage, bool lru, fil_space_t *space)
else
{
byte *page= block->frame;
orig_size= size= block->physical_size();
size= block->physical_size();
#if defined HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || defined _WIN32
orig_size= size;
#endif
if (space->full_crc32())
{
......@@ -1539,7 +1566,7 @@ ulint buf_flush_lists(ulint max_n, lsn_t lsn)
const bool running= n_flush != 0;
/* FIXME: we are performing a dirty read of buf_pool.flush_list.count
while not holding buf_pool.flush_list_mutex */
if (running || !UT_LIST_GET_LEN(buf_pool.flush_list))
if (running || (lsn && !UT_LIST_GET_LEN(buf_pool.flush_list)))
{
if (!running)
mysql_cond_broadcast(cond);
......@@ -1846,33 +1873,6 @@ ATTRIBUTE_COLD static void buf_flush_sync_for_checkpoint(lsn_t lsn)
}
}
/** Decide how aggressively to flush, based on the percentage of dirty
pages in the buffer pool.
@param dirty_pct 100*flush_list.count / (LRU.count + free.count)
@return percent of io_capacity to flush to manage dirty page ratio */
static ulint af_get_pct_for_dirty(double dirty_pct)
{
	ut_ad(srv_max_dirty_pages_pct_lwm <= srv_max_buf_pool_modified_pct);

	if (srv_max_dirty_pages_pct_lwm != 0) {
		/* A low water mark is set: start flushing pages gradually,
		in proportion to the dirty page percentage. */
		return(static_cast<ulint>((dirty_pct * 100)
					  / (srv_max_buf_pool_modified_pct + 1)));
	}

	/* The user has not set the option to preflush dirty pages as we
	approach the high water mark. Flush at 100% of innodb_io_capacity
	once the high water mark has been crossed, and not at all before. */
	return(dirty_pct >= srv_max_buf_pool_modified_pct ? 100 : 0);
}
/*********************************************************************//**
Calculates if flushing is required based on redo generation rate.
@return percent of io_capacity to flush to manage redo space */
......@@ -1907,9 +1907,11 @@ Based on various factors it decides if there is a need to do flushing.
@return number of pages recommended to be flushed
@param last_pages_in number of pages flushed in previous batch
@param oldest_lsn buf_pool.get_oldest_modification(0)
@param dirty_blocks UT_LIST_GET_LEN(buf_pool.flush_list)
@param dirty_pct 100*flush_list.count / (LRU.count + free.count) */
static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in,
lsn_t oldest_lsn,
ulint dirty_blocks,
double dirty_pct)
{
static lsn_t prev_lsn = 0;
......@@ -1921,16 +1923,24 @@ static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in,
ulint n_pages = 0;
const lsn_t cur_lsn = log_sys.get_lsn();
ulint pct_for_dirty = af_get_pct_for_dirty(dirty_pct);
ut_ad(oldest_lsn <= cur_lsn);
ulint pct_for_lsn = af_get_pct_for_lsn(cur_lsn - oldest_lsn);
time_t curr_time = time(nullptr);
const double max_pct = srv_max_buf_pool_modified_pct;
if (!prev_lsn || !pct_for_lsn) {
prev_time = curr_time;
prev_lsn = cur_lsn;
return ulint(double(pct_for_dirty) / 100.0
* double(srv_io_capacity));
if (max_pct > 0.0) {
dirty_pct /= max_pct;
}
n_pages = ulint(dirty_pct * double(srv_io_capacity));
if (n_pages < dirty_blocks) {
n_pages= std::min<ulint>(srv_io_capacity, dirty_blocks);
}
return n_pages;
}
sum_pages += last_pages_in;
......@@ -1979,8 +1989,8 @@ static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in,
sum_pages = 0;
}
mysql_mutex_lock(&buf_pool.flush_list_mutex);
const ulint pct_for_dirty = static_cast<ulint>
(max_pct > 0.0 ? dirty_pct / max_pct : dirty_pct);
ulint pct_total = std::max(pct_for_dirty, pct_for_lsn);
/* Estimate pages to be flushed for the lsn progress */
......@@ -1988,6 +1998,8 @@ static ulint page_cleaner_flush_pages_recommendation(ulint last_pages_in,
+ lsn_avg_rate * buf_flush_lsn_scan_factor;
ulint pages_for_lsn = 0;
mysql_mutex_lock(&buf_pool.flush_list_mutex);
for (buf_page_t* b = UT_LIST_GET_LAST(buf_pool.flush_list);
b != NULL;
b = UT_LIST_GET_PREV(list, b)) {
......@@ -2120,7 +2132,9 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
if (dirty_pct < srv_max_dirty_pages_pct_lwm && !lsn_limit)
goto unemployed;
const lsn_t oldest_lsn= buf_pool.get_oldest_modification(0);
const lsn_t oldest_lsn= buf_pool.get_oldest_modified()
->oldest_modification();
ut_ad(oldest_lsn);
if (UNIV_UNLIKELY(lsn_limit != 0) && oldest_lsn >= lsn_limit)
buf_flush_sync_lsn= 0;
......@@ -2161,6 +2175,7 @@ static os_thread_ret_t DECLARE_THREAD(buf_flush_page_cleaner)(void*)
}
else if (ulint n= page_cleaner_flush_pages_recommendation(last_pages,
oldest_lsn,
dirty_blocks,
dirty_pct))
{
page_cleaner.flush_pass++;
......@@ -2300,7 +2315,8 @@ void buf_flush_sync()
struct Check {
void operator()(const buf_page_t* elem) const
{
ut_a(elem->oldest_modification());
ut_ad(elem->oldest_modification());
ut_ad(!fsp_is_system_temporary(elem->id().space()));
}
};
......
......@@ -873,6 +873,15 @@ class buf_page_t
/** Clear oldest_modification when removing from buf_pool.flush_list */
inline void clear_oldest_modification();
/** Notify that a page in a temporary tablespace has been modified.
The page must be a BUF_BLOCK_FILE_PAGE of the temporary tablespace whose
oldest_modification() is still 0. The value 1 is stored in
oldest_modification_ to mark the page as modified. */
void set_temp_modified()
{
ut_ad(fsp_is_system_temporary(id().space()));
ut_ad(state() == BUF_BLOCK_FILE_PAGE);
ut_ad(!oldest_modification());
/* 1 is the marker used for modified pages of the temporary tablespace */
oldest_modification_= 1;
}
/** Prepare to release a file page to buf_pool.free. */
void free_file_page()
{
......@@ -1472,18 +1481,22 @@ class buf_pool_t
inline buf_block_t *block_from_ahi(const byte *ptr) const;
#endif /* BTR_CUR_HASH_ADAPT */
/** Get the last block of flush_list.
The caller must hold flush_list_mutex.
@return the block that was made dirty the longest time ago
@retval nullptr if flush_list is empty */
const buf_page_t *get_oldest_modified() const
{
mysql_mutex_assert_owner(&flush_list_mutex);
const buf_page_t *bpage= UT_LIST_GET_LAST(flush_list);
/* flush_list must not contain pages of the temporary tablespace, and
every block in it must have a nonzero oldest_modification() */
ut_ad(!bpage || !fsp_is_system_temporary(bpage->id().space()));
ut_ad(!bpage || bpage->oldest_modification());
return bpage;
}
/**
@return the smallest oldest_modification lsn for any page
@retval empty_lsn if all modified persistent pages have been flushed */
lsn_t get_oldest_modification(lsn_t empty_lsn)
lsn_t get_oldest_modification(lsn_t empty_lsn) const
{
mysql_mutex_assert_owner(&flush_list_mutex);
const buf_page_t *bpage= UT_LIST_GET_LAST(flush_list);
#if 1 /* MDEV-12227 FIXME: remove this loop */
for (; bpage && fsp_is_system_temporary(bpage->id().space());
bpage= UT_LIST_GET_PREV(list, bpage))
ut_ad(bpage->oldest_modification());
#endif
const buf_page_t *bpage= get_oldest_modified();
return bpage ? bpage->oldest_modification() : empty_lsn;
}
......
......@@ -66,6 +66,12 @@ class buf_dblwr_t
bool batch_running;
/** number of expected flush_buffered_writes_completed() calls */
unsigned flushing_buffered_writes;
/** pages submitted to flush_buffered_writes() */
ulint pages_submitted;
/** number of flush_buffered_writes_completed() calls */
ulint writes_completed;
/** number of pages written by flush_buffered_writes_completed() */
ulint pages_written;
slot slots[2];
slot *active_slot= &slots[0];
......@@ -84,6 +90,20 @@ class buf_dblwr_t
/** Free the doublewrite buffer. */
void close();
/** Acquire the mutex; must be held while calling submitted(),
batches() or written() */
void lock() { mysql_mutex_lock(&mutex); }
/** @return the number of submitted page writes (pages_submitted);
the caller must hold the mutex */
ulint submitted() const
{ mysql_mutex_assert_owner(&mutex); return pages_submitted; }
/** @return the number of completed batches (writes_completed);
the caller must hold the mutex */
ulint batches() const
{ mysql_mutex_assert_owner(&mutex); return writes_completed; }
/** @return the number of final pages written (pages_written);
the caller must hold the mutex */
ulint written() const
{ mysql_mutex_assert_owner(&mutex); return pages_written; }
/** Release the mutex acquired by lock() */
void unlock() { mysql_mutex_unlock(&mutex); }
/** Initialize the doublewrite buffer memory structure on recovery.
If we are upgrading from a version before MySQL 4.1, then this
function performs the necessary update operations to support
......
......@@ -65,10 +65,12 @@ buf_flush_note_modification(
const lsn_t oldest_modification = block->page.oldest_modification();
if (!oldest_modification) {
if (oldest_modification) {
ut_ad(oldest_modification <= start_lsn);
} else if (!fsp_is_system_temporary(block->page.id().space())) {
buf_flush_insert_into_flush_list(block, start_lsn);
} else {
ut_ad(oldest_modification <= start_lsn);
block->page.set_temp_modified();
}
srv_stats.buf_pool_write_requests.inc();
......
......@@ -81,13 +81,6 @@ struct srv_stats_t
space in the log buffer and have to flush it */
ulint_ctr_1_t log_waits;
/** Count the number of times the doublewrite buffer was flushed */
ulint_ctr_1_t dblwr_writes;
/** Store the number of pages that have been flushed to the
doublewrite buffer */
ulint_ctr_1_t dblwr_pages_written;
#if defined(LINUX_NATIVE_AIO)
ulint_ctr_1_t buffered_aio_submitted;
#endif
......
......@@ -901,6 +901,7 @@ inline std::pair<lsn_t,bool> mtr_t::finish_write(ulint len)
m_log.for_each_block(write_log);
m_commit_lsn = log_sys.get_lsn();
bool flush = log_close(m_commit_lsn);
DBUG_EXECUTE_IF("ib_log_flush_ahead", flush=true;);
return std::make_pair(start_lsn, flush);
}
......
......@@ -1666,12 +1666,16 @@ srv_mon_process_existing_counter(
/* innodb_dblwr_writes */
case MONITOR_OVLD_SRV_DBLWR_WRITES:
value = srv_stats.dblwr_writes;
buf_dblwr.lock();
value = buf_dblwr.batches();
buf_dblwr.unlock();
break;
/* innodb_dblwr_pages_written */
case MONITOR_OVLD_SRV_DBLWR_PAGES_WRITTEN:
value = srv_stats.dblwr_pages_written;
buf_dblwr.lock();
value = buf_dblwr.written();
buf_dblwr.unlock();
break;
/* innodb_page_size */
......
......@@ -1093,7 +1093,17 @@ srv_export_innodb_status(void)
export_vars.innodb_data_writes = os_n_file_writes;
export_vars.innodb_data_written = srv_stats.data_written;
ulint dblwr = 0;
if (buf_dblwr.is_initialised()) {
buf_dblwr.lock();
dblwr = buf_dblwr.submitted();
export_vars.innodb_dblwr_pages_written = buf_dblwr.written();
export_vars.innodb_dblwr_writes = buf_dblwr.batches();
buf_dblwr.unlock();
}
export_vars.innodb_data_written = srv_stats.data_written + dblwr;
export_vars.innodb_buffer_pool_read_requests
= buf_pool.stat.n_page_gets;
......@@ -1169,11 +1179,6 @@ srv_export_innodb_status(void)
export_vars.innodb_log_writes = srv_stats.log_writes;
export_vars.innodb_dblwr_pages_written =
srv_stats.dblwr_pages_written;
export_vars.innodb_dblwr_writes = srv_stats.dblwr_writes;
export_vars.innodb_pages_created = buf_pool.stat.n_pages_created;
export_vars.innodb_pages_read = buf_pool.stat.n_pages_read;
......
......@@ -561,15 +561,8 @@ void thread_pool_generic::worker_main(worker_data *thread_var)
Helper function, to be used inside maintenance callback,
before m_last_activity is updated
*/
#ifndef __has_feature
# define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer)
const /* WITH_MSAN in clang++-11 does not work with constexpr */
#else
constexpr
#endif
auto invalid_timestamp= std::chrono::system_clock::time_point::max();
static const auto invalid_timestamp= std::chrono::system_clock::time_point::max();
constexpr auto max_idle_time= std::chrono::minutes(1);
/* Time since maintenance timer had nothing to do */
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment