nexedi / MariaDB · Commit f0de610d

Commit f0de610d authored Sep 10, 2024 by Marko Mäkelä

    Merge 10.11 into 11.2

Parents: abd98336, f9f92b48

Showing 17 changed files with 837 additions and 655 deletions (+837 -655)
mysql-test/suite/galera/r/MDEV-33133.result    +34   -0
mysql-test/suite/galera/t/MDEV-33133.test      +80   -0
scripts/sys_schema/README.md                   +287  -295
sql/wsrep_high_priority_service.cc             +12   -0
sql/wsrep_mysqld.cc                            +8    -2
storage/innobase/btr/btr0btr.cc                +1    -1
storage/innobase/btr/btr0cur.cc                +108  -86
storage/innobase/btr/btr0pcur.cc               +1    -2
storage/innobase/buf/buf0buf.cc                +175  -155
storage/innobase/buf/buf0flu.cc                +1    -1
storage/innobase/buf/buf0rea.cc                +13   -7
storage/innobase/gis/gis0sea.cc                +2    -2
storage/innobase/include/buf0buf.h             +69   -29
storage/innobase/include/buf0rea.h             +4    -7
storage/innobase/row/row0import.cc             +33   -28
storage/innobase/row/row0merge.cc              +4    -36
storage/innobase/trx/trx0undo.cc               +5    -4
mysql-test/suite/galera/r/MDEV-33133.result (new file, 0 → 100644)
connection node_2;
connection node_1;
connect node_1a,127.0.0.1,root,,test,$NODE_MYPORT_1;
connection node_1;
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_rollback_mdl_release';
connection node_2;
SET SESSION wsrep_trx_fragment_size = 1;
START TRANSACTION;
INSERT INTO t1 VALUES (1);
connection node_1a;
SELECT COUNT(*) FROM t1;
COUNT(*)
0
SET SESSION wsrep_retry_autocommit = 0;
SET DEBUG_SYNC = 'ha_write_row_start SIGNAL may_toi WAIT_FOR bf_abort';
INSERT INTO t1 VALUES (2);
connection node_1;
SET DEBUG_SYNC = 'now WAIT_FOR may_toi';
SET DEBUG_SYNC = 'after_wsrep_thd_abort WAIT_FOR sync.wsrep_rollback_mdl_release_reached';
TRUNCATE TABLE t1;
connection node_1a;
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
SET DEBUG_SYNC = 'now SIGNAL signal.wsrep_rollback_mdl_release';
connection node_2;
INSERT INTO t1 VALUES (3);
ERROR 40001: Deadlock found when trying to get lock; try restarting transaction
connection node_1;
SET GLOBAL DEBUG_DBUG = '';
SET DEBUG_SYNC = 'RESET';
DROP TABLE t1;
disconnect node_1a;
disconnect node_2;
disconnect node_1;
mysql-test/suite/galera/t/MDEV-33133.test (new file, 0 → 100644)
#
# MDEV-33133: MDL conflict handling code should skip transactions
# BF-aborted before.
#
# It's possible that MDL conflict handling code is called more
# than once for a transaction when:
# - it holds more than one conflicting MDL lock
# - reschedule_waiters() is executed,
# which results in repeated attempts to BF-abort already aborted
# transaction.
# In such situations, it might be that BF-aborting logic sees
# a partially rolled back transaction and erroneously decides
# on future actions for such a transaction.
#
# The specific situation tested and fixed is when a SR transaction
# applied in the node gets BF-aborted by a started TOI operation.
# It's then caught with the server transaction already rolled back,
# but with no MDL locks yet released. This caused wrong state
# detection for such a transaction during repeated MDL conflict
# handling code execution.
#
--source include/galera_cluster.inc
--source include/have_debug_sync.inc
--source include/have_debug.inc

--connect node_1a, 127.0.0.1, root,, test, $NODE_MYPORT_1

--connection node_1
CREATE TABLE t1 (f1 INTEGER PRIMARY KEY) ENGINE=InnoDB;
SET GLOBAL DEBUG_DBUG = 'd,sync.wsrep_rollback_mdl_release';

--connection node_2
SET SESSION wsrep_trx_fragment_size = 1;
START TRANSACTION;
INSERT INTO t1 VALUES (1);

--connection node_1a
# Sync wait for SR transaction to replicate and apply fragment.
SELECT COUNT(*) FROM t1;
SET SESSION wsrep_retry_autocommit = 0;
SET DEBUG_SYNC = 'ha_write_row_start SIGNAL may_toi WAIT_FOR bf_abort';
--send INSERT INTO t1 VALUES (2);

--connection node_1
SET DEBUG_SYNC = 'now WAIT_FOR may_toi';
# BF-abort the SR transaction and wait until it reaches the point
# prior to releasing MDL locks.
# Then abort the local INSERT, which will go through reschedule_waiters()
# and see the SR transaction holding MDL locks but already rolled back.
# In this case the SR transaction should be skipped in MDL conflict
# handling code.
SET DEBUG_SYNC = 'after_wsrep_thd_abort WAIT_FOR sync.wsrep_rollback_mdl_release_reached';
--send TRUNCATE TABLE t1;

--connection node_1a
# Local INSERT gets aborted.
--error ER_LOCK_DEADLOCK
--reap
# Let the aborted SR transaction continue and finally release MDL locks,
# which in turn allows TRUNCATE to complete.
SET DEBUG_SYNC = 'now SIGNAL signal.wsrep_rollback_mdl_release';

--connection node_2
# SR transaction has been BF-aborted.
--error ER_LOCK_DEADLOCK
INSERT INTO t1 VALUES (3);

--connection node_1
# TRUNCATE completes.
--reap

# Cleanup
SET GLOBAL DEBUG_DBUG = '';
SET DEBUG_SYNC = 'RESET';
DROP TABLE t1;
--disconnect node_1a
--source include/galera_end.inc
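The server-side fix this test exercises appears in the sql/wsrep_mysqld.cc hunk further below. As a standalone sketch of just the decision it adds (all types and names here are simplified stand-ins: the hypothetical VictimSketch plays the role of THD, and the real check runs under LOCK_thd_kill/LOCK_thd_data):

#include <cassert>
#include <cstdint>

struct VictimSketch {
  uint64_t wsrep_aborter; // thread id of the BF thread that aborted it, or 0
};

// Returns true when MDL conflict handling should skip BF-aborting the
// victim again and simply wait for it to release its MDL locks.
bool skip_already_aborted(const VictimSketch &victim, uint64_t requestor_id)
{
  if (victim.wsrep_aborter != 0)
  {
    // Only the thread that already BF-aborted the victim may be waiting here.
    assert(victim.wsrep_aborter == requestor_id);
    return true;
  }
  return false; // first conflict: proceed with the normal BF-abort handling
}

int main()
{
  VictimSketch already_aborted{42};
  return skip_already_aborted(already_aborted, 42) ? 0 : 1;
}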
scripts/sys_schema/README.md

This source diff could not be displayed because it is too large.
sql/wsrep_high_priority_service.cc
...
...
@@ -392,6 +392,18 @@ int Wsrep_high_priority_service::rollback(const wsrep::ws_handle& ws_handle,
               wsrep_thd_transaction_state_str(m_thd),
               m_thd->killed);
+#ifdef ENABLED_DEBUG_SYNC
+  DBUG_EXECUTE_IF("sync.wsrep_rollback_mdl_release", {
+    const char act[]= "now "
+                      "SIGNAL sync.wsrep_rollback_mdl_release_reached "
+                      "WAIT_FOR signal.wsrep_rollback_mdl_release";
+    DBUG_ASSERT(!debug_sync_set_action(m_thd, STRING_WITH_LEN(act)));
+  };);
+#endif
   m_thd->release_transactional_locks();
   free_root(m_thd->mem_root, MYF(MY_KEEP_PREALLOC));
...
...
sql/wsrep_mysqld.cc
...
...
@@ -3209,7 +3209,12 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
     mysql_mutex_lock(&granted_thd->LOCK_thd_kill);
     mysql_mutex_lock(&granted_thd->LOCK_thd_data);
-    if (wsrep_thd_is_toi(granted_thd) ||
-        wsrep_thd_is_applying(granted_thd))
+    if (granted_thd->wsrep_aborter != 0)
+    {
+      DBUG_ASSERT(granted_thd->wsrep_aborter == request_thd->thread_id);
+      WSREP_DEBUG("BF thread waiting for a victim to release locks");
+    }
+    else if (wsrep_thd_is_toi(granted_thd) ||
+             wsrep_thd_is_applying(granted_thd))
     {
       if (wsrep_thd_is_aborting(granted_thd))
...
...
@@ -3300,6 +3305,7 @@ void wsrep_handle_mdl_conflict(MDL_context *requestor_ctx,
     }
     mysql_mutex_unlock(&granted_thd->LOCK_thd_data);
     mysql_mutex_unlock(&granted_thd->LOCK_thd_kill);
+    DEBUG_SYNC(request_thd, "after_wsrep_thd_abort");
   }
   else
   {
...
...
storage/innobase/btr/btr0btr.cc
...
...
@@ -1126,7 +1126,7 @@ void btr_drop_temporary_table(const dict_table_t &table)
   {
     if (buf_block_t *block=
         buf_page_get_gen({SRV_TMP_SPACE_ID, index->page}, 0, RW_X_LATCH,
-                         nullptr, BUF_GET, &mtr, nullptr, nullptr))
+                         nullptr, BUF_GET, &mtr, nullptr))
    {
      btr_free_but_not_root(block, MTR_LOG_NO_REDO);
      mtr.set_log_mode(MTR_LOG_NO_REDO);
...
...
storage/innobase/btr/btr0cur.cc
...
...
@@ -926,24 +926,21 @@ static inline page_cur_mode_t btr_cur_nonleaf_mode(page_cur_mode_t mode)
 MY_ATTRIBUTE((nonnull, warn_unused_result))
 /** Acquire a latch on the previous page without violating the latching order.
-@param block    index page
-@param page_id  page identifier with valid space identifier
-@param zip_size ROW_FORMAT=COMPRESSED page size, or 0
 @param rw_latch the latch on block (RW_S_LATCH or RW_X_LATCH)
-@param mtr      mini-transaction
+@param page_id  page identifier with valid space identifier
 @param err      error code
+@param mtr      mini-transaction
 @retval 0 if an error occurred
 @retval 1 if the page could be latched in the wrong order
 @retval -1 if the latch on block was temporarily released */
-static int btr_latch_prev(buf_block_t *block, page_id_t page_id,
-                          ulint zip_size, rw_lock_type_t rw_latch,
-                          mtr_t *mtr, dberr_t *err)
+static int btr_latch_prev(rw_lock_type_t rw_latch, page_id_t page_id,
+                          dberr_t *err, mtr_t *mtr)
 {
   ut_ad(rw_latch == RW_S_LATCH || rw_latch == RW_X_LATCH);
-  ut_ad(page_id.space() == block->page.id().space());
-  const auto prev_savepoint= mtr->get_savepoint();
-  ut_ad(block == mtr->at_savepoint(prev_savepoint - 1));
+  buf_block_t *block= mtr->at_savepoint(mtr->get_savepoint() - 1);
+  ut_ad(page_id.space() == block->page.id().space());
   const page_t *const page= block->page.frame;
   page_id.set_page_no(btr_page_get_prev(page));
...
...
@@ -959,68 +956,78 @@ static int btr_latch_prev(buf_block_t *block, page_id_t page_id,
   buffer-fixes on both blocks will prevent eviction. */
 retry:
-  /* Pass no_wait pointer to ensure that we don't wait on the current page
-  latch while holding the next page latch to avoid latch ordering violation. */
-  bool no_wait= false;
-  buf_block_t *prev= buf_page_get_gen(page_id, zip_size, RW_NO_LATCH, nullptr,
-                                      BUF_GET, mtr, err, &no_wait);
+  int ret= 1;
+  buf_block_t *prev= buf_pool.page_fix(page_id, err, buf_pool_t::FIX_NOWAIT);
   if (UNIV_UNLIKELY(!prev))
     return 0;
+  if (prev == reinterpret_cast<buf_block_t*>(-1))
+  {
+    /* The block existed in buf_pool.page_hash, but not in a state that is
+    safe to access without waiting for some pending operation, such as
+    buf_page_t::read_complete() or buf_pool_t::unzip().
+    Retry while temporarily releasing the successor block->page.lock
+    (but retaining a buffer-fix so that the block cannot be evicted. */
+    if (rw_latch == RW_S_LATCH)
+      block->page.lock.s_unlock();
+    else
+      block->page.lock.x_unlock();
+    prev= buf_pool.page_fix(page_id, err, buf_pool_t::FIX_WAIT_READ);
+    if (!prev)
+    {
-      /* Check if we had to return because we couldn't wait on latch. */
-      if (no_wait)
-        goto ordered_latch;
+      ut_ad(*err != DB_SUCCESS);
+      if (rw_latch == RW_S_LATCH)
+        block->page.lock.s_lock();
+      else
+        block->page.lock.x_lock();
+      return 0;
+    }
+    else if (rw_latch == RW_S_LATCH)
+      goto wait_for_s;
+    else
+      goto wait_for_x;
+  }
   static_assert(MTR_MEMO_PAGE_S_FIX == mtr_memo_type_t(BTR_SEARCH_LEAF), "");
   static_assert(MTR_MEMO_PAGE_X_FIX == mtr_memo_type_t(BTR_MODIFY_LEAF), "");
   if (rw_latch == RW_S_LATCH
       ? prev->page.lock.s_lock_try()
       : prev->page.lock.x_lock_try())
-  {
-    mtr->lock_register(prev_savepoint, mtr_memo_type_t(rw_latch));
-    if (UNIV_UNLIKELY(prev->page.id() != page_id))
-    {
-    fail:
-      /* the page was just read and found to be corrupted */
-      mtr->rollback_to_savepoint(prev_savepoint);
-      return 0;
-    }
-  }
+    mtr->memo_push(prev, mtr_memo_type_t(rw_latch));
   else
   {
+    ut_ad(mtr->at_savepoint(mtr->get_savepoint() - 1)->page.id() == page_id);
+    mtr->release_last_page();
-  ordered_latch:
-    prev= buf_page_get_gen(page_id, zip_size, rw_latch, prev, BUF_GET,
-                           mtr, err);
     if (rw_latch == RW_S_LATCH)
     {
       block->page.lock.s_unlock();
+    wait_for_s:
       prev->page.lock.s_lock();
       block->page.lock.s_lock();
     }
     else
     {
       block->page.lock.x_unlock();
+    wait_for_x:
       prev->page.lock.x_lock();
       block->page.lock.x_lock();
     }
     ut_ad(block == mtr->at_savepoint(mtr->get_savepoint() - 1));
+    mtr->memo_push(prev, mtr_memo_type_t(rw_latch));
     const page_id_t prev_page_id= page_id;
     page_id.set_page_no(btr_page_get_prev(page));
+    ret= -1;
     if (UNIV_UNLIKELY(page_id != prev_page_id))
     {
       mtr->release_last_page();
       if (page_id.page_no() == FIL_NULL)
-        return -1;
+        return ret;
       goto retry;
     }
-    if (UNIV_UNLIKELY(!prev))
-      goto fail;
-    ret= -1;
   }
   const page_t *const p= prev->page.frame;
...
...
@@ -1047,11 +1054,11 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
   btr_intention_t lock_intention;
   bool detected_same_key_root= false;
-  mem_heap_t *heap= NULL;
+  mem_heap_t *heap= nullptr;
   rec_offs offsets_[REC_OFFS_NORMAL_SIZE];
   rec_offs *offsets= offsets_;
   rec_offs offsets2_[REC_OFFS_NORMAL_SIZE];
   rec_offs *offsets2= offsets2_;
   rec_offs_init(offsets_);
   rec_offs_init(offsets2_);
...
...
@@ -1208,7 +1215,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
     ut_a(page_zip_validate(page_zip, block->page.frame, index()));
 #endif /* UNIV_ZIP_DEBUG */
-    const uint32_t page_level= btr_page_get_level(block->page.frame);
+    uint32_t page_level= btr_page_get_level(block->page.frame);
     if (height == ULINT_UNDEFINED)
     {
...
...
@@ -1216,6 +1223,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
 #ifdef BTR_CUR_ADAPT
       info->root_guess= block;
 #endif
+    reached_root:
       height= page_level;
       tree_height= height + 1;
...
...
@@ -1225,35 +1233,53 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
       We may have to reacquire the page latch in a different mode. */
       switch (rw_latch) {
       case RW_S_LATCH:
-        if ((latch_mode & ~12) != RW_S_LATCH)
+        if (!(latch_mode & BTR_SEARCH_LEAF))
         {
+          rw_latch= RW_X_LATCH;
           ut_ad(rw_lock_type_t(latch_mode & ~12) == RW_X_LATCH);
-          goto relatch_x;
-        }
-        else
+          mtr->lock_register(block_savepoint, MTR_MEMO_PAGE_X_FIX);
+          if (!block->page.lock.s_x_upgrade_try())
+          {
+            block->page.lock.s_unlock();
+            block->page.lock.x_lock();
+            /* Dropping the index tree (and freeing the root page)
+            should be impossible while we hold index()->lock. */
+            ut_ad(!block->page.is_freed());
+            page_level= btr_page_get_level(block->page.frame);
+            if (UNIV_UNLIKELY(page_level != 0))
+            {
+              /* btr_root_raise_and_insert() was executed meanwhile */
+              ut_ad(mtr->memo_contains_flagged(&index()->lock,
+                                               MTR_MEMO_S_LOCK));
+              block->page.lock.x_u_downgrade();
+              block->page.lock.u_s_downgrade();
+              rw_latch= RW_S_LATCH;
+              mtr->lock_register(block_savepoint, MTR_MEMO_PAGE_S_FIX);
+              goto reached_root;
+            }
+          }
+        }
+        if (rw_latch != RW_S_LATCH)
+          break;
         if (!latch_by_caller)
           /* Release the tree s-latch */
           mtr->rollback_to_savepoint(savepoint, savepoint + 1);
         goto reached_latched_leaf;
-        /* fall through */
       case RW_SX_LATCH:
-        ut_ad(rw_latch == RW_S_LATCH ||
-              latch_mode == BTR_MODIFY_ROOT_AND_LEAF);
-      relatch_x:
-        mtr->rollback_to_savepoint(block_savepoint);
-        height= ULINT_UNDEFINED;
+        ut_ad(latch_mode == BTR_MODIFY_ROOT_AND_LEAF);
         static_assert(int{BTR_MODIFY_ROOT_AND_LEAF} == int{RW_SX_LATCH}, "");
-        rw_latch= RW_X_LATCH;
-        goto search_loop;
+        mtr->lock_register(block_savepoint, MTR_MEMO_PAGE_X_FIX);
+        block->page.lock.u_x_upgrade();
+        break;
       case RW_X_LATCH:
         if (latch_mode == BTR_MODIFY_TREE)
           goto reached_index_root_and_leaf;
-        goto reached_root_and_leaf;
+        break;
       case RW_NO_LATCH:
         ut_ad(0);
       }
-      goto reached_leaf;
+      goto reached_root_and_leaf;
     }
   }
   else if (UNIV_UNLIKELY(height != page_level))
...
...
@@ -1277,7 +1303,6 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
   if (!height)
   {
-  reached_leaf:
     /* We reached the leaf level. */
     ut_ad(block == mtr->at_savepoint(block_savepoint));
...
...
@@ -1307,7 +1332,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
         /* latch also siblings from left to right */
         if (page_has_prev(block->page.frame) &&
-            !btr_latch_prev(block, page_id, zip_size, rw_latch, mtr, &err))
+            !btr_latch_prev(rw_latch, page_id, &err, mtr))
           goto func_exit;
         if (page_has_next(block->page.frame) &&
             !btr_block_get(*index(), btr_page_get_next(block->page.frame),
...
...
@@ -1333,7 +1358,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
       ut_ad(rw_latch == RW_X_LATCH);
       /* x-latch also siblings from left to right */
       if (page_has_prev(block->page.frame) &&
-          !btr_latch_prev(block, page_id, zip_size, rw_latch, mtr, &err))
+          !btr_latch_prev(rw_latch, page_id, &err, mtr))
         goto func_exit;
       if (page_has_next(block->page.frame) &&
           !btr_block_get(*index(), btr_page_get_next(block->page.frame),
...
...
@@ -1480,7 +1505,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
     ut_ad(rw_latch == RW_S_LATCH);
     if (!not_first_access)
-      buf_read_ahead_linear(page_id, zip_size);
+      buf_read_ahead_linear(page_id);
     if (page_has_prev(block->page.frame) &&
         page_rec_is_first(page_cur.rec, block->page.frame))
...
...
@@ -1489,7 +1514,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
       /* Latch the previous page if the node pointer is the leftmost
       of the current page. */
-      int ret= btr_latch_prev(block, page_id, zip_size, rw_latch, mtr, &err);
+      int ret= btr_latch_prev(rw_latch, page_id, &err, mtr);
       if (!ret)
         goto func_exit;
       ut_ad(block_savepoint + 2 == mtr->get_savepoint());
...
...
@@ -1515,7 +1540,7 @@ dberr_t btr_cur_t::search_leaf(const dtuple_t *tuple, page_cur_mode_t mode,
     case BTR_SEARCH_LEAF:
       rw_latch= rw_lock_type_t(latch_mode);
       if (!not_first_access)
-        buf_read_ahead_linear(page_id, zip_size);
+        buf_read_ahead_linear(page_id);
       break;
     case BTR_MODIFY_TREE:
       ut_ad(rw_latch == RW_X_LATCH);
...
...
@@ -1666,8 +1691,7 @@ dberr_t btr_cur_t::pessimistic_search_leaf(const dtuple_t *tuple,
 #endif /* UNIV_ZIP_DEBUG */
   if (page_has_prev(block->page.frame) &&
-      !btr_latch_prev(block, page_id, block->zip_size(), RW_X_LATCH,
-                      mtr, &err))
+      !btr_latch_prev(RW_X_LATCH, page_id, &err, mtr))
     goto func_exit;
   if (page_has_next(block->page.frame) &&
       !btr_block_get(*index(), btr_page_get_next(block->page.frame),
...
...
@@ -1880,7 +1904,6 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
   page_cur.index= index;
   uint32_t page= index->page;
-  const auto zip_size= index->table->space->zip_size();
   for (ulint height= ULINT_UNDEFINED;;)
   {
...
...
@@ -1931,8 +1954,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
       {
         /* x-latch also siblings from left to right */
         if (page_has_prev(block->page.frame) &&
-            !btr_latch_prev(block, block->page.id(), zip_size, RW_X_LATCH,
-                            mtr, &err))
+            !btr_latch_prev(RW_X_LATCH, block->page.id(), &err, mtr))
           break;
         if (page_has_next(block->page.frame) &&
             !btr_block_get(*index, btr_page_get_next(block->page.frame),
...
...
@@ -1986,8 +2008,7 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
     if (latch_mode != BTR_MODIFY_TREE)
     {
       if (!height && first && first_access)
-        buf_read_ahead_linear(page_id_t(block->page.id().space(), page),
-                              block->page.zip_size());
+        buf_read_ahead_linear(page_id_t(block->page.id().space(), page));
     }
     else if (btr_cur_need_opposite_intention(block->page, index->is_clust(),
                                              lock_intention,
...
...
@@ -2012,7 +2033,8 @@ dberr_t btr_cur_t::open_leaf(bool first, dict_index_t *index,
     {
       if (!btr_cur_will_modify_tree(index, block->page.frame, lock_intention,
                                     page_cur.rec, node_ptr_max_size,
-                                    zip_size, mtr))
+                                    index->table->space->zip_size(),
+                                    mtr))
       {
         ut_ad(n_blocks);
         /* release buffer-fixes on pages that will not be modified
...
...
@@ -6408,7 +6430,7 @@ btr_copy_blob_prefix(
       return copied_len;
     }
     if (!buf_page_make_young_if_needed(&block->page)) {
-      buf_read_ahead_linear(id, 0);
+      buf_read_ahead_linear(id);
     }
     page= buf_block_get_frame(block);
...
...
@@ -6487,7 +6509,7 @@ btr_copy_zblob_prefix(
     bpage is protected by the B-tree page latch that
     is being held on the clustered index record, or,
     in row_merge_copy_blobs(), by an exclusive table lock. */
-    bpage= buf_page_get_zip(id, zip_size);
+    bpage= buf_page_get_zip(id);
     if (UNIV_UNLIKELY(!bpage)) {
       ib::error() << "Cannot load compressed BLOB " << id;
...
...
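All btr_latch_prev() call sites updated above follow the three-way contract from its doc comment. A self-contained model of that contract (illustrative only; latch_prev_model and its boolean parameters are hypothetical stand-ins for the real latching logic):

#include <cstdio>

// 0 = error; 1 = previous page latched in order;
// -1 = the latch on the current block was temporarily released.
static int latch_prev_model(bool error, bool released_current_latch)
{
  if (error)
    return 0;
  return released_current_latch ? -1 : 1;
}

int main()
{
  const int ret= latch_prev_model(false, true);
  if (!ret)
    std::puts("0: error; callers bail out (goto func_exit in search_leaf)");
  else if (ret < 0)
    std::puts("-1: the current page may have changed; caller revalidates");
  else
    std::puts("1: both pages are latched left to right");
  return 0;
}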
storage/innobase/btr/btr0pcur.cc
...
...
@@ -533,8 +533,7 @@ btr_pcur_move_to_next_page(
   const auto s= mtr->get_savepoint();
   mtr->rollback_to_savepoint(s - 2, s - 1);
   if (first_access)
   {
-    buf_read_ahead_linear(next_block->page.id(), next_block->zip_size());
+    buf_read_ahead_linear(next_block->page.id());
   }
   return DB_SUCCESS;
 }
...
...
storage/innobase/buf/buf0buf.cc
...
...
@@ -2347,13 +2347,10 @@ be implemented at a higher level. In other words, all possible
 accesses to a given page through this function must be protected by
 the same set of mutexes or latches.
 @param page_id page identifier
-@param zip_size ROW_FORMAT=COMPRESSED page size in bytes
 @return pointer to the block, s-latched */
 TRANSACTIONAL_TARGET
-buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
+buf_page_t* buf_page_get_zip(const page_id_t page_id)
 {
-  ut_ad(zip_size);
-  ut_ad(ut_is_2pow(zip_size));
   ha_handler_stats *const stats= mariadb_stats;
   buf_inc_get(stats);
...
...
@@ -2454,7 +2451,7 @@ buf_page_t* buf_page_get_zip(const page_id_t page_id, ulint zip_size)
   return bpage;
 must_read_page:
-  switch (dberr_t err= buf_read_page(page_id, zip_size, chain)) {
+  switch (dberr_t err= buf_read_page(page_id, chain)) {
   case DB_SUCCESS:
   case DB_SUCCESS_LOCKED_REC:
     mariadb_increment_pages_read(stats);
...
...
@@ -2489,8 +2486,8 @@ buf_block_init_low(
 /********************************************************************//**
 Decompress a block.
-@return TRUE if successful */
-ibool
+@return true if successful */
+bool
 buf_zip_decompress(
 /*===============*/
   buf_block_t* block, /*!< in/out: block */
...
...
@@ -2534,7 +2531,7 @@ buf_zip_decompress(
   if (space) {
     space->release();
   }
-  return(TRUE);
+  return true;
 }
 ib::error() << "Unable to decompress "
...
...
@@ -2568,10 +2565,101 @@ buf_zip_decompress(
     space->release();
   }
-  return(FALSE);
+  return false;
 }

ATTRIBUTE_COLD
buf_block_t *buf_pool_t::unzip(buf_page_t *b, buf_pool_t::hash_chain &chain)
{
  buf_block_t *block= buf_LRU_get_free_block(have_no_mutex);
  buf_block_init_low(block);
  page_hash_latch &hash_lock= page_hash.lock_get(chain);
wait_for_unfix:
  mysql_mutex_lock(&mutex);
  hash_lock.lock();
  /* b->lock implies !b->can_relocate() */
  ut_ad(b->lock.have_x());
  ut_ad(b == page_hash.get(b->id(), chain));
  /* Wait for b->unfix() in any other threads. */
  uint32_t state= b->state();
  ut_ad(buf_page_t::buf_fix_count(state));
  ut_ad(!buf_page_t::is_freed(state));

  switch (state) {
  case buf_page_t::UNFIXED + 1:
  case buf_page_t::REINIT + 1:
    break;
  default:
    ut_ad(state < buf_page_t::READ_FIX);
    if (state < buf_page_t::UNFIXED + 1)
    {
      ut_ad(state > buf_page_t::FREED);
      b->lock.x_unlock();
      hash_lock.unlock();
      buf_LRU_block_free_non_file_page(block);
      mysql_mutex_unlock(&mutex);
      b->unfix();
      return nullptr;
    }
    mysql_mutex_unlock(&mutex);
    hash_lock.unlock();
    std::this_thread::sleep_for(std::chrono::microseconds(100));
    goto wait_for_unfix;
  }

  /* Ensure that another buf_page_get_low() or buf_page_t::page_fix()
  will wait for block->page.lock.x_unlock(). buf_relocate() will
  copy the state from b to block and replace b with block in page_hash. */
  b->set_state(buf_page_t::READ_FIX);

  mysql_mutex_lock(&flush_list_mutex);
  buf_relocate(b, &block->page);
  /* X-latch the block for the duration of the decompression. */
  block->page.lock.x_lock();
  buf_flush_relocate_on_flush_list(b, &block->page);
  mysql_mutex_unlock(&flush_list_mutex);

  /* Insert at the front of unzip_LRU list */
  buf_unzip_LRU_add_block(block, false);
  mysql_mutex_unlock(&mutex);
  hash_lock.unlock();

#if defined SUX_LOCK_GENERIC || defined UNIV_DEBUG
  b->lock.x_unlock();
  b->lock.free();
#endif
  ut_free(b);

  n_pend_unzip++;
  const bool ok{buf_zip_decompress(block, false)};
  n_pend_unzip--;

  if (UNIV_UNLIKELY(!ok))
  {
    mysql_mutex_lock(&mutex);
    block->page.read_unfix(state);
    block->page.lock.x_unlock();
    if (!buf_LRU_free_page(&block->page, true))
      ut_ad(0);
    mysql_mutex_unlock(&mutex);
    return nullptr;
  }
  else
    block->page.read_unfix(state);

  return block;
}

-buf_block_t* buf_pool_t::page_fix(const page_id_t id)
+buf_block_t *buf_pool_t::page_fix(const page_id_t id, dberr_t *err,
+                                  buf_pool_t::page_fix_conflicts c)
 {
   ha_handler_stats *const stats= mariadb_stats;
   buf_inc_get(stats);
...
...
@@ -2583,35 +2671,83 @@ buf_block_t* buf_pool_t::page_fix(const page_id_t id)
   buf_page_t *b= page_hash.get(id, chain);
   if (b)
   {
-    uint32_t state= b->fix();
+    uint32_t state= b->fix() + 1;
     ut_ad(!b->in_zip_hash);
-    ut_ad(b->frame);
     ut_ad(state >= buf_page_t::FREED);
     hash_lock.unlock_shared();
     if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED))
     {
+      ut_ad(state > buf_page_t::FREED);
+      if (c == FIX_ALSO_FREED && b->id() == id)
+      {
+        ut_ad(state == buf_page_t::FREED + 1);
+        return reinterpret_cast<buf_block_t*>(b);
+      }
       /* The page was marked as freed or corrupted. */
       b->unfix();
+    corrupted:
+      if (err)
+        *err= DB_CORRUPTION;
+      return nullptr;
     }
     if (state >= buf_page_t::READ_FIX && state < buf_page_t::WRITE_FIX)
     {
+      if (c == FIX_NOWAIT)
+      {
+      would_block:
+        b->unfix();
+        return reinterpret_cast<buf_block_t*>(-1);
+      }
+      if (UNIV_UNLIKELY(!b->frame))
+      {
+      wait_for_unzip:
+        b->unfix();
+        std::this_thread::sleep_for(std::chrono::microseconds(100));
+        continue;
+      }
       b->lock.s_lock();
       state= b->state();
       ut_ad(state < buf_page_t::READ_FIX || state >= buf_page_t::WRITE_FIX);
       b->lock.s_unlock();
     }
-    if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED))
-    {
-      /* The page was marked as freed or corrupted. */
-      b->unfix();
-      b= nullptr;
-    }
+    if (UNIV_UNLIKELY(!b->frame))
+    {
+      if (b->lock.x_lock_try());
+      else if (c == FIX_NOWAIT)
+        goto would_block;
+      else
+        goto wait_for_unzip;
+      buf_block_t *block= unzip(b, chain);
+      if (!block)
+        goto corrupted;
+      b= &block->page;
+      state= b->state();
+      b->lock.x_unlock();
+    }
     return reinterpret_cast<buf_block_t*>(b);
   }
   hash_lock.unlock_shared();
+  if (c == FIX_NOWAIT)
+    return reinterpret_cast<buf_block_t*>(-1);
-  switch (buf_read_page(id, 0, chain)) {
+  switch (dberr_t local_err= buf_read_page(id, chain)) {
   default:
+    if (err)
+      *err= local_err;
     return nullptr;
   case DB_SUCCESS:
   case DB_SUCCESS_LOCKED_REC:
     mariadb_increment_pages_read(stats);
-    buf_read_ahead_random(id, 0);
+    buf_read_ahead_random(id);
   }
 }
...
...
@@ -2619,27 +2755,24 @@ buf_block_t* buf_pool_t::page_fix(const page_id_t id)
 /** Low level function used to get access to a database page.
 @param[in] page_id    page id
 @param[in] zip_size   ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] rw_latch   RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in] rw_latch   latch mode
 @param[in] guess      guessed block or NULL
 @param[in] mode       BUF_GET, BUF_GET_IF_IN_POOL, or BUF_PEEK_IF_IN_POOL
 @param[in] mtr        mini-transaction
 @param[out] err       DB_SUCCESS or error code
-@param[in,out] no_wait  If not NULL on input, then we must not
-wait for current page latch. On output, the value is set to true if we had to
-return because we could not wait on page latch.
-@return pointer to the block or NULL */
+@return pointer to the block
+@retval nullptr if the block is corrupted or unavailable */
 TRANSACTIONAL_TARGET
 buf_block_t*
 buf_page_get_gen(
   const page_id_t page_id,
   ulint           zip_size,
-  ulint           rw_latch,
+  rw_lock_type_t  rw_latch,
   buf_block_t*    guess,
   ulint           mode,
   mtr_t*          mtr,
-  dberr_t*        err,
-  bool*           no_wait)
+  dberr_t*        err)
 {
   ulint retries= 0;
...
...
@@ -2658,12 +2791,7 @@ buf_page_get_gen(
         || log_sys.get_lsn() == recv_sys.lsn + SIZE_OF_FILE_CHECKPOINT
         || ibuf_upgrade_was_needed
         : !recv_recovery_is_on() || recv_sys.after_apply);
-  ut_ad(!mtr || mtr->is_active());
-  ut_ad(mtr || mode == BUF_PEEK_IF_IN_POOL);
-  ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)
-        || (rw_latch == RW_SX_LATCH) || (rw_latch == RW_NO_LATCH));
+  ut_ad(mtr->is_active());
   if (err)
     *err= DB_SUCCESS;
...
...
@@ -2745,11 +2873,11 @@ buf_page_get_gen(
       corrupted, or if an encrypted page with a valid
       checksum cannot be decypted. */
-    switch (dberr_t local_err= buf_read_page(page_id, zip_size, chain)) {
+    switch (dberr_t local_err= buf_read_page(page_id, chain)) {
     case DB_SUCCESS:
     case DB_SUCCESS_LOCKED_REC:
      mariadb_increment_pages_read(stats);
-      buf_read_ahead_random(page_id, zip_size);
+      buf_read_ahead_random(page_id);
      break;
    default:
      if (mode != BUF_GET_POSSIBLY_FREED
...
...
@@ -2793,18 +2921,7 @@ buf_page_get_gen(
     in buf_page_t::read_complete() or
     buf_pool_t::corrupted_evict(), or
     after buf_zip_decompress() in this function. */
-    if (!no_wait)
       block->page.lock.s_lock();
-    else if (!block->page.lock.s_lock_try())
-    {
-      ut_ad(rw_latch == RW_NO_LATCH);
-      /* We should not wait trying to acquire S latch for
-      current page while holding latch for the next page.
-      It would violate the latching order resulting in
-      possible deadlock. Caller must handle the failure. */
-      block->page.unfix();
-      *no_wait= true;
-      return nullptr;
-    }
     state= block->page.state();
     ut_ad(state < buf_page_t::READ_FIX || state >= buf_page_t::WRITE_FIX);
...
...
@@ -2834,18 +2951,6 @@ buf_page_get_gen(
     }
     ut_ad(id == page_id);
   }
-  else if (mode != BUF_PEEK_IF_IN_POOL)
-  {
-  }
-  else if (!mtr)
-  {
-    ut_ad(!block->page.oldest_modification());
-    mysql_mutex_lock(&buf_pool.mutex);
-    block->unfix();
-  free_unfixed_block:
-    if (!buf_LRU_free_page(&block->page, true))
-      ut_ad(0);
-    mysql_mutex_unlock(&buf_pool.mutex);
-    return nullptr;
-  }
   else if (UNIV_UNLIKELY(!block->page.frame))
   {
     /* The BUF_PEEK_IF_IN_POOL mode is mainly used for dropping an
     adaptive hash index. There cannot be an
...
...
@@ -2856,120 +2961,35 @@ buf_page_get_gen(
   ut_ad(mode == BUF_GET_IF_IN_POOL || mode == BUF_PEEK_IF_IN_POOL ||
         block->zip_size() == zip_size);
+  if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED))
+    goto ignore_block;
+  ut_ad((~buf_page_t::LRU_MASK) & state);
+  ut_ad(state > buf_page_t::WRITE_FIX || state < buf_page_t::READ_FIX);
   if (UNIV_UNLIKELY(!block->page.frame))
   {
     if (!block->page.lock.x_lock_try())
     {
     wait_for_unzip:
       /* The page is being read or written, or
-      another thread is executing buf_zip_decompress()
-      in buf_page_get_gen() on it. */
+      another thread is executing buf_pool.unzip() on it. */
       block->page.unfix();
       std::this_thread::sleep_for(std::chrono::microseconds(100));
       goto loop;
     }
-    buf_block_t *new_block= buf_LRU_get_free_block(have_no_mutex);
-    buf_block_init_low(new_block);
-  wait_for_unfix:
-    mysql_mutex_lock(&buf_pool.mutex);
-    page_hash_latch &hash_lock= buf_pool.page_hash.lock_get(chain);
-    /* It does not make sense to use
-    transactional_lock_guard here, because buf_relocate()
-    would likely make a memory transaction too large. */
-    hash_lock.lock();
-    /* block->page.lock implies !block->page.can_relocate() */
-    ut_ad(&block->page == buf_pool.page_hash.get(page_id, chain));
-    /* Wait for any other threads to release their buffer-fix
-    on the compressed-only block descriptor. */
-    state= block->page.state();
-    switch (state) {
-    case buf_page_t::UNFIXED + 1:
-    case buf_page_t::REINIT + 1:
-      break;
-    default:
-      ut_ad(state < buf_page_t::READ_FIX);
-      if (state < buf_page_t::UNFIXED + 1)
-      {
-        ut_ad(state > buf_page_t::FREED);
-        block->page.lock.x_unlock();
-        hash_lock.unlock();
-        buf_LRU_block_free_non_file_page(new_block);
-        mysql_mutex_unlock(&buf_pool.mutex);
-        goto ignore_block;
-      }
-      mysql_mutex_unlock(&buf_pool.mutex);
-      hash_lock.unlock();
-      std::this_thread::sleep_for(std::chrono::microseconds(100));
-      goto wait_for_unfix;
-    }
-    /* Ensure that another buf_page_get_gen() will wait for
-    new_block->page.lock.x_unlock(). */
-    block->page.set_state(buf_page_t::READ_FIX);
-    /* Move the compressed page from block->page to new_block,
-    and uncompress it. */
-    mysql_mutex_lock(&buf_pool.flush_list_mutex);
-    buf_relocate(&block->page, &new_block->page);
-    /* X-latch the block for the duration of the decompression. */
-    new_block->page.lock.x_lock();
-    ut_d(block->page.lock.x_unlock());
-    buf_flush_relocate_on_flush_list(&block->page, &new_block->page);
-    mysql_mutex_unlock(&buf_pool.flush_list_mutex);
-    /* Insert at the front of unzip_LRU list */
-    buf_unzip_LRU_add_block(new_block, FALSE);
-    mysql_mutex_unlock(&buf_pool.mutex);
-    hash_lock.unlock();
-#if defined SUX_LOCK_GENERIC || defined UNIV_DEBUG
-    block->page.lock.free();
-#endif
-    ut_free(reinterpret_cast<buf_page_t*>(block));
-    block= new_block;
-    buf_pool.n_pend_unzip++;
-    /* Decompress the page while not holding
-    buf_pool.mutex. */
-    const auto ok= buf_zip_decompress(block, false);
-    --buf_pool.n_pend_unzip;
-    if (!ok)
-    {
-      if (err)
-        *err= DB_PAGE_CORRUPTED;
-      mysql_mutex_lock(&buf_pool.mutex);
-    }
-    state= block->page.read_unfix(state);
-    block->page.lock.x_unlock();
-    if (!ok)
-      goto free_unfixed_block;
+    block= buf_pool.unzip(&block->page, chain);
+    if (!block)
+      goto ignore_unfixed;
   }
-  if (UNIV_UNLIKELY(state < buf_page_t::UNFIXED))
-    goto ignore_block;
-  ut_ad((~buf_page_t::LRU_MASK) & state);
-  ut_ad(state > buf_page_t::WRITE_FIX || state < buf_page_t::READ_FIX);
 #ifdef UNIV_DEBUG
   if (!(++buf_dbg_counter % 5771)) buf_pool.validate();
 #endif /* UNIV_DEBUG */
   ut_ad(block->page.frame);
   /* The state = block->page.state() may be stale at this point,
   and in fact, at any point of time if we consider its
...
...
storage/innobase/buf/buf0flu.cc
...
...
@@ -2682,12 +2682,12 @@ ATTRIBUTE_COLD void buf_flush_page_cleaner_init()
 /** Flush the buffer pool on shutdown. */
 ATTRIBUTE_COLD void buf_flush_buffer_pool()
 {
-  ut_ad(!os_aio_pending_reads());
   ut_ad(!buf_page_cleaner_is_active);
   ut_ad(!buf_flush_sync_lsn);
   service_manager_extend_timeout(INNODB_EXTEND_TIMEOUT_INTERVAL,
                                  "Waiting to flush the buffer pool");
+  os_aio_wait_until_no_pending_reads(false);
   mysql_mutex_lock(&buf_pool.flush_list_mutex);
...
...
storage/innobase/buf/buf0rea.cc
...
...
@@ -303,10 +303,9 @@ pages: to avoid deadlocks this function must be written such that it cannot
 end up waiting for these latches!
 @param[in] page_id  page id of a page which the current thread
                     wants to access
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
 @return number of page read requests issued */
 TRANSACTIONAL_TARGET
-ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size)
+ulint buf_read_ahead_random(const page_id_t page_id)
 {
   if (!srv_random_read_ahead || page_id.space() >= SRV_TMP_SPACE_ID)
     /* Disable the read-ahead for temporary tablespace */
...
...
@@ -353,6 +352,7 @@ ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size)
   /* Read all the suitable blocks within the area */
   buf_block_t *block= nullptr;
+  unsigned zip_size{space->zip_size()};
   if (UNIV_LIKELY(!zip_size))
   {
allocate_block:
...
...
@@ -405,15 +405,14 @@ if it is not already there. Sets the io_fix and an exclusive lock
 on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread.
 @param page_id   page id
-@param zip_size  ROW_FORMAT=COMPRESSED page size, or 0
 @param chain     buf_pool.page_hash cell for page_id
-@retval DB_SUCCESS if the page was read and is not corrupted,
+@retval DB_SUCCESS if the page was read and is not corrupted
 @retval DB_SUCCESS_LOCKED_REC if the page was not read
 @retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
 @retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
 after decryption normal page checksum does not match.
 @retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
-dberr_t buf_read_page(const page_id_t page_id, ulint zip_size,
-                      buf_pool_t::hash_chain &chain)
+dberr_t buf_read_page(const page_id_t page_id, buf_pool_t::hash_chain &chain)
 {
   fil_space_t *space= fil_space_t::get(page_id.space());
...
...
@@ -427,6 +426,8 @@ dberr_t buf_read_page(const page_id_t page_id, ulint zip_size,
   /* Our caller should already have ensured that the page does not
   exist in buf_pool.page_hash. */
   buf_block_t *block= nullptr;
+  unsigned zip_size= space->zip_size();
   if (UNIV_LIKELY(!zip_size))
   {
allocate_block:
...
...
@@ -511,10 +512,9 @@ NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
 function must be written such that it cannot end up waiting for these
 latches!
 @param[in] page_id  page id; see NOTE 3 above
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
 @return number of page read requests issued */
 TRANSACTIONAL_TARGET
-ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size)
+ulint buf_read_ahead_linear(const page_id_t page_id)
 {
   /* check if readahead is disabled.
   Disable the read ahead logic for temporary tablespace */
...
...
@@ -553,6 +553,11 @@ ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size)
     return 0;
   }
+  if (trx_sys_hdr_page(page_id))
+    /* If it is an ibuf bitmap page or trx sys hdr, we do no
+    read-ahead, as that could break the ibuf page access order */
+    goto fail;
+
   /* How many out of order accessed pages can we ignore
   when working out the access pattern for linear readahead */
   ulint count= std::min<ulint>(buf_pool_t::READ_AHEAD_PAGES -
...
...
@@ -647,6 +652,7 @@ ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size)
   /* If we got this far, read-ahead can be sensible: do it */
   buf_block_t *block= nullptr;
+  unsigned zip_size{space->zip_size()};
   if (UNIV_LIKELY(!zip_size))
   {
allocate_block:
...
...
storage/innobase/gis/gis0sea.cc
...
...
@@ -647,7 +647,7 @@ dberr_t rtr_search_to_nth_level(btr_cur_t *cur, que_thr_t *thr,
 search_loop:
   auto buf_mode= BUF_GET;
-  ulint rw_latch= RW_NO_LATCH;
+  rw_lock_type_t rw_latch= RW_NO_LATCH;
   if (height)
   {
...
...
@@ -658,7 +658,7 @@ dberr_t rtr_search_to_nth_level(btr_cur_t *cur, que_thr_t *thr,
     rw_latch= upper_rw_latch;
   }
   else if (latch_mode <= BTR_MODIFY_LEAF)
-    rw_latch= latch_mode;
+    rw_latch= rw_lock_type_t(latch_mode);
   dberr_t err;
   auto block_savepoint= mtr->get_savepoint();
...
...
storage/innobase/include/buf0buf.h
...
...
@@ -191,33 +191,29 @@ be implemented at a higher level. In other words, all possible
 accesses to a given page through this function must be protected by
 the same set of mutexes or latches.
 @param page_id page identifier
-@param zip_size ROW_FORMAT=COMPRESSED page size in bytes
 @return pointer to the block, s-latched */
-buf_page_t *buf_page_get_zip(const page_id_t page_id, ulint zip_size);
+buf_page_t *buf_page_get_zip(const page_id_t page_id);

 /** Get access to a database page. Buffered redo log may be applied.
 @param[in] page_id    page id
 @param[in] zip_size   ROW_FORMAT=COMPRESSED page size, or 0
-@param[in] rw_latch   RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH
+@param[in] rw_latch   latch mode
 @param[in] guess      guessed block or NULL
 @param[in] mode       BUF_GET, BUF_GET_IF_IN_POOL, or BUF_PEEK_IF_IN_POOL
 @param[in,out] mtr    mini-transaction
 @param[out] err       DB_SUCCESS or error code
-@param[in,out] no_wait  If not NULL on input, then we must not
-wait for current page latch. On output, the value is set to true if we had to
-return because we could not wait on page latch.
-@return pointer to the block or NULL */
+@return pointer to the block
+@retval nullptr if the block is corrupted or unavailable */
 buf_block_t*
 buf_page_get_gen(
   const page_id_t page_id,
   ulint           zip_size,
-  ulint           rw_latch,
+  rw_lock_type_t  rw_latch,
   buf_block_t*    guess,
   ulint           mode,
   mtr_t*          mtr,
-  dberr_t*        err = nullptr,
-  bool*           no_wait = nullptr);
+  dberr_t*        err = nullptr);

 /** Initialize a page in the buffer pool. The page is usually not read
 from a file even if it cannot be found in the buffer buf_pool. This is one
...
...
@@ -357,8 +353,8 @@ void buf_page_print(const byte* read_buf, ulint zip_size = 0)
 ATTRIBUTE_COLD __attribute__((nonnull));
 /********************************************************************//**
 Decompress a block.
-@return TRUE if successful */
-ibool
+@return true if successful */
+bool
 buf_zip_decompress(
 /*===============*/
   buf_block_t* block, /*!< in/out: block */
...
...
@@ -627,30 +623,42 @@ class buf_page_t
 public:
   const page_id_t &id() const { return id_; }
   uint32_t state() const { return zip.fix; }
-  uint32_t buf_fix_count() const
-  {
-    uint32_t f= state();
-    ut_ad(f >= FREED);
-    return f < UNFIXED ? (f - FREED) : (~LRU_MASK & f);
-  }
+  static uint32_t buf_fix_count(uint32_t s)
+  {
+    ut_ad(s >= FREED);
+    return s < UNFIXED ? (s - FREED) : (~LRU_MASK & s);
+  }
+  uint32_t buf_fix_count() const { return buf_fix_count(state()); }
+  /** Check if a file block is io-fixed.
+  @param s state()
+  @return whether s corresponds to an io-fixed block */
+  static bool is_io_fixed(uint32_t s)
+  { ut_ad(s >= FREED); return s >= READ_FIX; }
+  /** Check if a file block is read-fixed.
+  @param s state()
+  @return whether s corresponds to a read-fixed block */
+  static bool is_read_fixed(uint32_t s)
+  { return is_io_fixed(s) && s < WRITE_FIX; }
+  /** Check if a file block is write-fixed.
+  @param s state()
+  @return whether s corresponds to a write-fixed block */
+  static bool is_write_fixed(uint32_t s)
+  { ut_ad(s >= FREED); return s >= WRITE_FIX; }
   /** @return whether this block is read or write fixed;
   read_complete() or write_complete() will always release
   the io-fix before releasing U-lock or X-lock */
-  bool is_io_fixed() const
-  { const auto s= state(); ut_ad(s >= FREED); return s >= READ_FIX; }
+  bool is_io_fixed() const { return is_io_fixed(state()); }
   /** @return whether this block is write fixed;
   write_complete() will always release the write-fix before releasing U-lock */
-  bool is_write_fixed() const { return state() >= WRITE_FIX; }
-  /** @return whether this block is read fixed; this should never hold
-  when a thread is holding the block lock in any mode */
-  bool is_read_fixed() const { return is_io_fixed() && !is_write_fixed(); }
+  bool is_write_fixed() const { return is_write_fixed(state()); }
+  /** @return whether this block is read fixed */
+  bool is_read_fixed() const { return is_read_fixed(state()); }

   /** @return if this belongs to buf_pool.unzip_LRU */
   bool belongs_to_unzip_LRU() const
   { return UNIV_LIKELY_NULL(zip.data) && frame; }
-  bool is_freed() const
-  { const auto s= state(); ut_ad(s >= FREED); return s < UNFIXED; }
+  static bool is_freed(uint32_t s) { ut_ad(s >= FREED); return s < UNFIXED; }
+  bool is_freed() const { return is_freed(state()); }
   bool is_reinit() const { return !(~state() & REINIT); }

   void set_reinit(uint32_t prev_state)
...
...
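The ordering FREED < UNFIXED < READ_FIX < WRITE_FIX of the state word is what makes these new static predicates composable. A standalone model (the constants below are illustrative placeholders, not the real bit patterns in buf_page_t; only their relative order matters):

#include <cassert>
#include <cstdint>

// Illustrative constants; only their relative order matters here.
constexpr uint32_t FREED= 8, UNFIXED= 16, READ_FIX= 32, WRITE_FIX= 48;

static bool is_freed(uint32_t s)      { assert(s >= FREED); return s < UNFIXED; }
static bool is_io_fixed(uint32_t s)   { assert(s >= FREED); return s >= READ_FIX; }
static bool is_read_fixed(uint32_t s) { return is_io_fixed(s) && s < WRITE_FIX; }
static bool is_write_fixed(uint32_t s){ assert(s >= FREED); return s >= WRITE_FIX; }

int main()
{
  assert(is_freed(FREED + 1));        // freed page with one buffer-fix
  assert(is_read_fixed(READ_FIX));    // a read into the pool is in progress
  assert(is_write_fixed(WRITE_FIX));  // write-fixed implies io-fixed...
  assert(!is_read_fixed(WRITE_FIX));  // ...but is not read-fixed
  return 0;
}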
@@ -1358,11 +1366,43 @@ class buf_pool_t
 public:
+  /** page_fix() mode of operation */
+  enum page_fix_conflicts{
+    /** Fetch if in the buffer pool, also blocks marked as free */
+    FIX_ALSO_FREED= -1,
+    /** Fetch, waiting for page read completion */
+    FIX_WAIT_READ,
+    /** Fetch, but avoid any waits for */
+    FIX_NOWAIT
+  };
+
+  /** Look up and buffer-fix a page.
+  Note: If the page is read-fixed (being read into the buffer pool),
+  we would have to wait for the page latch before determining if the page
+  is accessible (it could be corrupted and have been evicted again).
+  If the caller is holding other page latches so that waiting for this
+  page latch could lead to lock order inversion (latching order violation),
+  the mode c=FIX_WAIT_READ must not be used.
+  @param id   page identifier
+  @param err  error code (will only be assigned when returning nullptr)
+  @param c    how to handle conflicts
+  @return undo log page, buffer-fixed
+  @retval -1 if c=FIX_NOWAIT and buffer-fixing would require waiting
+  @retval nullptr if the undo page was corrupted or freed */
-  buf_block_t *page_fix(const page_id_t id);
+  buf_block_t *page_fix(const page_id_t id, dberr_t *err,
+                        page_fix_conflicts c);
+  buf_block_t *page_fix(const page_id_t id)
+  { return page_fix(id, nullptr, FIX_WAIT_READ); }
+
+  /** Decompress a page and relocate the block descriptor
+  @param b      buffer-fixed compressed-only ROW_FORMAT=COMPRESSED page
+  @param chain  hash table chain for b->id().fold()
+  @return the decompressed block, x-latched and read-fixed
+  @retval nullptr if the decompression failed (b->unfix() will be invoked) */
+  ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result))
+  buf_block_t *unzip(buf_page_t *b, hash_chain &chain);

  /** @return whether the buffer pool contains a page
  @param page_id page identifier
...
...
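A usage sketch for the conflict modes declared above, following the pattern of btr_latch_prev() in storage/innobase/btr/btr0cur.cc. This is a standalone model with hypothetical names (BlockModel, Outcome, page_fix_nowait_model); in the real API the "would block" case is reported by returning -1 cast to a block pointer:

#include <cstdio>

struct BlockModel { int data; };

enum Outcome { OK, CORRUPTED_OR_FREED, WOULD_BLOCK };

// Models decoding the result of buf_pool_t::page_fix(id, &err, FIX_NOWAIT).
static BlockModel *page_fix_nowait_model(Outcome outcome)
{
  static BlockModel block;
  switch (outcome)
  {
  case CORRUPTED_OR_FREED: return nullptr; // err would be set by page_fix()
  case WOULD_BLOCK: return reinterpret_cast<BlockModel*>(-1);
  default: return &block;                  // buffer-fixed; unfix when done
  }
}

int main()
{
  BlockModel *b= page_fix_nowait_model(WOULD_BLOCK);
  if (!b)
    std::puts("nullptr: page corrupted or freed");
  else if (b == reinterpret_cast<BlockModel*>(-1))
    std::puts("sentinel: buffer-fixing would require a wait; retry later");
  else
    std::puts("success: block is buffer-fixed");
  return 0;
}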
@@ -1572,8 +1612,8 @@ class buf_pool_t
   /** map of block->frame to buf_block_t blocks that belong
   to buf_buddy_alloc(); protected by buf_pool.mutex */
   hash_table_t zip_hash;
-  Atomic_counter<ulint> n_pend_unzip; /*!< number of pending decompressions */
+  /** number of pending unzip() */
+  Atomic_counter<ulint> n_pend_unzip;
   time_t last_printout_time;
                             /*!< when buf_print_io was last time
...
...
storage/innobase/include/buf0rea.h
...
...
@@ -33,15 +33,14 @@ buffer buf_pool if it is not already there. Sets the io_fix flag and sets
 an exclusive lock on the buffer frame. The flag is cleared and the x-lock
 released by the i/o-handler thread.
 @param page_id   page id
-@param zip_size  ROW_FORMAT=COMPRESSED page size, or 0
 @param chain     buf_pool.page_hash cell for page_id
-@retval DB_SUCCESS if the page was read and is not corrupted,
+@retval DB_SUCCESS if the page was read and is not corrupted
 @retval DB_SUCCESS_LOCKED_REC if the page was not read
 @retval DB_PAGE_CORRUPTED if page based on checksum check is corrupted,
 @retval DB_DECRYPTION_FAILED if page post encryption checksum matches but
 after decryption normal page checksum does not match.
 @retval DB_TABLESPACE_DELETED if tablespace .ibd file is missing */
-dberr_t buf_read_page(const page_id_t page_id, ulint zip_size,
-                      buf_pool_t::hash_chain &chain);
+dberr_t buf_read_page(const page_id_t page_id, buf_pool_t::hash_chain &chain);

 /** High-level function which reads a page asynchronously from a file to the
...
...
@@ -63,9 +62,8 @@ pages: to avoid deadlocks this function must be written such that it cannot
 end up waiting for these latches!
 @param[in] page_id  page id of a page which the current thread
                     wants to access
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
 @return number of page read requests issued */
-ulint buf_read_ahead_random(const page_id_t page_id, ulint zip_size);
+ulint buf_read_ahead_random(const page_id_t page_id);

 /** Applies linear read-ahead if in the buf_pool the page is a border page of
 a linear read-ahead area and all the pages in the area have been accessed.
...
...
@@ -87,9 +85,8 @@ NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this
 function must be written such that it cannot end up waiting for these
 latches!
 @param[in] page_id  page id; see NOTE 3 above
-@param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0
 @return number of page read requests issued */
-ulint buf_read_ahead_linear(const page_id_t page_id, ulint zip_size);
+ulint buf_read_ahead_linear(const page_id_t page_id);

 /** Schedule a page for recovery.
 @param space  tablespace
...
...
storage/innobase/row/row0import.cc
...
...
@@ -2178,36 +2178,41 @@ updated then its state must be set to BUF_PAGE_NOT_USED.
 @retval DB_SUCCESS or error code. */
 dberr_t PageConverter::operator()(buf_block_t* block) UNIV_NOTHROW
 {
-  /* If we already had an old page with matching number
-  in the buffer pool, evict it now, because
-  we no longer evict the pages on DISCARD TABLESPACE. */
-  buf_page_get_gen(block->page.id(), get_zip_size(), RW_NO_LATCH,
-                   nullptr, BUF_PEEK_IF_IN_POOL, nullptr, nullptr, nullptr);
+  /* If we already had an old page with matching number in the buffer
+  pool, evict it now, because we no longer evict the pages on
+  DISCARD TABLESPACE. */
+  if (buf_block_t *b= buf_pool.page_fix(block->page.id(), nullptr,
+                                        buf_pool_t::FIX_ALSO_FREED))
+  {
+    ut_ad(!b->page.oldest_modification());
+    mysql_mutex_lock(&buf_pool.mutex);
+    b->unfix();
+    if (!buf_LRU_free_page(&b->page, true))
+      ut_ad(0);
+    mysql_mutex_unlock(&buf_pool.mutex);
+  }

   uint16_t page_type;

   if (dberr_t err= update_page(block, page_type))
-  {
     return err;
-  }

   const bool full_crc32= fil_space_t::full_crc32(get_space_flags());
   byte* frame= get_frame(block);
   memset_aligned<8>(frame + FIL_PAGE_LSN, 0, 8);

   if (!block->page.zip.data)
-  {
-    buf_flush_init_for_writing(NULL, block->page.frame, NULL, full_crc32);
-  }
+    buf_flush_init_for_writing(nullptr, block->page.frame, nullptr,
+                               full_crc32);
   else if (fil_page_type_is_index(page_type))
-  {
-    buf_flush_init_for_writing(NULL, block->page.zip.data, &block->page.zip,
-                               full_crc32);
-  }
+    buf_flush_init_for_writing(nullptr, block->page.zip.data,
+                               &block->page.zip, full_crc32);
   else
-  {
     /* Calculate and update the checksum of non-index
     pages for ROW_FORMAT=COMPRESSED tables. */
     buf_flush_update_zip_checksum(block->page.zip.data, block->zip_size());
-  }

   return DB_SUCCESS;
 }
...
...
storage/innobase/row/row0merge.cc
...
...
@@ -2157,38 +2157,6 @@ row_merge_read_clustered_index(
           mem_heap_empty(row_heap);
-          if (!mtr_started)
-            goto scan_next;
-          if (clust_index->lock.is_waiting())
-          {
-            /* There are waiters on the clustered
-            index tree lock, likely the purge
-            thread. Store and restore the cursor
-            position, and yield so that scanning a
-            large table will not starve other
-            threads. */
-
-            /* Store the cursor position on the last user
-            record on the page. */
-            if (!btr_pcur_move_to_prev_on_page(&pcur))
-              goto corrupted_index;
-            /* Leaf pages must never be empty, unless
-            this is the only page in the index tree. */
-            if (!btr_pcur_is_on_user_rec(&pcur) &&
-                btr_pcur_get_block(&pcur)->page.id().page_no() !=
-                clust_index->page)
-              goto corrupted_index;
-
-            btr_pcur_store_position(&pcur, &mtr);
-            mtr.commit();
-            mtr_started= false;
-
-            /* Give the waiters a chance to proceed. */
-            std::this_thread::yield();
 scan_next:
           ut_ad(!mtr_started);
           ut_ad(!mtr.is_active());
           mtr.start();
           mtr_started= true;
...
...
@@ -2236,14 +2204,14 @@ row_merge_read_clustered_index(
       buf_page_make_young_if_needed(&block->page);
-      const auto s= mtr.get_savepoint();
-      mtr.rollback_to_savepoint(s - 2, s - 1);
       page_cur_set_before_first(block, cur);
       if (!page_cur_move_to_next(cur) || page_cur_is_after_last(cur))
         goto corrupted_rec;
+      const auto s= mtr.get_savepoint();
+      mtr.rollback_to_savepoint(s - 2, s - 1);
     }
     else
     {
       mem_heap_empty(row_heap);
...
...
storage/innobase/trx/trx0undo.cc
...
...
@@ -185,7 +185,7 @@ trx_undo_get_prev_rec_from_prev_page(buf_block_t *&block, uint16_t rec,
     return nullptr;
   if (!buf_page_make_young_if_needed(&block->page))
-    buf_read_ahead_linear(block->page.id(), 0);
+    buf_read_ahead_linear(block->page.id());
   return trx_undo_page_get_last_rec(block, page_no, offset);
 }
...
...
@@ -242,7 +242,7 @@ trx_undo_get_prev_rec(buf_block_t *&block, uint16_t rec, uint32_t page_no,
 static trx_undo_rec_t*
 trx_undo_get_next_rec_from_next_page(const buf_block_t *&block,
                                      uint32_t page_no, uint16_t offset,
-                                     ulint mode, mtr_t *mtr)
+                                     rw_lock_type_t mode, mtr_t *mtr)
 {
   if (page_no == block->page.id().page_no() &&
       mach_read_from_2(block->page.frame + offset + TRX_UNDO_NEXT_LOG))
...
...
@@ -272,7 +272,8 @@ trx_undo_get_next_rec_from_next_page(const buf_block_t *&block,
 @retval nullptr if none */
 static trx_undo_rec_t*
 trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no,
-                       uint16_t offset, ulint mode, const buf_block_t *&block,
+                       uint16_t offset, rw_lock_type_t mode,
+                       const buf_block_t *&block,
                        mtr_t *mtr, dberr_t *err)
 {
   buf_block_t *b= buf_page_get_gen(page_id_t{space.id, page_no}, 0, mode,
...
...
@@ -282,7 +283,7 @@ trx_undo_get_first_rec(const fil_space_t &space, uint32_t page_no,
     return nullptr;
   if (!buf_page_make_young_if_needed(&b->page))
-    buf_read_ahead_linear(b->page.id(), 0);
+    buf_read_ahead_linear(b->page.id());
   if (trx_undo_rec_t *rec= trx_undo_page_get_first_rec(b, page_no, offset))
     return rec;
...
...