Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
M
MariaDB
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
nexedi
MariaDB
Commits
35d626f0
Commit
35d626f0
authored
Feb 27, 2008
by
sunny
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
branches/innodb+: Delete buffer port from branches/fts:r2283
parent
8c24ad9e
Changes
25
Hide whitespace changes
Inline
Side-by-side
Showing
25 changed files
with
1921 additions
and
1008 deletions
+1921
-1008
btr/btr0btr.c
btr/btr0btr.c
+14
-1
btr/btr0cur.c
btr/btr0cur.c
+269
-125
buf/buf0buddy.c
buf/buf0buddy.c
+13
-11
buf/buf0buf.c
buf/buf0buf.c
+123
-9
buf/buf0lru.c
buf/buf0lru.c
+1
-1
fil/fil0fil.c
fil/fil0fil.c
+2
-52
fsp/fsp0fsp.c
fsp/fsp0fsp.c
+4
-4
ibuf/ibuf0ibuf.c
ibuf/ibuf0ibuf.c
+1061
-669
include/btr0btr.h
include/btr0btr.h
+15
-0
include/btr0btr.ic
include/btr0btr.ic
+3
-3
include/btr0cur.h
include/btr0cur.h
+45
-2
include/btr0pcur.h
include/btr0pcur.h
+10
-0
include/btr0pcur.ic
include/btr0pcur.ic
+22
-0
include/buf0buf.h
include/buf0buf.h
+37
-5
include/fil0fil.h
include/fil0fil.h
+0
-16
include/ibuf0ibuf.h
include/ibuf0ibuf.h
+38
-36
include/ibuf0ibuf.ic
include/ibuf0ibuf.ic
+28
-27
include/ibuf0types.h
include/ibuf0types.h
+0
-1
include/row0row.h
include/row0row.h
+3
-0
include/univ.i
include/univ.i
+6
-0
row/row0purge.c
row/row0purge.c
+125
-8
row/row0row.c
row/row0row.c
+35
-1
row/row0uins.c
row/row0uins.c
+1
-1
row/row0umod.c
row/row0umod.c
+2
-2
row/row0upd.c
row/row0upd.c
+64
-34
No files found.
btr/btr0btr.c
View file @
35d626f0
...
...
@@ -558,6 +558,7 @@ btr_page_get_father_node_ptr(
its page x-latched */
mtr_t
*
mtr
)
/* in: mtr */
{
page_t
*
page
;
dtuple_t
*
tuple
;
rec_t
*
user_rec
;
rec_t
*
node_ptr
;
...
...
@@ -574,7 +575,19 @@ btr_page_get_father_node_ptr(
ut_ad
(
dict_index_get_page
(
index
)
!=
page_no
);
level
=
btr_page_get_level
(
btr_cur_get_page
(
cursor
),
mtr
);
user_rec
=
btr_cur_get_rec
(
cursor
);
page
=
btr_cur_get_page
(
cursor
);
if
(
UNIV_UNLIKELY
(
page_get_n_recs
(
page
)
==
0
))
{
/* Empty pages can result from buffered delete operations.
The first record from the free list can be used to find the
father node. */
user_rec
=
page_header_get_ptr
(
page
,
PAGE_FREE
);
ut_a
(
user_rec
);
}
else
{
user_rec
=
btr_cur_get_rec
(
cursor
);
}
ut_a
(
page_rec_is_user_rec
(
user_rec
));
tuple
=
dict_index_build_node_ptr
(
index
,
user_rec
,
0
,
heap
,
level
);
...
...
btr/btr0cur.c
View file @
35d626f0
...
...
@@ -39,6 +39,14 @@ Created 10/16/1994 Heikki Tuuri
#include "lock0lock.h"
#include "zlib.h"
/* Btree operation types, introduced as part of delete buffering. */
typedef
enum
btr_op_enum
{
BTR_NO_OP
=
0
,
BTR_INSERT_OP
,
BTR_DELETE_OP
,
BTR_DELMARK_OP
}
btr_op_t
;
#ifdef UNIV_DEBUG
/* If the following is set to TRUE, this module prints a lot of
trace information of individual record operations */
...
...
@@ -139,6 +147,8 @@ btr_rec_get_externally_stored_len(
rec_t
*
rec
,
/* in: record */
const
ulint
*
offsets
);
/* in: array returned by rec_get_offsets() */
/**********************************************************
The following function is used to set the deleted bit of a record. */
UNIV_INLINE
...
...
@@ -148,7 +158,7 @@ btr_rec_set_deleted_flag(
/* out: TRUE on success;
FALSE on page_zip overflow */
rec_t
*
rec
,
/* in/out: physical record */
page_zip_des_t
*
page_zip
,
/* in/out: compressed page (or NULL) */
page_zip_des_t
*
page_zip
,
/* in/out: compressed page (or NULL) */
ulint
flag
)
/* in: nonzero if delete marked */
{
if
(
page_rec_is_comp
(
rec
))
{
...
...
@@ -306,25 +316,29 @@ btr_cur_search_to_nth_level(
RW_S_LATCH, or 0 */
mtr_t
*
mtr
)
/* in: mtr */
{
page_cur_t
*
page_cursor
;
page_t
*
page
;
buf_block_t
*
block
;
ulint
space
;
buf_block_t
*
guess
;
ulint
height
;
rec_t
*
node_ptr
;
ulint
page_no
;
ulint
space
;
ulint
up_match
;
ulint
up_bytes
;
ulint
low_match
;
ulint
low_bytes
;
ulint
height
;
ulint
savepoint
;
ulint
rw_latch
;
ulint
page_mode
;
ulint
insert_planned
;
ulint
buf_mode
;
ulint
estimate
;
ulint
zip_size
;
ulint
watch_leaf
;
page_cur_t
*
page_cursor
;
ulint
ignore_sec_unique
;
btr_op_t
btr_op
=
BTR_NO_OP
;
ulint
root_height
=
0
;
/* remove warning */
#ifdef BTR_CUR_ADAPT
btr_search_t
*
info
;
#endif
...
...
@@ -344,17 +358,38 @@ btr_cur_search_to_nth_level(
cursor
->
up_match
=
ULINT_UNDEFINED
;
cursor
->
low_match
=
ULINT_UNDEFINED
;
#endif
insert_planned
=
latch_mode
&
BTR_INSERT
;
/* This flags are mutually exclusive, they are lumped together
with the latch mode for historical reasons. It's possible for
none of the flags to be set. */
if
(
latch_mode
&
BTR_INSERT
)
{
btr_op
=
BTR_INSERT_OP
;
}
else
if
(
latch_mode
&
BTR_DELETE
)
{
btr_op
=
BTR_DELETE_OP
;
}
else
if
(
latch_mode
&
BTR_DELETE_MARK
)
{
btr_op
=
BTR_DELMARK_OP
;
}
watch_leaf
=
latch_mode
&
BTR_WATCH_LEAF
;
estimate
=
latch_mode
&
BTR_ESTIMATE
;
ignore_sec_unique
=
latch_mode
&
BTR_IGNORE_SEC_UNIQUE
;
latch_mode
=
latch_mode
&
~
(
BTR_INSERT
|
BTR_ESTIMATE
|
BTR_IGNORE_SEC_UNIQUE
);
ut_ad
(
!
insert_planned
||
(
mode
==
PAGE_CUR_LE
));
/* Turn the flags unrelated to the latch mode off. */
latch_mode
&=
~
(
BTR_INSERT
|
BTR_DELETE_MARK
|
BTR_DELETE
|
BTR_ESTIMATE
|
BTR_IGNORE_SEC_UNIQUE
|
BTR_WATCH_LEAF
);
cursor
->
flag
=
BTR_CUR_BINARY
;
cursor
->
index
=
index
;
cursor
->
leaf_in_buf_pool
=
FALSE
;
cursor
->
ibuf_cnt
=
ULINT_UNDEFINED
;
#ifndef BTR_CUR_ADAPT
guess
=
NULL
;
#else
...
...
@@ -367,9 +402,17 @@ btr_cur_search_to_nth_level(
#ifdef UNIV_SEARCH_PERF_STAT
info
->
n_searches
++
;
#endif
/* TODO: investigate if there is any real reason for forbidding
adaptive hash usage when watch_leaf is true.*/
/* Ibuf does not use adaptive hash; this is prevented by the
latch_mode check below. */
if
(
btr_search_latch
.
writer
==
RW_LOCK_NOT_LOCKED
&&
latch_mode
<=
BTR_MODIFY_LEAF
&&
info
->
last_hash_succ
&&
latch_mode
<=
BTR_MODIFY_LEAF
&&
info
->
last_hash_succ
&&
!
estimate
&&
!
watch_leaf
#ifdef PAGE_CUR_LE_OR_EXTENDS
&&
mode
!=
PAGE_CUR_LE_OR_EXTENDS
#endif
/* PAGE_CUR_LE_OR_EXTENDS */
...
...
@@ -390,8 +433,9 @@ btr_cur_search_to_nth_level(
return
;
}
#endif
#endif
#endif
/* BTR_CUR_HASH_ADAPT */
#endif
/* BTR_CUR_ADAPT */
btr_cur_n_non_sea
++
;
/* If the hash search did not succeed, do binary search down the
...
...
@@ -456,154 +500,228 @@ btr_cur_search_to_nth_level(
/* Loop and search until we arrive at the desired level */
for
(;;)
{
ulint
zip_size
;
buf_block_t
*
block
;
search_loop:
if
(
height
==
0
)
{
if
(
watch_leaf
)
{
buf_mode
=
BUF_GET_IF_IN_POOL
;
}
else
if
(
latch_mode
<=
BTR_MODIFY_LEAF
)
{
rw_latch
=
latch_mode
;
if
(
btr_op
!=
BTR_NO_OP
&&
ibuf_should_try
(
index
,
ignore_sec_unique
))
{
/* Try insert/delete mark/delete to the
insert/delete buffer if the page is not in
the buffer pool */
buf_mode
=
BUF_GET_IF_IN_POOL
;
}
}
}
retry_page_get:
zip_size
=
dict_table_zip_size
(
index
->
table
);
zip_size
=
dict_table_zip_size
(
index
->
table
);
block
=
buf_page_get_gen
(
space
,
zip_size
,
page_no
,
rw_latch
,
guess
,
buf_mode
,
__FILE__
,
__LINE__
,
mtr
);
if
(
watch_leaf
&&
height
==
0
)
{
ut_a
(
buf_mode
==
BUF_GET_IF_IN_POOL
);
buf_mode
=
BUF_GET_IF_IN_POOL_OR_WATCH
;
}
block
=
buf_page_get_gen
(
space
,
zip_size
,
page_no
,
rw_latch
,
guess
,
buf_mode
,
__FILE__
,
__LINE__
,
mtr
);
if
(
watch_leaf
&&
height
==
0
)
{
cursor
->
leaf_in_buf_pool
=
!!
block
;
/* We didn't find a page but we set a watch on it. */
if
(
block
==
NULL
)
{
/* This must be a search to perform an insert;
try insert to the insert buffer */
ut_ad
(
buf_mode
==
BUF_GET_IF_IN_POOL
);
ut_ad
(
insert_planned
);
ut_ad
(
cursor
->
thr
);
if
(
ibuf_should_try
(
index
,
ignore_sec_unique
)
&&
ibuf_insert
(
tuple
,
index
,
space
,
zip_size
,
page_no
,
cursor
->
thr
))
{
/* Insertion to the insert buffer succeeded */
cursor
->
flag
=
BTR_CUR_INSERT_TO_IBUF
;
if
(
UNIV_LIKELY_NULL
(
heap
))
{
mem_heap_free
(
heap
);
cursor
->
flag
=
BTR_CUR_ABORTED
;
goto
func_exit
;
}
}
if
(
block
==
NULL
)
{
/* This must be a search to perform an insert/delete
mark/ delete; try using the insert/delete buffer */
ut_ad
(
buf_mode
==
BUF_GET_IF_IN_POOL
);
ut_ad
(
cursor
->
thr
);
if
(
ibuf_should_try
(
index
,
ignore_sec_unique
))
{
switch
(
btr_op
)
{
case
BTR_INSERT_OP
:
if
(
ibuf_insert
(
IBUF_OP_INSERT
,
tuple
,
index
,
space
,
zip_size
,
page_no
,
cursor
->
thr
))
{
cursor
->
flag
=
BTR_CUR_INSERT_TO_IBUF
;
goto
func_exit
;
}
break
;
case
BTR_DELMARK_OP
:
if
(
ibuf_insert
(
IBUF_OP_DELETE_MARK
,
tuple
,
index
,
space
,
zip_size
,
page_no
,
cursor
->
thr
))
{
cursor
->
flag
=
BTR_CUR_DEL_MARK_IBUF
;
goto
func_exit
;
}
goto
func_exit
;
}
/* Insert to the insert buffer did not succeed:
retry page get */
break
;
case
BTR_DELETE_OP
:
if
(
ibuf_insert
(
IBUF_OP_DELETE
,
tuple
,
index
,
space
,
zip_size
,
page_no
,
cursor
->
thr
))
{
buf_mode
=
BUF_GET
;
cursor
->
flag
=
BTR_CUR_DELETE_IBUF
;
goto
retry_page_get
;
goto
func_exit
;
}
break
;
default:
ut_error
;
}
}
page
=
buf_block_get_frame
(
block
);
/* Insert to the insert/delete buffer did not succeed, we
must read the page from disk. */
buf_mode
=
BUF_GET
;
goto
retry_page_get
;
}
block
->
check_index_page_at_flush
=
TRUE
;
page
=
buf_block_get_frame
(
block
);
#ifdef UNIV_ZIP_DEBUG
if
(
rw_latch
!=
RW_NO_LATCH
)
{
const
page_zip_des_t
*
page_zip
=
buf_block_get_page_zip
(
block
);
ut_a
(
!
page_zip
||
page_zip_validate
(
page_zip
,
page
));
}
#endif
/* UNIV_ZIP_DEBUG */
if
(
rw_latch
!=
RW_NO_LATCH
)
{
const
page_zip_des_t
*
page_zip
;
block
->
check_index_page_at_flush
=
TRUE
;
page_zip
=
buf_block_get_page_zip
(
block
);
ut_a
(
!
page_zip
||
page_zip_validate
(
page_zip
,
page
));
}
#endif
/* UNIV_ZIP_DEBUG */
#ifdef UNIV_SYNC_DEBUG
if
(
rw_latch
!=
RW_NO_LATCH
)
{
buf_block_dbg_add_level
(
block
,
SYNC_TREE_NODE
);
}
if
(
rw_latch
!=
RW_NO_LATCH
)
{
buf_block_dbg_add_level
(
block
,
SYNC_TREE_NODE
);
}
#endif
ut_ad
(
0
==
ut_dulint_cmp
(
index
->
id
,
btr_page_get_index_id
(
page
)));
ut_ad
(
0
==
ut_dulint_cmp
(
index
->
id
,
btr_page_get_index_id
(
page
)));
if
(
UNIV_UNLIKELY
(
height
==
ULINT_UNDEFINED
))
{
/* We are in the root node */
if
(
UNIV_UNLIKELY
(
height
==
ULINT_UNDEFINED
))
{
/* We are in the root node */
height
=
btr_page_get_level
(
page
,
mtr
);
root_height
=
height
;
cursor
->
tree_height
=
root_height
+
1
;
height
=
btr_page_get_level
(
page
,
mtr
);
root_height
=
height
;
cursor
->
tree_height
=
root_height
+
1
;
/* 1-level trees must be handled here
for BTR_WATCH_LEAF. */
if
(
watch_leaf
&&
height
==
0
)
{
cursor
->
leaf_in_buf_pool
=
TRUE
;
}
#ifdef BTR_CUR_ADAPT
if
(
block
!=
guess
)
{
info
->
root_guess
=
block
;
}
#endif
if
(
block
!=
guess
)
{
info
->
root_guess
=
block
;
}
#endif
}
if
(
height
==
0
)
{
if
(
rw_latch
==
RW_NO_LATCH
)
{
btr_cur_latch_leaves
(
page
,
space
,
zip_size
,
page_no
,
latch_mode
,
cursor
,
mtr
);
}
if
(
height
==
0
)
{
if
(
rw_latch
==
RW_NO_LATCH
)
{
if
((
latch_mode
!=
BTR_MODIFY_TREE
)
&&
(
latch_mode
!=
BTR_CONT_MODIFY_TREE
))
{
btr_cur_latch_leaves
(
page
,
space
,
zip_size
,
page_no
,
latch_mode
,
cursor
,
mtr
);
}
/* Release the tree s-latch */
if
(
latch_mode
!=
BTR_MODIFY_TREE
&&
latch_mode
!=
BTR_CONT_MODIFY_TREE
)
{
mtr_release_s_latch_at_savepoint
(
mtr
,
savepoint
,
dict_index_get_lock
(
index
));
}
/* Release the tree s-latch */
page_mode
=
mode
;
mtr_release_s_latch_at_savepoint
(
mtr
,
savepoint
,
dict_index_get_lock
(
index
));
}
page_cur_search_with_match
(
block
,
index
,
tuple
,
page_mode
,
&
up_match
,
&
up_bytes
,
&
low_match
,
&
low_bytes
,
page_cursor
);
page_mode
=
mode
;
}
if
(
estimate
)
{
btr_cur_add_path_info
(
cursor
,
height
,
root_height
);
}
page_cur_search_with_match
(
block
,
index
,
tuple
,
page_mode
,
&
up_match
,
&
up_bytes
,
&
low_match
,
&
low_bytes
,
page_cursor
);
/* If this is the desired level, leave the loop */
if
(
estimate
)
{
btr_cur_add_path_info
(
cursor
,
height
,
root_height
);
}
ut_ad
(
height
==
btr_page_get_level
(
page_cur_get_page
(
page_cursor
),
mtr
));
/* If this is the desired level, leave the loop */
if
(
level
==
height
)
{
ut_ad
(
height
==
btr_page_get_level
(
page_cur_get_page
(
page_cursor
),
mtr
));
if
(
level
>
0
)
{
/* x-latch the page */
page
=
btr_page_get
(
space
,
zip_size
,
page_no
,
RW_X_LATCH
,
mtr
);
ut_a
((
ibool
)
!!
page_is_comp
(
page
)
==
dict_table_is_comp
(
index
->
table
));
}
if
(
level
==
height
)
{
break
;
if
(
level
>
0
)
{
/* x-latch the page */
page
=
btr_page_get
(
space
,
zip_size
,
page_no
,
RW_X_LATCH
,
mtr
);
ut_a
((
ibool
)
!!
page_is_comp
(
page
)
==
dict_table_is_comp
(
index
->
table
));
}
ut_ad
(
height
>
0
);
goto
loop_end
;
}
height
--
;
ut_ad
(
height
>
0
)
;
if
((
height
==
0
)
&&
(
latch_mode
<=
BTR_MODIFY_LEAF
))
{
height
--
;
rw_latch
=
latch_mode
;
node_ptr
=
page_cur_get_rec
(
page_cursor
)
;
if
(
insert_planned
&&
ibuf_should_try
(
index
,
ignore_sec_unique
))
{
offsets
=
rec_get_offsets
(
node_ptr
,
cursor
->
index
,
offsets
,
ULINT_UNDEFINED
,
&
heap
);
/* Try insert to the insert buffer if the
page is not in the buffer pool */
/* Go to the child node */
page_no
=
btr_node_ptr_get_child_page_no
(
node_ptr
,
offsets
);
buf_mode
=
BUF_GET_IF_IN_POOL
;
}
}
if
(
index
->
type
&
DICT_IBUF
&&
height
==
level
)
{
/* We're doing a search on an ibuf tree and we're one level
above the leaf page. (Assuming level == 0, which it should
be.) */
guess
=
NULL
;
ulint
is_min_rec
;
node_ptr
=
page_cur_get_rec
(
page_cursor
);
offsets
=
rec_get_offsets
(
node_ptr
,
cursor
->
index
,
offsets
,
ULINT_UNDEFINED
,
&
heap
);
/* Go to the child node */
page_no
=
btr_node_ptr_get_child_page_no
(
node_ptr
,
offsets
);
}
is_min_rec
=
rec_get_info_bits
(
node_ptr
,
0
)
&
REC_INFO_MIN_REC_FLAG
;
if
(
UNIV_LIKELY_NULL
(
heap
))
{
mem_heap_free
(
heap
);
if
(
!
is_min_rec
)
{
cursor
->
ibuf_cnt
=
ibuf_rec_get_fake_counter
(
node_ptr
);
ut_a
(
cursor
->
ibuf_cnt
<=
0xFFFF
||
cursor
->
ibuf_cnt
==
ULINT_UNDEFINED
);
}
}
goto
search_loop
;
loop_end:
if
(
level
==
0
)
{
cursor
->
low_match
=
low_match
;
cursor
->
low_bytes
=
low_bytes
;
...
...
@@ -625,6 +743,11 @@ btr_cur_search_to_nth_level(
}
func_exit:
if
(
UNIV_LIKELY_NULL
(
heap
))
{
mem_heap_free
(
heap
);
}
if
(
has_search_latch
)
{
rw_lock_s_lock
(
&
btr_search_latch
);
...
...
@@ -686,8 +809,7 @@ btr_cur_open_at_index_side(
page_t
*
page
;
block
=
buf_page_get_gen
(
space
,
zip_size
,
page_no
,
RW_NO_LATCH
,
NULL
,
BUF_GET
,
__FILE__
,
__LINE__
,
mtr
);
__FILE__
,
__LINE__
,
mtr
);
page
=
buf_block_get_frame
(
block
);
ut_ad
(
0
==
ut_dulint_cmp
(
index
->
id
,
btr_page_get_index_id
(
page
)));
...
...
@@ -806,8 +928,7 @@ btr_cur_open_at_rnd_pos(
block
=
buf_page_get_gen
(
space
,
zip_size
,
page_no
,
RW_NO_LATCH
,
NULL
,
BUF_GET
,
__FILE__
,
__LINE__
,
mtr
);
__FILE__
,
__LINE__
,
mtr
);
page
=
buf_block_get_frame
(
block
);
ut_ad
(
0
==
ut_dulint_cmp
(
index
->
id
,
btr_page_get_index_id
(
page
)));
...
...
@@ -2651,7 +2772,7 @@ btr_cur_del_mark_set_sec_rec(
}
/***************************************************************
Sets a secondary index record
delete mark to FALSE
. This function is only
Sets a secondary index record
'd delete mark to value
. This function is only
used by the insert buffer insert merge mechanism. */
UNIV_INTERN
void
...
...
@@ -2662,14 +2783,38 @@ btr_cur_del_unmark_for_ibuf(
corresponding to rec, or NULL
when the tablespace is
uncompressed */
ibool
val
,
/* in: value to set */
mtr_t
*
mtr
)
/* in: mtr */
{
/* We do not need to reserve btr_search_latch, as the page has just
been read to the buffer pool and there cannot be a hash index to it. */
btr_rec_set_deleted_flag
(
rec
,
page_zip
,
FALSE
);
btr_rec_set_deleted_flag
(
rec
,
page_zip
,
val
);
btr_cur_del_mark_set_sec_rec_log
(
rec
,
val
,
mtr
);
}
/***************************************************************
Sets a secondary index record's delete mark to the given value. This
function is only used by the insert buffer merge mechanism. */
void
btr_cur_set_deleted_flag_for_ibuf
(
/*==============================*/
rec_t
*
rec
,
/* in: record */
page_zip_des_t
*
page_zip
,
/* in/out: compressed page
corresponding to rec, or NULL
when the tablespace is
uncompressed */
ibool
val
,
/* in: value to set */
mtr_t
*
mtr
)
/* in: mtr */
{
/* We do not need to reserve btr_search_latch, as the page has just
been read to the buffer pool and there cannot be a hash index to it. */
btr_cur_del_mark_set_sec_rec_log
(
rec
,
FALSE
,
mtr
);
rec_set_deleted_flag_new
(
rec
,
page_zip
,
val
);
btr_cur_del_mark_set_sec_rec_log
(
rec
,
val
,
mtr
);
}
/*==================== B-TREE RECORD REMOVE =========================*/
...
...
@@ -2763,8 +2908,7 @@ btr_cur_optimistic_delete(
ut_a
(
!
page_zip
||
page_zip_validate
(
page_zip
,
page
));
#endif
/* UNIV_ZIP_DEBUG */
if
(
dict_index_is_clust
(
cursor
->
index
)
||
!
page_is_leaf
(
page
))
{
if
(
dict_index_is_clust
(
cursor
->
index
)
||
!
page_is_leaf
(
page
))
{
/* The insert buffer does not handle
inserts to clustered indexes or to non-leaf
pages of secondary index B-trees. */
...
...
buf/buf0buddy.c
View file @
35d626f0
...
...
@@ -213,6 +213,7 @@ buf_buddy_block_register(
buf_block_t
*
block
)
/* in: buffer frame to allocate */
{
const
ulint
fold
=
BUF_POOL_ZIP_FOLD
(
block
);
ut_ad
(
buf_pool_mutex_own
());
ut_ad
(
!
mutex_own
(
&
buf_pool_zip_mutex
));
...
...
@@ -224,6 +225,7 @@ buf_buddy_block_register(
ut_ad
(
!
block
->
page
.
in_page_hash
);
ut_ad
(
!
block
->
page
.
in_zip_hash
);
ut_d
(
block
->
page
.
in_zip_hash
=
TRUE
);
HASH_INSERT
(
buf_page_t
,
hash
,
buf_pool
->
zip_hash
,
fold
,
&
block
->
page
);
buf_buddy_n_frames
++
;
...
...
@@ -278,23 +280,21 @@ buf_buddy_alloc_clean(
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released */
{
ulint
count
;
buf_page_t
*
bpage
;
ut_ad
(
buf_pool_mutex_own
());
ut_ad
(
!
mutex_own
(
&
buf_pool_zip_mutex
));
if
(
buf_buddy_n_frames
<
buf_buddy_max_n_frames
)
{
goto
free_LRU
;
}
if
(
buf_buddy_n_frames
>=
buf_buddy_max_n_frames
&&
((
BUF_BUDDY_LOW
<<
i
)
>=
PAGE_ZIP_MIN_SIZE
&&
i
<
BUF_BUDDY_SIZES
))
{
if
(
BUF_BUDDY_LOW
<<
i
>=
PAGE_ZIP_MIN_SIZE
&&
i
<
BUF_BUDDY_SIZES
)
{
/* Try to find a clean compressed-only page
of the same size. */
page_zip_des_t
dummy_zip
;
ulint
j
;
page_zip_des_t
dummy_zip
;
page_zip_set_size
(
&
dummy_zip
,
BUF_BUDDY_LOW
<<
i
);
...
...
@@ -335,9 +335,12 @@ buf_buddy_alloc_clean(
/* Free blocks from the end of the LRU list until enough space
is available. */
count
=
0
;
free_LRU:
for
(
bpage
=
UT_LIST_GET_LAST
(
buf_pool
->
LRU
);
bpage
;
bpage
=
UT_LIST_GET_PREV
(
LRU
,
bpage
))
{
for
(
bpage
=
UT_LIST_GET_LAST
(
buf_pool
->
LRU
);
bpage
;
bpage
=
UT_LIST_GET_PREV
(
LRU
,
bpage
),
++
count
)
{
void
*
ret
;
mutex_t
*
block_mutex
=
buf_page_get_mutex
(
bpage
);
...
...
@@ -440,20 +443,19 @@ buf_buddy_alloc_low(
}
/* Try replacing a clean page in the buffer pool. */
block
=
buf_buddy_alloc_clean
(
i
,
lru
);
if
(
block
)
{
goto
func_exit
;
}
/* Try replacing an uncompressed page in the buffer pool. */
buf_pool_mutex_exit
();
block
=
buf_LRU_get_free_block
(
0
);
*
lru
=
TRUE
;
buf_pool_mutex_enter
();
alloc_big:
buf_buddy_block_register
(
block
);
...
...
buf/buf0buf.c
View file @
35d626f0
...
...
@@ -1346,6 +1346,69 @@ buf_pool_resize(void)
buf_pool_page_hash_rebuild
();
}
/********************************************************************
Add watch for the given page to be read in. Caller must have the buffer pool
mutex reserved. */
static
void
buf_pool_add_watch
(
/*===============*/
ulint
space
,
/* in: space id */
ulint
page_no
)
/* in: page number */
{
ut_ad
(
mutex_own
(
&
buf_pool_mutex
));
/* There can't be multiple watches at the same time. */
ut_a
(
!
buf_pool
->
watch_active
);
buf_pool
->
watch_active
=
TRUE
;
buf_pool
->
watch_space
=
space
;
buf_pool
->
watch_happened
=
FALSE
;
buf_pool
->
watch_page_no
=
page_no
;
}
/********************************************************************
Stop watching if the marked page is read in. */
UNIV_INTERN
void
buf_pool_remove_watch
(
void
)
/*=======================*/
{
buf_pool_mutex_enter
();
ut_ad
(
buf_pool
->
watch_active
);
buf_pool
->
watch_active
=
FALSE
;
buf_pool_mutex_exit
();
}
/********************************************************************
Check if the given page is being watched and has been read to the buffer
pool. */
UNIV_INTERN
ibool
buf_pool_watch_happened
(
/*====================*/
/* out: TRUE if the given page is being
watched and it has been read in */
ulint
space
,
/* in: space id */
ulint
page_no
)
/* in: page number */
{
ulint
ret
;
buf_pool_mutex_enter
();
ret
=
buf_pool
->
watch_active
&&
space
==
buf_pool
->
watch_space
&&
page_no
==
buf_pool
->
watch_page_no
&&
buf_pool
->
watch_happened
;
buf_pool_mutex_exit
();
return
(
ret
);
}
/************************************************************************
Moves to the block to the start of the LRU list if there is a danger
that the block would drift out of the buffer pool. */
...
...
@@ -1763,7 +1826,8 @@ buf_page_get_gen(
ulint
rw_latch
,
/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t
*
guess
,
/* in: guessed block or NULL */
ulint
mode
,
/* in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH, BUF_GET_NOWAIT */
BUF_GET_NO_LATCH, BUF_GET_NOWAIT or
BUF_GET_IF_IN_POOL_OR_WATCH*/
const
char
*
file
,
/* in: file name */
ulint
line
,
/* in: line where called */
mtr_t
*
mtr
)
/* in: mini-transaction */
...
...
@@ -1778,11 +1842,17 @@ buf_page_get_gen(
||
(
rw_latch
==
RW_X_LATCH
)
||
(
rw_latch
==
RW_NO_LATCH
));
ut_ad
((
mode
!=
BUF_GET_NO_LATCH
)
||
(
rw_latch
==
RW_NO_LATCH
));
ut_ad
((
mode
==
BUF_GET
)
||
(
mode
==
BUF_GET_IF_IN_POOL
)
||
(
mode
==
BUF_GET_NO_LATCH
)
||
(
mode
==
BUF_GET_NOWAIT
));
/* Check for acceptable modes. */
ut_ad
(
mode
==
BUF_GET
||
mode
==
BUF_GET_IF_IN_POOL
||
mode
==
BUF_GET_NO_LATCH
||
mode
==
BUF_GET_NOWAIT
||
mode
==
BUF_GET_IF_IN_POOL_OR_WATCH
);
ut_ad
(
zip_size
==
fil_space_get_zip_size
(
space
));
#ifndef UNIV_LOG_DEBUG
ut_ad
(
!
ibuf_inside
()
||
ibuf_page
(
space
,
zip_size
,
offset
));
ut_ad
(
!
ibuf_inside
()
||
ibuf_page
(
space
,
zip_size
,
offset
,
mtr
));
#endif
buf_pool
->
n_page_gets
++
;
loop:
...
...
@@ -1818,9 +1888,14 @@ buf_page_get_gen(
if
(
block
==
NULL
)
{
/* Page not in buf_pool: needs to be read from file */
if
(
mode
==
BUF_GET_IF_IN_POOL_OR_WATCH
)
{
buf_pool_add_watch
(
space
,
offset
);
}
buf_pool_mutex_exit
();
if
(
mode
==
BUF_GET_IF_IN_POOL
)
{
if
(
mode
==
BUF_GET_IF_IN_POOL
||
mode
==
BUF_GET_IF_IN_POOL_OR_WATCH
)
{
return
(
NULL
);
}
...
...
@@ -1837,7 +1912,18 @@ buf_page_get_gen(
must_read
=
buf_block_get_io_fix
(
block
)
==
BUF_IO_READ
;
if
(
must_read
&&
mode
==
BUF_GET_IF_IN_POOL
)
{
if
(
must_read
&&
(
mode
==
BUF_GET_IF_IN_POOL
||
mode
==
BUF_GET_IF_IN_POOL_OR_WATCH
))
{
/* The page is being read to bufer pool,
but we can't wait around for the read to
complete. */
if
(
mode
==
BUF_GET_IF_IN_POOL_OR_WATCH
)
{
buf_pool_add_watch
(
space
,
offset
);
}
/* The page is only being read to buffer */
buf_pool_mutex_exit
();
...
...
@@ -2140,7 +2226,7 @@ buf_page_optimistic_get_func(
ut_ad
(
!
ibuf_inside
()
||
ibuf_page
(
buf_block_get_space
(
block
),
buf_block_get_zip_size
(
block
),
buf_block_get_page_no
(
block
)));
buf_block_get_page_no
(
block
)
,
mtr
));
if
(
rw_latch
==
RW_S_LATCH
)
{
success
=
rw_lock_s_lock_func_nowait
(
&
(
block
->
lock
),
...
...
@@ -2392,6 +2478,25 @@ buf_page_init_low(
#endif
/* UNIV_DEBUG_FILE_ACCESSES */
}
/************************************************************************
Set watch happened flag. */
UNIV_INLINE
void
buf_page_notify_watch
(
/*==================*/
ulint
space
,
/* in: space id of page read in */
ulint
offset
)
/* in: offset of page read in */
{
ut_ad
(
buf_pool_mutex_own
());
if
(
buf_pool
->
watch_active
&&
space
==
buf_pool
->
watch_space
&&
offset
==
buf_pool
->
watch_page_no
)
{
buf_pool
->
watch_happened
=
TRUE
;
}
}
#ifdef UNIV_HOTBACKUP
/************************************************************************
Inits a page to the buffer buf_pool, for use in ibbackup --restore. */
...
...
@@ -2481,6 +2586,7 @@ buf_page_init(
}
buf_page_init_low
(
&
block
->
page
);
buf_page_notify_watch
(
space
,
offset
);
ut_ad
(
!
block
->
page
.
in_zip_hash
);
ut_ad
(
!
block
->
page
.
in_page_hash
);
...
...
@@ -2531,7 +2637,8 @@ buf_page_init_for_read(
mtr_start
(
&
mtr
);
if
(
!
ibuf_page_low
(
space
,
zip_size
,
offset
,
&
mtr
))
{
if
(
!
recv_no_ibuf_operations
&&
!
ibuf_page
(
space
,
zip_size
,
offset
,
&
mtr
))
{
mtr_commit
(
&
mtr
);
...
...
@@ -2583,7 +2690,9 @@ buf_page_init_for_read(
if
(
block
)
{
bpage
=
&
block
->
page
;
mutex_enter
(
&
block
->
mutex
);
buf_page_init
(
space
,
offset
,
block
);
buf_page_notify_watch
(
space
,
offset
);
/* The block must be put to the LRU list, to the old blocks */
buf_LRU_add_block
(
bpage
,
TRUE
/* to old blocks */
);
...
...
@@ -2650,11 +2759,15 @@ buf_page_init_for_read(
mutex_enter
(
&
buf_pool_zip_mutex
);
UNIV_MEM_DESC
(
bpage
->
zip
.
data
,
page_zip_get_size
(
&
bpage
->
zip
),
bpage
);
buf_page_init_low
(
bpage
);
buf_page_notify_watch
(
space
,
offset
);
bpage
->
state
=
BUF_BLOCK_ZIP_PAGE
;
bpage
->
space
=
space
;
bpage
->
offset
=
offset
;
#ifdef UNIV_DEBUG
bpage
->
in_page_hash
=
FALSE
;
bpage
->
in_zip_hash
=
FALSE
;
...
...
@@ -2748,6 +2861,7 @@ buf_page_create(
mutex_enter
(
&
block
->
mutex
);
buf_page_init
(
space
,
offset
,
block
);
buf_page_notify_watch
(
space
,
offset
);
/* The block must be put to the LRU list */
buf_LRU_add_block
(
&
block
->
page
,
FALSE
);
...
...
@@ -3539,7 +3653,7 @@ buf_print_io(
fprintf
(
file
,
"Buffer pool size %lu
\n
"
"Free buffers
%lu
\n
"
"Free buffers
%lu
\n
"
"Database pages %lu
\n
"
"Modified db pages %lu
\n
"
"Pending reads %lu
\n
"
...
...
buf/buf0lru.c
View file @
35d626f0
...
...
@@ -497,7 +497,7 @@ buf_LRU_get_free_block(
if
(
!
buf_lru_switched_on_innodb_mon
)
{
/* Over 67 % of the buffer pool is occupied by lock
/* Over 67 % of the buffer pool is occupied by lock
heaps or the adaptive hash index. This may be a memory
leak! */
...
...
fil/fil0fil.c
View file @
35d626f0
...
...
@@ -191,8 +191,6 @@ struct fil_space_struct {
currently in the list above */
UT_LIST_NODE_T
(
fil_space_t
)
space_list
;
/* list of all spaces */
ibuf_data_t
*
ibuf_data
;
/* insert buffer data */
ulint
magic_n
;
};
...
...
@@ -476,33 +474,6 @@ fil_space_get_type(
return
(
space
->
purpose
);
}
/***********************************************************************
Returns the ibuf data of a file space. */
UNIV_INTERN
ibuf_data_t
*
fil_space_get_ibuf_data
(
/*====================*/
/* out: ibuf data for this space */
ulint
id
)
/* in: space id */
{
fil_system_t
*
system
=
fil_system
;
fil_space_t
*
space
;
ut_ad
(
system
);
ut_a
(
id
==
0
);
mutex_enter
(
&
(
system
->
mutex
));
space
=
fil_space_get_by_id
(
id
);
mutex_exit
(
&
(
system
->
mutex
));
ut_a
(
space
);
return
(
space
->
ibuf_data
);
}
/**************************************************************************
Checks if all the file nodes in a space are flushed. The caller must hold
the fil_system mutex. */
...
...
@@ -1183,8 +1154,6 @@ fil_space_create(
UT_LIST_INIT
(
space
->
chain
);
space
->
magic_n
=
FIL_SPACE_MAGIC_N
;
space
->
ibuf_data
=
NULL
;
rw_lock_create
(
&
space
->
latch
,
SYNC_FSP
);
HASH_INSERT
(
fil_space_t
,
hash
,
system
->
spaces
,
id
,
space
);
...
...
@@ -1649,25 +1618,6 @@ fil_set_max_space_id_if_bigger(
mutex_exit
(
&
(
system
->
mutex
));
}
/********************************************************************
Initializes the ibuf data structure for space 0 == the system tablespace.
This can be called after the file space headers have been created and the
dictionary system has been initialized. */
UNIV_INTERN
void
fil_ibuf_init_at_db_start
(
void
)
/*===========================*/
{
fil_space_t
*
space
;
space
=
UT_LIST_GET_FIRST
(
fil_system
->
space_list
);
ut_a
(
space
);
ut_a
(
space
->
purpose
==
FIL_TABLESPACE
);
space
->
ibuf_data
=
ibuf_data_init_for_space
(
space
->
id
);
}
/********************************************************************
Writes the flushed lsn and the latest archived log number to the page header
of the first page of a data file of the system tablespace (space 0),
...
...
@@ -4266,13 +4216,13 @@ fil_io(
||
sync
||
is_log
);
#ifdef UNIV_SYNC_DEBUG
ut_ad
(
!
ibuf_inside
()
||
is_log
||
(
type
==
OS_FILE_WRITE
)
||
ibuf_page
(
space_id
,
zip_size
,
block_offset
));
||
ibuf_page
(
space_id
,
zip_size
,
block_offset
,
NULL
));
#endif
#endif
if
(
sync
)
{
mode
=
OS_AIO_SYNC
;
}
else
if
(
type
==
OS_FILE_READ
&&
!
is_log
&&
ibuf_page
(
space_id
,
zip_size
,
block_offset
))
{
&&
ibuf_page
(
space_id
,
zip_size
,
block_offset
,
NULL
))
{
mode
=
OS_AIO_IBUF
;
}
else
if
(
is_log
)
{
mode
=
OS_AIO_LOG
;
...
...
fsp/fsp0fsp.c
View file @
35d626f0
...
...
@@ -2191,8 +2191,8 @@ fseg_create_general(
/* This thread did not own the latch before this call: free
excess pages from the insert buffer free list */
if
(
space
==
0
)
{
ibuf_free_excess_pages
(
0
);
if
(
space
==
IBUF_SPACE_ID
)
{
ibuf_free_excess_pages
();
}
}
...
...
@@ -2759,8 +2759,8 @@ fseg_alloc_free_page_general(
/* This thread did not own the latch before this call: free
excess pages from the insert buffer free list */
if
(
space
==
0
)
{
ibuf_free_excess_pages
(
0
);
if
(
space
==
IBUF_SPACE_ID
)
{
ibuf_free_excess_pages
();
}
}
...
...
ibuf/ibuf0ibuf.c
View file @
35d626f0
...
...
@@ -62,7 +62,28 @@ is in the compact format. The presence of this marker can be detected by
looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
The high-order bit of the character set field in the type info is the
"nullable" flag for the field. */
"nullable" flag for the field.
In versions >= TODO:
The optional marker byte at the start of the fourth field is replaced by
mandatory 3 fields, totaling 4 bytes:
1. 2 bytes: Counter field, used to sort records within a (space id, page
no) in the order they were added. This is needed so that for example the
sequence of operations "INSERT x, DEL MARK x, INSERT x" is handled
correctly.
2. 1 byte: Operation type (see ibuf_op_t).
3. 1 byte: Flags. Currently only one flag exists, IBUF_REC_COMPACT.
To ensure older records, which do not have counters to enforce correct
sorting, are merged before any new records, ibuf_insert checks if we're
trying to insert to a position that contains old-style records, and if so,
refuses the insert. Thus, ibuf pages are gradually converted to the new
format as their corresponding buffer pool pages are read into memory.
*/
/* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
...
...
@@ -137,17 +158,18 @@ access order rules. */
/* Buffer pool size per the maximum insert buffer size */
#define IBUF_POOL_SIZE_PER_MAX_SIZE 2
/* Table name for the insert buffer. */
#define IBUF_TABLE_NAME "SYS_IBUF_TABLE"
/* The insert buffer control structure */
UNIV_INTERN
ibuf_t
*
ibuf
=
NULL
;
static
ulint
ibuf_rnd
=
986058871
;
UNIV_INTERN
ulint
ibuf_flush_count
=
0
;
#ifdef UNIV_IBUF_COUNT_DEBUG
/* Dimensions for the ibuf_count array */
#define IBUF_COUNT_N_SPACES
500
#define IBUF_COUNT_N_PAGES
2
000
#define IBUF_COUNT_N_SPACES
4
#define IBUF_COUNT_N_PAGES
130
000
/* Buffered entry counts for file pages, used in debugging */
static
ulint
ibuf_counts
[
IBUF_COUNT_N_SPACES
][
IBUF_COUNT_N_PAGES
];
...
...
@@ -192,6 +214,22 @@ ibuf_count_check(
# error "IBUF_BITS_PER_PAGE must be an even number!"
#endif
/* Various constants for checking the type of an ibuf record and extracting
data from it. For details, see the description of the record format at the
top of this file. */
#define IBUF_REC_INFO_SIZE 4
/* Combined size of info fields at
the beginning of the fourth field */
/* Offsets for the fields at the beginning of the fourth field */
#define IBUF_REC_OFFSET_COUNTER 0
#define IBUF_REC_OFFSET_TYPE 2
#define IBUF_REC_OFFSET_FLAGS 3
/* Record flag masks */
#define IBUF_REC_COMPACT 0x1
/* Whether the record is compact */
/* The mutex used to block pessimistic inserts to ibuf trees */
static
mutex_t
ibuf_pessimistic_insert_mutex
;
...
...
@@ -230,15 +268,6 @@ because ibuf merge is done to a page when it is read in, and it is
still physically like the index page even if the index would have been
dropped! So, there seems to be no problem. */
#ifdef UNIV_DEBUG
/**********************************************************************
Validates the ibuf data structures when the caller owns ibuf_mutex. */
static
ibool
ibuf_validate_low
(
void
);
/*===================*/
/* out: TRUE if ok */
#endif
/* UNIV_DEBUG */
/**********************************************************************
Sets the flag in the current OS thread local storage denoting that it is
inside an insert buffer routine. */
...
...
@@ -293,17 +322,14 @@ page_t*
ibuf_header_page_get
(
/*=================*/
/* out: insert buffer header page */
ulint
space
,
/* in: space id */
mtr_t
*
mtr
)
/* in: mtr */
{
buf_block_t
*
block
;
ut_a
(
space
==
0
);
ut_ad
(
!
ibuf_inside
());
block
=
buf_page_get
(
space
,
0
,
FSP_IBUF_HEADER_PAGE_NO
,
RW_X_LATCH
,
mtr
);
block
=
buf_page_get
(
IBUF_SPACE_ID
,
0
,
FSP_IBUF_HEADER_PAGE_NO
,
RW_X_LATCH
,
mtr
);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level
(
block
,
SYNC_IBUF_HEADER
);
...
...
@@ -319,19 +345,17 @@ page_t*
ibuf_tree_root_get
(
/*===============*/
/* out: insert buffer tree root page */
ibuf_data_t
*
data
,
/* in: ibuf data */
ulint
space
,
/* in: space id */
mtr_t
*
mtr
)
/* in: mtr */
{
buf_block_t
*
block
;
ut_a
(
space
==
0
);
ut_ad
(
ibuf_inside
());
mtr_x_lock
(
dict_index_get_lock
(
data
->
index
),
mtr
);
mtr_x_lock
(
dict_index_get_lock
(
ibuf
->
index
),
mtr
);
block
=
buf_page_get
(
IBUF_SPACE_ID
,
0
,
FSP_IBUF_TREE_ROOT_PAGE_NO
,
RW_X_LATCH
,
mtr
);
block
=
buf_page_get
(
space
,
0
,
FSP_IBUF_TREE_ROOT_PAGE_NO
,
RW_X_LATCH
,
mtr
);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level
(
block
,
SYNC_TREE_NODE
);
#endif
/* UNIV_SYNC_DEBUG */
...
...
@@ -374,116 +398,71 @@ ibuf_count_set(
#endif
/**********************************************************************
Creates the insert buffer data structure at a database startup and initializes
the data structures for the insert buffer. */
UNIV_INTERN
void
ibuf_init_at_db_start
(
void
)
/*=======================*/
{
ibuf
=
mem_alloc
(
sizeof
(
ibuf_t
));
/* Note that also a pessimistic delete can sometimes make a B-tree
grow in size, as the references on the upper levels of the tree can
change */
ibuf
->
max_size
=
buf_pool_get_curr_size
()
/
UNIV_PAGE_SIZE
/
IBUF_POOL_SIZE_PER_MAX_SIZE
;
UT_LIST_INIT
(
ibuf
->
data_list
);
ibuf
->
size
=
0
;
mutex_create
(
&
ibuf_pessimistic_insert_mutex
,
SYNC_IBUF_PESS_INSERT_MUTEX
);
mutex_create
(
&
ibuf_mutex
,
SYNC_IBUF_MUTEX
);
mutex_create
(
&
ibuf_bitmap_mutex
,
SYNC_IBUF_BITMAP_MUTEX
);
fil_ibuf_init_at_db_start
();
}
/**********************************************************************
Updates the size information in an ibuf data, assuming the segment size has
not changed. */
Updates the size information of the ibuf, assuming the segment size has not
changed. */
static
void
ibuf_data_sizes_update
(
/*===================*/
ibuf_data_t
*
data
,
/* in: ibuf data struct */
const
page_t
*
root
,
/* in: ibuf tree root */
ibuf_size_update
(
/*=============*/
page_t
*
root
,
/* in: ibuf tree root */
mtr_t
*
mtr
)
/* in: mtr */
{
ulint
old_size
;
#ifdef UNIV_SYNC_DEBUG
ut_ad
(
mutex_own
(
&
ibuf_mutex
));
#endif
/* UNIV_SYNC_DEBUG */
old_size
=
data
->
size
;
data
->
free_list_len
=
flst_get_len
(
root
+
PAGE_HEADER
ibuf
->
free_list_len
=
flst_get_len
(
root
+
PAGE_HEADER
+
PAGE_BTR_IBUF_FREE_LIST
,
mtr
);
data
->
height
=
1
+
btr_page_get_level
(
root
,
mtr
);
ibuf
->
height
=
1
+
btr_page_get_level
(
root
,
mtr
);
data
->
size
=
data
->
seg_size
-
(
1
+
data
->
free_list_len
);
/* the '1 +' is the ibuf header page */
ut_ad
(
data
->
size
<
data
->
seg_size
);
if
(
page_get_n_recs
(
root
)
==
0
)
{
data
->
empty
=
TRUE
;
}
else
{
data
->
empty
=
FALSE
;
}
ut_ad
(
ibuf
->
size
+
data
->
size
>=
old_size
);
ibuf
->
size
=
ibuf
->
size
+
data
->
size
-
old_size
;
ibuf
->
size
=
ibuf
->
seg_size
-
(
1
+
ibuf
->
free_list_len
);
#if 0
fprintf(stderr, "ibuf size %lu, space ibuf size %lu\n",
ibuf->size, data->size);
#endif
ibuf
->
empty
=
page_get_n_recs
(
root
)
==
0
;
}
/**********************************************************************
Creates the insert buffer data struct for a single tablespace. Reads the
root page of the insert buffer tree in the tablespace. This function can
be called only after the dictionary system has been initialized, as this
creates also the insert buffer table and index into this tablespace. */
Creates the insert buffer data structure at a database startup and initializes
the data structures for the insert buffer. */
UNIV_INTERN
ibuf_data_t
*
ibuf_data_init_for_space
(
/*=====================*/
/* out, own: ibuf data struct, linked to the list
in ibuf control structure */
ulint
space
)
/* in: space id */
void
ibuf_init_at_db_start
(
void
)
/*=======================*/
{
ibuf_data_t
*
data
;
page_t
*
root
;
page_t
*
header_page
;
mtr_t
mtr
;
char
*
buf
;
mem_heap_t
*
heap
;
dict_table_t
*
table
;
mem_heap_t
*
heap
;
dict_index_t
*
index
;
ulint
n_used
;
ulint
error
;
page_t
*
header_page
;
ut_a
(
space
==
0
);
ibuf
=
mem_alloc
(
sizeof
(
ibuf_t
));
memset
(
ibuf
,
0
,
sizeof
(
*
ibuf
));
/* Note that also a pessimistic delete can sometimes make a B-tree
grow in size, as the references on the upper levels of the tree can
change */
ibuf
->
max_size
=
buf_pool_get_curr_size
()
/
UNIV_PAGE_SIZE
/
IBUF_POOL_SIZE_PER_MAX_SIZE
;
mutex_create
(
&
ibuf_pessimistic_insert_mutex
,
SYNC_IBUF_PESS_INSERT_MUTEX
);
data
=
mem_alloc
(
sizeof
(
ibuf_data_t
)
);
mutex_create
(
&
ibuf_mutex
,
SYNC_IBUF_MUTEX
);
data
->
space
=
space
;
mutex_create
(
&
ibuf_bitmap_mutex
,
SYNC_IBUF_BITMAP_MUTEX
)
;
mtr_start
(
&
mtr
);
mutex_enter
(
&
ibuf_mutex
);
mtr_x_lock
(
fil_space_get_latch
(
space
,
NULL
),
&
mtr
);
mtr_x_lock
(
fil_space_get_latch
(
IBUF_SPACE_ID
,
NULL
),
&
mtr
);
header_page
=
ibuf_header_page_get
(
space
,
&
mtr
);
header_page
=
ibuf_header_page_get
(
&
mtr
);
fseg_n_reserved_pages
(
header_page
+
IBUF_HEADER
+
IBUF_TREE_SEG_HEADER
,
&
n_used
,
&
mtr
);
...
...
@@ -491,33 +470,23 @@ ibuf_data_init_for_space(
ut_ad
(
n_used
>=
2
);
data
->
seg_size
=
n_used
;
ibuf
->
seg_size
=
n_used
;
{
buf_block_t
*
block
=
buf_page_get
(
space
,
0
,
FSP_IBUF_TREE_ROOT_PAGE_NO
,
buf_block_t
*
block
;
block
=
buf_page_get
(
IBUF_SPACE_ID
,
0
,
FSP_IBUF_TREE_ROOT_PAGE_NO
,
RW_X_LATCH
,
&
mtr
);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level
(
block
,
SYNC_TREE_NODE
);
#endif
/* UNIV_SYNC_DEBUG */
root
=
buf_block_get_frame
(
block
);
}
data
->
size
=
0
;
data
->
n_inserts
=
0
;
data
->
n_merges
=
0
;
data
->
n_merged_recs
=
0
;
ibuf_data_sizes_update
(
data
,
root
,
&
mtr
);
/*
if (!data->empty) {
fprintf(stderr,
"InnoDB: index entries found in the insert buffer\n");
} else {
fprintf(stderr,
"InnoDB: insert buffer empty\n");
}
*/
ibuf_size_update
(
root
,
&
mtr
);
mutex_exit
(
&
ibuf_mutex
);
mtr_commit
(
&
mtr
);
...
...
@@ -525,42 +494,28 @@ ibuf_data_init_for_space(
ibuf_exit
();
heap
=
mem_heap_create
(
450
);
buf
=
mem_heap_alloc
(
heap
,
50
);
sprintf
(
buf
,
"SYS_IBUF_TABLE_%lu"
,
(
ulong
)
space
);
/* use old-style record format for the insert buffer */
table
=
dict_mem_table_create
(
buf
,
space
,
2
,
0
);
/* Use old-style record format for the insert buffer. */
table
=
dict_mem_table_create
(
IBUF_TABLE_NAME
,
IBUF_SPACE_ID
,
1
,
0
);
dict_mem_table_add_col
(
table
,
heap
,
"PAGE_NO"
,
DATA_BINARY
,
0
,
0
);
dict_mem_table_add_col
(
table
,
heap
,
"TYPES"
,
DATA_BINARY
,
0
,
0
);
dict_mem_table_add_col
(
table
,
heap
,
"DUMMY_COLUMN"
,
DATA_BINARY
,
0
,
0
);
table
->
id
=
ut_dulint_add
(
DICT_IBUF_ID_MIN
,
space
);
table
->
id
=
ut_dulint_add
(
DICT_IBUF_ID_MIN
,
IBUF_SPACE_ID
);
dict_table_add_to_cache
(
table
,
heap
);
mem_heap_free
(
heap
);
index
=
dict_mem_index_create
(
buf
,
"CLUST_IND"
,
space
,
DICT_CLUSTERED
|
DICT_UNIVERSAL
|
DICT_IBUF
,
2
);
dict_mem_index_add_field
(
index
,
"PAGE_NO"
,
0
);
dict_mem_index_add_field
(
index
,
"TYPES"
,
0
);
IBUF_TABLE_NAME
,
"CLUST_IND"
,
IBUF_SPACE_ID
,
DICT_CLUSTERED
|
DICT_UNIVERSAL
|
DICT_IBUF
,
1
);
index
->
id
=
ut_dulint_add
(
DICT_IBUF_ID_MIN
,
space
);
dict_mem_index_add_field
(
index
,
"DUMMY_COLUMN"
,
0
);
error
=
dict_index_add_to_cache
(
table
,
index
,
FSP_IBUF_TREE_ROOT_PAGE_NO
);
ut_a
(
error
==
DB_SUCCESS
);
index
->
id
=
ut_dulint_add
(
DICT_IBUF_ID_MIN
,
IBUF_SPACE_ID
);
d
ata
->
index
=
dict_table_get_first_index
(
table
);
d
ict_index_add_to_cache
(
table
,
index
,
FSP_IBUF_TREE_ROOT_PAGE_NO
);
mutex_enter
(
&
ibuf_mutex
);
UT_LIST_ADD_LAST
(
data_list
,
ibuf
->
data_list
,
data
);
mutex_exit
(
&
ibuf_mutex
);
return
(
data
);
ibuf
->
index
=
dict_table_get_first_index
(
table
);
}
/*************************************************************************
...
...
@@ -605,7 +560,7 @@ ibuf_parse_bitmap_init(
/*===================*/
/* out: end of log record or NULL */
byte
*
ptr
,
/* in: buffer */
byte
*
end_ptr
__attribute__
((
unused
))
,
/* in: buffer end */
byte
*
end_ptr
UNIV_UNUSED
,
/* in: buffer end */
buf_block_t
*
block
,
/* in: block or NULL */
mtr_t
*
mtr
)
/* in: mtr or NULL */
{
...
...
@@ -631,7 +586,7 @@ ibuf_bitmap_page_get_bits(
0 for uncompressed pages */
ulint
bit
,
/* in: IBUF_BITMAP_FREE,
IBUF_BITMAP_BUFFERED, ... */
mtr_t
*
mtr
__attribute__
((
unused
))
)
mtr_t
*
mtr
UNIV_UNUSED
)
/* in: mtr containing an
x-latch to the bitmap page */
{
...
...
@@ -929,10 +884,8 @@ ibuf_update_free_bits_low(
performed to the page */
mtr_t
*
mtr
)
/* in/out: mtr */
{
ulint
before
;
ulint
after
;
ut_a
(
!
buf_block_get_page_zip
(
block
));
ulint
before
;
before
=
ibuf_index_page_calc_free_bits
(
0
,
max_ins_size
);
...
...
@@ -1033,7 +986,7 @@ ibuf_fixed_addr_page(
0 for uncompressed pages */
ulint
page_no
)
/* in: page number */
{
return
((
space
==
0
&&
page_no
==
IBUF_TREE_ROOT_PAGE_NO
)
return
((
space
==
IBUF_SPACE_ID
&&
page_no
==
IBUF_TREE_ROOT_PAGE_NO
)
||
ibuf_bitmap_page
(
zip_size
,
page_no
));
}
...
...
@@ -1046,68 +999,42 @@ ibuf_page(
/* out: TRUE if level 2 or level 3 page */
ulint
space
,
/* in: space id */
ulint
zip_size
,
/* in: compressed page size in bytes, or 0 */
ulint
page_no
)
/* in: page number */
ulint
page_no
,
/* in: page number */
mtr_t
*
mtr
)
/* in: mtr which will contain an x-latch to the
bitmap page if the page is not one of the fixed
address ibuf pages, or NULL, in which case a new
transaction is created. */
{
page_t
*
bitmap_page
;
mtr_t
mtr
;
ibool
ret
;
if
(
recv_no_ibuf_operations
)
{
/* Recovery is running: no ibuf operations should be
performed */
return
(
FALSE
);
}
mtr_t
mtr_local
;
page_t
*
bitmap_page
;
ibool
use_local_mtr
=
(
mtr
==
NULL
);
if
(
ibuf_fixed_addr_page
(
space
,
zip_size
,
page_no
))
{
return
(
TRUE
);
}
if
(
space
!=
0
)
{
/* Currently we only have an ibuf tree in space 0 */
}
else
if
(
space
!=
IBUF_SPACE_ID
)
{
return
(
FALSE
);
}
ut_ad
(
fil_space_get_type
(
space
)
==
FIL_TABLESPACE
);
ut_ad
(
fil_space_get_type
(
IBUF_SPACE_ID
)
==
FIL_TABLESPACE
);
mtr_start
(
&
mtr
);
if
(
use_local_mtr
)
{
mtr
=
&
mtr_local
;
mtr_start
(
mtr
);
}
bitmap_page
=
ibuf_bitmap_get_map_page
(
space
,
page_no
,
zip_size
,
&
mtr
);
bitmap_page
=
ibuf_bitmap_get_map_page
(
space
,
page_no
,
zip_size
,
mtr
);
ret
=
ibuf_bitmap_page_get_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_IBUF
,
&
mtr
);
mtr_commit
(
&
mtr
);
IBUF_BITMAP_IBUF
,
mtr
);
return
(
ret
);
}
/***************************************************************************
Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
UNIV_INTERN
ibool
ibuf_page_low
(
/*==========*/
/* out: TRUE if level 2 or level 3 page */
ulint
space
,
/* in: space id */
ulint
zip_size
,
/* in: compressed page size in bytes, or 0 */
ulint
page_no
,
/* in: page number */
mtr_t
*
mtr
)
/* in: mtr which will contain an x-latch to the
bitmap page if the page is not one of the fixed
address ibuf pages */
{
page_t
*
bitmap_page
;
if
(
ibuf_fixed_addr_page
(
space
,
zip_size
,
page_no
))
{
return
(
TRUE
);
if
(
use_local_mtr
)
{
mtr_commit
(
mtr
);
}
bitmap_page
=
ibuf_bitmap_get_map_page
(
space
,
page_no
,
zip_size
,
mtr
);
return
(
ibuf_bitmap_page_get_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_IBUF
,
mtr
));
return
(
ret
);
}
/************************************************************************
...
...
@@ -1178,6 +1105,185 @@ ibuf_rec_get_space(
return
(
0
);
}
/********************************************************************
Get various information about an ibuf record. */
static
void
ibuf_rec_get_info
(
/*==============*/
const
rec_t
*
rec
,
/* in: ibuf record */
ibuf_op_t
*
op
,
/* out: operation type, or NULL */
ibool
*
comp
,
/* out: compact flag, or NULL */
ulint
*
info_len
,
/* out: length of info fields at the
start of the fourth field, or
NULL */
ulint
*
counter
)
/* in: counter value, or NULL */
{
const
byte
*
types
;
ulint
fields
;
ulint
len
;
ulint
mod
;
/* Local variables to shadow arguments. */
ibuf_op_t
op_local
;
ibool
comp_local
;
ulint
info_len_local
;
ulint
counter_local
;
ut_ad
(
ibuf_inside
());
fields
=
rec_get_n_fields_old
(
rec
);
ut_a
(
fields
>
4
);
types
=
rec_get_nth_field_old
(
rec
,
3
,
&
len
);
mod
=
len
%
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
;
if
(
mod
==
0
)
{
op_local
=
IBUF_OP_INSERT
;
comp_local
=
FALSE
;
info_len_local
=
0
;
ut_ad
(
!
counter
);
}
else
if
(
mod
==
1
)
{
op_local
=
IBUF_OP_INSERT
;
comp_local
=
TRUE
;
info_len_local
=
1
;
ut_ad
(
!
counter
);
}
else
if
(
mod
==
IBUF_REC_INFO_SIZE
)
{
op_local
=
(
ibuf_op_t
)
types
[
IBUF_REC_OFFSET_TYPE
];
comp_local
=
types
[
IBUF_REC_OFFSET_FLAGS
]
&
IBUF_REC_COMPACT
;
info_len_local
=
IBUF_REC_INFO_SIZE
;
counter_local
=
mach_read_from_2
(
types
+
IBUF_REC_OFFSET_COUNTER
);
}
else
{
ut_error
;
}
ut_a
(
op_local
<
IBUF_OP_COUNT
);
ut_a
((
len
-
info_len_local
)
==
(
fields
-
4
)
*
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
);
if
(
op
)
{
*
op
=
op_local
;
}
if
(
comp
)
{
*
comp
=
comp_local
;
}
if
(
info_len
)
{
*
info_len
=
info_len_local
;
}
if
(
counter
)
{
*
counter
=
counter_local
;
}
}
/********************************************************************
Returns the operation type field of an ibuf record. */
static
ibuf_op_t
ibuf_rec_get_op_type
(
/*=================*/
/* out: operation type */
rec_t
*
rec
)
/* in: ibuf record */
{
ulint
len
;
const
byte
*
field
;
ut_ad
(
ibuf_inside
());
ut_ad
(
rec_get_n_fields_old
(
rec
)
>
2
);
field
=
rec_get_nth_field_old
(
rec
,
1
,
&
len
);
if
(
len
>
1
)
{
/* This is a < 4.1.x format record */
return
(
IBUF_OP_INSERT
);
}
else
{
ibuf_op_t
op
;
ibuf_rec_get_info
(
rec
,
&
op
,
NULL
,
NULL
,
NULL
);
return
(
op
);
}
}
/********************************************************************
Read the first two bytes from a record's fourth field (counter field in new
records; something else in older records). */
ulint
ibuf_rec_get_fake_counter
(
/*======================*/
/* out: "counter" field, or ULINT_UNDEFINED if for
some reason it can't be read*/
rec_t
*
rec
)
/* in: ibuf record */
{
byte
*
ptr
;
ulint
len
;
if
(
rec_get_n_fields_old
(
rec
)
<
4
)
{
return
(
ULINT_UNDEFINED
);
}
ptr
=
rec_get_nth_field_old
(
rec
,
3
,
&
len
);
if
(
len
>=
2
)
{
return
(
mach_read_from_2
(
ptr
));
}
else
{
return
(
ULINT_UNDEFINED
);
}
}
/********************************************************************
Add accumulated operation counts to a permanent array. Both arrays must be
of size IBUF_OP_COUNT. */
static
void
ibuf_add_ops
(
/*=========*/
ulint
*
arr
,
/* in/out: array to modify */
ulint
*
ops
)
/* in: operation counts */
{
ulint
i
;
for
(
i
=
0
;
i
<
IBUF_OP_COUNT
;
i
++
)
{
arr
[
i
]
+=
ops
[
i
];
}
}
/********************************************************************
Print operation counts. The array must be of size IBUF_OP_COUNT. */
static
void
ibuf_print_ops
(
/*=========*/
ulint
*
ops
,
/* in: operation counts */
FILE
*
file
)
/* in: file where to print */
{
static
const
char
*
op_names
[]
=
{
"insert"
,
"delete mark"
,
"delete"
};
ulint
i
;
ut_a
(
UT_ARR_SIZE
(
op_names
)
==
IBUF_OP_COUNT
);
for
(
i
=
0
;
i
<
IBUF_OP_COUNT
;
i
++
)
{
fprintf
(
file
,
"%s %lu%s"
,
op_names
[
i
],
(
ulong
)
ops
[
i
],
(
i
<
(
IBUF_OP_COUNT
-
1
))
?
", "
:
""
);
}
}
/************************************************************************
Creates a dummy index for inserting a record to a non-clustered index.
*/
...
...
@@ -1192,12 +1298,11 @@ ibuf_dummy_index_create(
dict_table_t
*
table
;
dict_index_t
*
index
;
table
=
dict_mem_table_create
(
"IBUF_DUMMY"
,
DICT_HDR_SPACE
,
n
,
comp
?
DICT_TF_COMPACT
:
0
);
table
=
dict_mem_table_create
(
"IBUF_DUMMY"
,
DICT_HDR_SPACE
,
n
,
comp
?
DICT_TF_COMPACT
:
0
);
index
=
dict_mem_index_create
(
"IBUF_DUMMY"
,
"IBUF_DUMMY"
,
DICT_HDR_SPACE
,
0
,
n
);
index
=
dict_mem_index_create
(
"IBUF_DUMMY"
,
"IBUF_DUMMY"
,
DICT_HDR_SPACE
,
0
,
n
);
index
->
table
=
table
;
...
...
@@ -1217,12 +1322,14 @@ ibuf_dummy_index_add_col(
ulint
len
)
/* in: length of the column */
{
ulint
i
=
index
->
table
->
n_def
;
dict_mem_table_add_col
(
index
->
table
,
NULL
,
NULL
,
dtype_get_mtype
(
type
),
dtype_get_prtype
(
type
),
dtype_get_len
(
type
));
dict_index_add_col
(
index
,
index
->
table
,
dict_table_get_nth_col
(
index
->
table
,
i
),
len
);
dict_mem_table_add_col
(
index
->
table
,
NULL
,
NULL
,
dtype_get_mtype
(
type
),
dtype_get_prtype
(
type
),
dtype_get_len
(
type
));
dict_index_add_col
(
index
,
index
->
table
,
dict_table_get_nth_col
(
index
->
table
,
i
),
len
);
}
/************************************************************************
Deallocates a dummy index for inserting a record to a non-clustered index.
...
...
@@ -1242,6 +1349,67 @@ ibuf_dummy_index_free(
/*************************************************************************
Builds the entry to insert into a non-clustered index when we have the
corresponding record in an ibuf index. */
UNIV_INLINE
dtuple_t
*
ibuf_build_entry_pre_4_1_x
(
/*=======================*/
/* out, own: entry to insert to
a non-clustered index; NOTE that
as we copy pointers to fields in
ibuf_rec, the caller must hold a
latch to the ibuf_rec page as long
as the entry is used! */
const
rec_t
*
ibuf_rec
,
/* in: record in an insert buffer */
mem_heap_t
*
heap
,
/* in: heap where built */
dict_index_t
**
pindex
)
/* out, own: dummy index that
describes the entry */
{
ulint
i
;
ulint
len
;
const
byte
*
types
;
dtuple_t
*
tuple
;
ulint
n_fields
;
ut_a
(
trx_doublewrite_must_reset_space_ids
);
ut_a
(
!
trx_sys_multiple_tablespace_format
);
n_fields
=
rec_get_n_fields_old
(
ibuf_rec
)
-
2
;
tuple
=
dtuple_create
(
heap
,
n_fields
);
types
=
rec_get_nth_field_old
(
ibuf_rec
,
1
,
&
len
);
ut_a
(
len
==
n_fields
*
DATA_ORDER_NULL_TYPE_BUF_SIZE
);
for
(
i
=
0
;
i
<
n_fields
;
i
++
)
{
const
byte
*
data
;
dfield_t
*
field
;
field
=
dtuple_get_nth_field
(
tuple
,
i
);
data
=
rec_get_nth_field_old
(
ibuf_rec
,
i
+
2
,
&
len
);
dfield_set_data
(
field
,
data
,
len
);
dtype_read_for_order_and_null_size
(
dfield_get_type
(
field
),
types
+
i
*
DATA_ORDER_NULL_TYPE_BUF_SIZE
);
}
*
pindex
=
ibuf_dummy_index_create
(
n_fields
,
FALSE
);
return
(
tuple
);
}
/*************************************************************************
Builds the entry used to
1) IBUF_OP_INSERT: insert into a non-clustered index
2) IBUF_OP_DELETE_MARK: find the record whose delete-mark flag we need to
activate
3) IBUF_OP_DELETE: find the record we need to delete
when we have the corresponding record in an ibuf index. */
static
dtuple_t
*
ibuf_build_entry_from_ibuf_rec
(
...
...
@@ -1263,7 +1431,9 @@ ibuf_build_entry_from_ibuf_rec(
const
byte
*
types
;
const
byte
*
data
;
ulint
len
;
ulint
info_len
;
ulint
i
;
ulint
comp
;
dict_index_t
*
index
;
data
=
rec_get_nth_field_old
(
ibuf_rec
,
1
,
&
len
);
...
...
@@ -1271,29 +1441,7 @@ ibuf_build_entry_from_ibuf_rec(
if
(
len
>
1
)
{
/* This a < 4.1.x format record */
ut_a
(
trx_doublewrite_must_reset_space_ids
);
ut_a
(
!
trx_sys_multiple_tablespace_format
);
n_fields
=
rec_get_n_fields_old
(
ibuf_rec
)
-
2
;
tuple
=
dtuple_create
(
heap
,
n_fields
);
types
=
rec_get_nth_field_old
(
ibuf_rec
,
1
,
&
len
);
ut_a
(
len
==
n_fields
*
DATA_ORDER_NULL_TYPE_BUF_SIZE
);
for
(
i
=
0
;
i
<
n_fields
;
i
++
)
{
field
=
dtuple_get_nth_field
(
tuple
,
i
);
data
=
rec_get_nth_field_old
(
ibuf_rec
,
i
+
2
,
&
len
);
dfield_set_data
(
field
,
data
,
len
);
dtype_read_for_order_and_null_size
(
dfield_get_type
(
field
),
types
+
i
*
DATA_ORDER_NULL_TYPE_BUF_SIZE
);
}
*
pindex
=
ibuf_dummy_index_create
(
n_fields
,
FALSE
);
return
(
tuple
);
return
(
ibuf_build_entry_pre_4_1_x
(
ibuf_rec
,
heap
,
pindex
));
}
/* This a >= 4.1.x format record */
...
...
@@ -1308,16 +1456,12 @@ ibuf_build_entry_from_ibuf_rec(
types
=
rec_get_nth_field_old
(
ibuf_rec
,
3
,
&
len
);
ut_a
(
len
%
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
<=
1
);
index
=
ibuf_dummy_index_create
(
n_fields
,
len
%
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
);
ibuf_rec_get_info
(
ibuf_rec
,
NULL
,
&
comp
,
&
info_len
,
NULL
);
if
(
len
%
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
)
{
/* compact record format */
len
--
;
ut_a
(
*
types
==
0
);
types
++
;
}
index
=
ibuf_dummy_index_create
(
n_fields
,
comp
);
len
-=
info_len
;
types
+=
info_len
;
ut_a
(
len
==
n_fields
*
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
);
...
...
@@ -1343,9 +1487,51 @@ ibuf_build_entry_from_ibuf_rec(
ut_d
(
dict_table_add_system_columns
(
index
->
table
,
index
->
table
->
heap
));
*
pindex
=
index
;
return
(
tuple
);
}
/**********************************************************************
Get the data size. */
UNIV_INLINE
ulint
ibuf_rec_get_size
(
/*==============*/
/* out: size of fields */
const
rec_t
*
rec
,
/* in: ibuf record */
const
byte
*
types
,
/* in: fields */
ulint
n_fields
,
/* in: number of fields */
ibool
new_format
)
/* in: TRUE or FALSE */
{
ulint
i
;
ulint
offset
;
ulint
size
=
0
;
/* 4 for compact record and 2 for old style. */
offset
=
new_format
?
4
:
2
;
for
(
i
=
0
;
i
<
n_fields
;
i
++
)
{
ulint
len
;
const
byte
*
field
;
field
=
rec_get_nth_field_old
(
rec
,
i
+
offset
,
&
len
);
if
(
len
==
UNIV_SQL_NULL
)
{
dtype_t
dtype
;
dtype_read_for_order_and_null_size
(
&
dtype
,
types
+
i
*
DATA_ORDER_NULL_TYPE_BUF_SIZE
);
size
+=
dtype_get_sql_null_size
(
&
dtype
);
}
else
{
size
+=
len
;
}
}
return
(
size
);
}
/************************************************************************
Returns the space taken by a stored non-clustered index entry if converted to
an index record. */
...
...
@@ -1358,14 +1544,12 @@ ibuf_rec_get_volume(
page directory */
const
rec_t
*
ibuf_rec
)
/* in: ibuf record */
{
dtype_t
dtype
;
ibool
new_format
=
FALSE
;
ulint
data_size
=
0
;
ulint
n_fields
;
const
byte
*
types
;
const
byte
*
data
;
ulint
len
;
ulint
i
;
const
byte
*
data
;
const
byte
*
types
;
ulint
n_fields
;
ulint
data_size
=
0
;
ibool
new_format
=
FALSE
;
ut_ad
(
ibuf_inside
());
ut_ad
(
rec_get_n_fields_old
(
ibuf_rec
)
>
2
);
...
...
@@ -1383,54 +1567,52 @@ ibuf_rec_get_volume(
types
=
rec_get_nth_field_old
(
ibuf_rec
,
1
,
&
len
);
ut_ad
(
len
==
n_fields
*
DATA_ORDER_NULL_TYPE_BUF_SIZE
);
}
else
{
/* >= 4.1.x format record */
ibuf_op_t
op
;
ibool
comp
;
ulint
info_len
;
ut_a
(
trx_sys_multiple_tablespace_format
);
ut_a
(
*
data
==
0
);
types
=
rec_get_nth_field_old
(
ibuf_rec
,
3
,
&
len
);
ut_a
(
len
%
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
<=
1
);
if
(
len
%
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
)
{
/* compact record format */
ibuf_rec_get_info
(
ibuf_rec
,
&
op
,
&
comp
,
&
info_len
,
NULL
);
if
(
op
==
IBUF_OP_DELETE_MARK
||
op
==
IBUF_OP_DELETE
)
{
/* Delete-marking a record doesn't take any
additional space, and while deleting a record
actually frees up space, we have to play it safe and
pretend it takes no additional space (the record
might not exist, etc.). */
return
(
0
);
}
else
if
(
comp
)
{
dtuple_t
*
entry
;
ulint
volume
;
dict_index_t
*
dummy_index
;
mem_heap_t
*
heap
=
mem_heap_create
(
500
);
dtuple_t
*
entry
=
ibuf_build_entry_from_ibuf_rec
(
entry
=
ibuf_build_entry_from_ibuf_rec
(
ibuf_rec
,
heap
,
&
dummy_index
);
volume
=
rec_get_converted_size
(
dummy_index
,
entry
,
0
);
ibuf_dummy_index_free
(
dummy_index
);
mem_heap_free
(
heap
);
return
(
volume
+
page_dir_calc_reserved_space
(
1
));
}
types
+=
info_len
;
n_fields
=
rec_get_n_fields_old
(
ibuf_rec
)
-
4
;
new_format
=
TRUE
;
}
for
(
i
=
0
;
i
<
n_fields
;
i
++
)
{
if
(
new_format
)
{
data
=
rec_get_nth_field_old
(
ibuf_rec
,
i
+
4
,
&
len
);
dtype_new_read_for_order_and_null_size
(
&
dtype
,
types
+
i
*
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
);
}
else
{
data
=
rec_get_nth_field_old
(
ibuf_rec
,
i
+
2
,
&
len
);
dtype_read_for_order_and_null_size
(
&
dtype
,
types
+
i
*
DATA_ORDER_NULL_TYPE_BUF_SIZE
);
}
if
(
len
==
UNIV_SQL_NULL
)
{
data_size
+=
dtype_get_sql_null_size
(
&
dtype
);
}
else
{
data_size
+=
len
;
}
}
data_size
=
ibuf_rec_get_size
(
ibuf_rec
,
types
,
n_fields
,
new_format
);
return
(
data_size
+
rec_get_converted_extra_size
(
data_size
,
n_fields
,
0
)
+
page_dir_calc_reserved_space
(
1
));
...
...
@@ -1447,40 +1629,38 @@ ibuf_entry_build(
index tree; NOTE that the original entry
must be kept because we copy pointers to its
fields */
ibuf_op_t
op
,
/* in: operation type */
dict_index_t
*
index
,
/* in: non-clustered index */
const
dtuple_t
*
entry
,
/* in: entry for a non-clustered index */
ulint
space
,
/* in: space id */
ulint
page_no
,
/* in: index page number where entry should
be inserted */
ulint
counter
,
/* in: counter value */
mem_heap_t
*
heap
)
/* in: heap into which to build */
{
dtuple_t
*
tuple
;
dfield_t
*
field
;
const
dfield_t
*
entry_field
;
ulint
n_fields
;
ulint
type_info_size
;
byte
*
buf
;
byte
*
buf2
;
ulint
i
;
/* Starting from 4.1.x, we have to build a tuple whose
(1) first field is the space id,
(2) the second field a single marker byte (0) to tell that this
is a new format record,
(3) the third contains the page number, and
(4) the fourth contains the relevent type information of each data
field; the length of this field % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE is
(a) 0 for b-trees in the old format, and
(b) 1 for b-trees in the compact format, the first byte of the field
being the marker (0);
(5) and the rest of the fields are copied from entry. All fields
in the tuple are ordered like the type binary in our insert buffer
tree. */
/* We have to build a tuple with the following fields:
1-4) These are described at the top of this file.
5) The rest of the fields are copied from the entry.
All fields in the tuple are ordered like the type binary in our
insert buffer tree. */
n_fields
=
dtuple_get_n_fields
(
entry
);
tuple
=
dtuple_create
(
heap
,
n_fields
+
4
);
/*
Store the space id in tuple
*/
/*
1) Space Id
*/
field
=
dtuple_get_nth_field
(
tuple
,
0
);
...
...
@@ -1490,7 +1670,7 @@ ibuf_entry_build(
dfield_set_data
(
field
,
buf
,
4
);
/*
Store the marker byte field in tupl
e */
/*
2) Marker byt
e */
field
=
dtuple_get_nth_field
(
tuple
,
1
);
...
...
@@ -1502,7 +1682,7 @@ ibuf_entry_build(
dfield_set_data
(
field
,
buf
,
1
);
/*
Store the page number in tuple
*/
/*
3) Page number
*/
field
=
dtuple_get_nth_field
(
tuple
,
2
);
...
...
@@ -1512,14 +1692,20 @@ ibuf_entry_build(
dfield_set_data
(
field
,
buf
,
4
);
/* Store the type info in buf2, and add the fields from entry to
tuple */
buf2
=
mem_heap_alloc
(
heap
,
n_fields
*
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+
dict_table_is_comp
(
index
->
table
));
if
(
dict_table_is_comp
(
index
->
table
))
{
*
buf2
++
=
0
;
/* write the compact format indicator */
}
/* 4) Type info, part #1 */
type_info_size
=
IBUF_REC_INFO_SIZE
+
n_fields
*
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
;
buf2
=
mem_heap_alloc
(
heap
,
type_info_size
);
mach_write_to_2
(
buf2
+
IBUF_REC_OFFSET_COUNTER
,
counter
);
buf2
[
IBUF_REC_OFFSET_TYPE
]
=
(
byte
)
op
;
buf2
[
IBUF_REC_OFFSET_FLAGS
]
=
dict_table_is_comp
(
index
->
table
)
?
IBUF_REC_COMPACT
:
0
;
/* 5+) Fields from the entry */
for
(
i
=
0
;
i
<
n_fields
;
i
++
)
{
ulint
fixed_len
;
const
dict_field_t
*
ifield
;
...
...
@@ -1554,21 +1740,17 @@ ibuf_entry_build(
#endif
/* UNIV_DEBUG */
dtype_new_store_for_order_and_null_size
(
buf2
+
i
*
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
,
buf2
+
IBUF_REC_INFO_SIZE
+
i
*
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
,
dfield_get_type
(
entry_field
),
fixed_len
);
}
/*
Store the type info in buf2 to field 3 of tuple
*/
/*
4) Type info, part #2
*/
field
=
dtuple_get_nth_field
(
tuple
,
3
);
if
(
dict_table_is_comp
(
index
->
table
))
{
buf2
--
;
}
dfield_set_data
(
field
,
buf2
,
type_info_size
);
dfield_set_data
(
field
,
buf2
,
n_fields
*
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+
dict_table_is_comp
(
index
->
table
));
/* Set all the types in the new tuple binary */
dtuple_set_types_binary
(
tuple
,
n_fields
+
4
);
...
...
@@ -1673,10 +1855,9 @@ Checks if there are enough pages in the free list of the ibuf tree that we
dare to start a pessimistic insert to the insert buffer. */
UNIV_INLINE
ibool
ibuf_data_enough_free_for_insert
(
/*=============================*/
ibuf_data_enough_free_for_insert
(
void
)
/*=============================
=====
*/
/* out: TRUE if enough free pages in list */
ibuf_data_t
*
data
)
/* in: ibuf data for the space */
{
ut_ad
(
mutex_own
(
&
ibuf_mutex
));
...
...
@@ -1686,7 +1867,7 @@ ibuf_data_enough_free_for_insert(
inserts buffered for pages that we read to the buffer pool, without
any risk of running out of free space in the insert buffer. */
return
(
data
->
free_list_len
>=
data
->
size
/
2
+
3
*
data
->
height
);
return
(
ibuf
->
free_list_len
>=
(
ibuf
->
size
/
2
)
+
3
*
ibuf
->
height
);
}
/*************************************************************************
...
...
@@ -1694,14 +1875,13 @@ Checks if there are enough pages in the free list of the ibuf tree that we
should remove them and free to the file space management. */
UNIV_INLINE
ibool
ibuf_data_too_much_free
(
/*====================*/
ibuf_data_too_much_free
(
void
)
/*====================
=====
*/
/* out: TRUE if enough free pages in list */
ibuf_data_t
*
data
)
/* in: ibuf data for the space */
{
ut_ad
(
mutex_own
(
&
ibuf_mutex
));
return
(
data
->
free_list_len
>=
3
+
data
->
size
/
2
+
3
*
data
->
height
);
return
(
ibuf
->
free_list_len
>=
3
+
(
ibuf
->
size
/
2
)
+
3
*
ibuf
->
height
);
}
/*************************************************************************
...
...
@@ -1709,12 +1889,10 @@ Allocates a new page from the ibuf file segment and adds it to the free
list. */
static
ulint
ibuf_add_free_page
(
/*===============*/
ibuf_add_free_page
(
void
)
/*===============
=====
*/
/* out: DB_SUCCESS, or DB_STRONG_FAIL
if no space left */
ulint
space
,
/* in: space id */
ibuf_data_t
*
ibuf_data
)
/* in: ibuf data for the space */
{
mtr_t
mtr
;
page_t
*
header_page
;
...
...
@@ -1724,15 +1902,13 @@ ibuf_add_free_page(
page_t
*
root
;
page_t
*
bitmap_page
;
ut_a
(
space
==
0
);
mtr_start
(
&
mtr
);
/* Acquire the fsp latch before the ibuf header, obeying the latching
order */
mtr_x_lock
(
fil_space_get_latch
(
space
,
&
zip_size
),
&
mtr
);
mtr_x_lock
(
fil_space_get_latch
(
IBUF_SPACE_ID
,
&
zip_size
),
&
mtr
);
header_page
=
ibuf_header_page_get
(
space
,
&
mtr
);
header_page
=
ibuf_header_page_get
(
&
mtr
);
/* Allocate a new page: NOTE that if the page has been a part of a
non-clustered index which has subsequently been dropped, then the
...
...
@@ -1744,9 +1920,10 @@ ibuf_add_free_page(
of a deadlock. This is the reason why we created a special ibuf
header page apart from the ibuf tree. */
page_no
=
fseg_alloc_free_page
(
header_page
+
IBUF_HEADER
+
IBUF_TREE_SEG_HEADER
,
0
,
FSP_UP
,
&
mtr
);
page_no
=
fseg_alloc_free_page
(
header_page
+
IBUF_HEADER
+
IBUF_TREE_SEG_HEADER
,
0
,
FSP_UP
,
&
mtr
);
if
(
page_no
==
FIL_NULL
)
{
mtr_commit
(
&
mtr
);
...
...
@@ -1754,11 +1931,15 @@ ibuf_add_free_page(
}
{
buf_block_t
*
block
=
buf_page_get
(
space
,
0
,
page_no
,
RW_X_LATCH
,
&
mtr
);
buf_block_t
*
block
;
block
=
buf_page_get
(
IBUF_SPACE_ID
,
0
,
page_no
,
RW_X_LATCH
,
&
mtr
);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level
(
block
,
SYNC_TREE_NODE_NEW
);
#endif
/* UNIV_SYNC_DEBUG */
page
=
buf_block_get_frame
(
block
);
}
...
...
@@ -1766,7 +1947,7 @@ ibuf_add_free_page(
mutex_enter
(
&
ibuf_mutex
);
root
=
ibuf_tree_root_get
(
ibuf_data
,
space
,
&
mtr
);
root
=
ibuf_tree_root_get
(
&
mtr
);
/* Add the page to the free list and update the ibuf size data */
...
...
@@ -1776,16 +1957,18 @@ ibuf_add_free_page(
mlog_write_ulint
(
page
+
FIL_PAGE_TYPE
,
FIL_PAGE_IBUF_FREE_LIST
,
MLOG_2BYTES
,
&
mtr
);
ibuf
_data
->
seg_size
++
;
ibuf
_data
->
free_list_len
++
;
ibuf
->
seg_size
++
;
ibuf
->
free_list_len
++
;
/* Set the bit indicating that this page is now an ibuf tree page
(level 2 page) */
bitmap_page
=
ibuf_bitmap_get_map_page
(
space
,
page_no
,
zip_size
,
&
mtr
);
bitmap_page
=
ibuf_bitmap_get_map_page
(
IBUF_SPACE_ID
,
page_no
,
zip_size
,
&
mtr
);
ibuf_bitmap_page_set_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_IBUF
,
TRUE
,
&
mtr
);
ibuf_bitmap_page_set_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_IBUF
,
TRUE
,
&
mtr
);
mtr_commit
(
&
mtr
);
mutex_exit
(
&
ibuf_mutex
);
...
...
@@ -1799,10 +1982,8 @@ ibuf_add_free_page(
Removes a page from the free list and frees it to the fsp system. */
static
void
ibuf_remove_free_page
(
/*==================*/
ulint
space
,
/* in: space id */
ibuf_data_t
*
ibuf_data
)
/* in: ibuf data for the space */
ibuf_remove_free_page
(
void
)
/*=======================*/
{
mtr_t
mtr
;
mtr_t
mtr2
;
...
...
@@ -1813,15 +1994,13 @@ ibuf_remove_free_page(
page_t
*
root
;
page_t
*
bitmap_page
;
ut_a
(
space
==
0
);
mtr_start
(
&
mtr
);
/* Acquire the fsp latch before the ibuf header, obeying the latching
order */
mtr_x_lock
(
fil_space_get_latch
(
space
,
&
zip_size
),
&
mtr
);
mtr_x_lock
(
fil_space_get_latch
(
IBUF_SPACE_ID
,
&
zip_size
),
&
mtr
);
header_page
=
ibuf_header_page_get
(
space
,
&
mtr
);
header_page
=
ibuf_header_page_get
(
&
mtr
);
/* Prevent pessimistic inserts to insert buffer trees for a while */
mutex_enter
(
&
ibuf_pessimistic_insert_mutex
);
...
...
@@ -1830,7 +2009,7 @@ ibuf_remove_free_page(
mutex_enter
(
&
ibuf_mutex
);
if
(
!
ibuf_data_too_much_free
(
ibuf_data
))
{
if
(
!
ibuf_data_too_much_free
())
{
mutex_exit
(
&
ibuf_mutex
);
...
...
@@ -1845,11 +2024,10 @@ ibuf_remove_free_page(
mtr_start
(
&
mtr2
);
root
=
ibuf_tree_root_get
(
ibuf_data
,
space
,
&
mtr2
);
root
=
ibuf_tree_root_get
(
&
mtr2
);
page_no
=
flst_get_last
(
root
+
PAGE_HEADER
+
PAGE_BTR_IBUF_FREE_LIST
,
&
mtr2
)
.
page
;
&
mtr2
).
page
;
/* NOTE that we must release the latch on the ibuf tree root
because in fseg_free_page we access level 1 pages, and the root
...
...
@@ -1867,26 +2045,31 @@ ibuf_remove_free_page(
page from it. */
fseg_free_page
(
header_page
+
IBUF_HEADER
+
IBUF_TREE_SEG_HEADER
,
space
,
page_no
,
&
mtr
);
IBUF_SPACE_ID
,
page_no
,
&
mtr
);
#ifdef UNIV_DEBUG_FILE_ACCESSES
buf_page_reset_file_page_was_freed
(
space
,
page_no
);
buf_page_reset_file_page_was_freed
(
IBUF_SPACE_ID
,
page_no
);
#endif
ibuf_enter
();
mutex_enter
(
&
ibuf_mutex
);
root
=
ibuf_tree_root_get
(
ibuf_data
,
space
,
&
mtr
);
root
=
ibuf_tree_root_get
(
&
mtr
);
ut_ad
(
page_no
==
flst_get_last
(
root
+
PAGE_HEADER
+
PAGE_BTR_IBUF_FREE_LIST
,
&
mtr
)
.
page
);
+
PAGE_BTR_IBUF_FREE_LIST
,
&
mtr
).
page
);
{
buf_block_t
*
block
=
buf_page_get
(
space
,
0
,
page_no
,
RW_X_LATCH
,
&
mtr
);
buf_block_t
*
block
;
block
=
buf_page_get
(
IBUF_SPACE_ID
,
0
,
page_no
,
RW_X_LATCH
,
&
mtr
);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level
(
block
,
SYNC_TREE_NODE
);
#endif
/* UNIV_SYNC_DEBUG */
page
=
buf_block_get_frame
(
block
);
}
...
...
@@ -1895,20 +2078,22 @@ ibuf_remove_free_page(
flst_remove
(
root
+
PAGE_HEADER
+
PAGE_BTR_IBUF_FREE_LIST
,
page
+
PAGE_HEADER
+
PAGE_BTR_IBUF_FREE_LIST_NODE
,
&
mtr
);
ibuf
_data
->
seg_size
--
;
ibuf
_data
->
free_list_len
--
;
ibuf
->
seg_size
--
;
ibuf
->
free_list_len
--
;
mutex_exit
(
&
ibuf_pessimistic_insert_mutex
);
/* Set the bit indicating that this page is no more an ibuf tree page
(level 2 page) */
bitmap_page
=
ibuf_bitmap_get_map_page
(
space
,
page_no
,
zip_size
,
&
mtr
);
bitmap_page
=
ibuf_bitmap_get_map_page
(
IBUF_SPACE_ID
,
page_no
,
zip_size
,
&
mtr
);
ibuf_bitmap_page_set_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_IBUF
,
FALSE
,
&
mtr
);
ibuf_bitmap_page_set_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_IBUF
,
FALSE
,
&
mtr
);
#ifdef UNIV_DEBUG_FILE_ACCESSES
buf_page_set_file_page_was_freed
(
space
,
page_no
);
buf_page_set_file_page_was_freed
(
IBUF_SPACE_ID
,
page_no
);
#endif
mtr_commit
(
&
mtr
);
...
...
@@ -1923,39 +2108,28 @@ thread calls fsp services to allocate a new file segment, or a new page to a
file segment, and the thread did not own the fsp latch before this call. */
UNIV_INTERN
void
ibuf_free_excess_pages
(
/*===================*/
ulint
space
)
/* in: compressed page size in bytes, or 0 */
ibuf_free_excess_pages
(
void
)
/*=======================*/
{
ibuf_data_t
*
ibuf_data
;
ulint
i
;
if
(
space
!=
0
)
{
fprintf
(
stderr
,
"InnoDB: Error: calling ibuf_free_excess_pages"
" for space %lu
\n
"
,
(
ulong
)
space
);
return
;
}
#ifdef UNIV_SYNC_DEBUG
ut_ad
(
rw_lock_own
(
fil_space_get_latch
(
space
,
NULL
),
RW_LOCK_EX
));
ut_ad
(
rw_lock_own
(
fil_space_get_latch
(
IBUF_SPACE_ID
,
NULL
),
RW_LOCK_EX
));
#endif
/* UNIV_SYNC_DEBUG */
ut_ad
(
rw_lock_get_x_lock_count
(
fil_space_get_latch
(
space
,
NULL
))
==
1
);
ut_ad
(
rw_lock_get_x_lock_count
(
fil_space_get_latch
(
IBUF_SPACE_ID
,
NULL
))
==
1
);
ut_ad
(
!
ibuf_inside
());
/* NOTE: We require that the thread did not own the latch before,
because then we know that we can obey the correct latching order
for ibuf latches */
ibuf_data
=
fil_space_get_ibuf_data
(
space
);
if
(
ibuf_data
==
NULL
)
{
/* Not yet initialized */
#if 0 /* defined UNIV_DEBUG */
fprintf(stderr,
"Ibuf for space %lu not yet initialized\n", space);
#endif
if
(
!
ibuf
)
{
/* Not yet initialized; not sure if this is possible, but
does no harm to check for it. */
return
;
}
...
...
@@ -1967,7 +2141,7 @@ ibuf_free_excess_pages(
mutex_enter
(
&
ibuf_mutex
);
if
(
!
ibuf_data_too_much_free
(
ibuf_data
))
{
if
(
!
ibuf_data_too_much_free
())
{
mutex_exit
(
&
ibuf_mutex
);
...
...
@@ -1976,7 +2150,7 @@ ibuf_free_excess_pages(
mutex_exit
(
&
ibuf_mutex
);
ibuf_remove_free_page
(
space
,
ibuf_data
);
ibuf_remove_free_page
();
}
}
...
...
@@ -2051,14 +2225,13 @@ ibuf_get_merge_page_nos(
rec_space_id
=
ibuf_rec_get_space
(
rec
);
if
(
rec_space_id
!=
first_space_id
||
rec_page_no
/
IBUF_MERGE_AREA
!=
first_page_no
/
IBUF_MERGE_AREA
)
{
||
(
rec_page_no
/
IBUF_MERGE_AREA
)
!=
(
first_page_no
/
IBUF_MERGE_AREA
)
)
{
break
;
}
}
else
if
(
rec_page_no
!=
prev_page_no
||
rec_space_id
!=
prev_space_id
)
{
if
(
rec_page_no
!=
prev_page_no
||
rec_space_id
!=
prev_space_id
)
{
n_pages
++
;
}
...
...
@@ -2167,11 +2340,7 @@ ibuf_contract_ext(
issued read with the highest tablespace address
to complete */
{
ulint
rnd_pos
;
ibuf_data_t
*
data
;
btr_pcur_t
pcur
;
ulint
space
;
ibool
all_trees_empty
;
ulint
page_nos
[
IBUF_MAX_N_PAGES_MERGED
];
ulint
space_ids
[
IBUF_MAX_N_PAGES_MERGED
];
ib_longlong
space_versions
[
IBUF_MAX_N_PAGES_MERGED
];
...
...
@@ -2180,54 +2349,16 @@ ibuf_contract_ext(
mtr_t
mtr
;
*
n_pages
=
0
;
loop:
ut_ad
(
!
ibuf_inside
());
mutex_enter
(
&
ibuf_mutex
);
ut_ad
(
ibuf_validate_low
());
/* Choose an ibuf tree at random (though there really is only one tree
in the current implementation) */
ibuf_rnd
+=
865558671
;
rnd_pos
=
ibuf_rnd
%
ibuf
->
size
;
all_trees_empty
=
TRUE
;
data
=
UT_LIST_GET_FIRST
(
ibuf
->
data_list
);
for
(;;)
{
if
(
!
data
->
empty
)
{
all_trees_empty
=
FALSE
;
if
(
rnd_pos
<
data
->
size
)
{
break
;
}
rnd_pos
-=
data
->
size
;
}
data
=
UT_LIST_GET_NEXT
(
data_list
,
data
);
if
(
data
==
NULL
)
{
if
(
all_trees_empty
)
{
mutex_exit
(
&
ibuf_mutex
);
return
(
0
);
}
data
=
UT_LIST_GET_FIRST
(
ibuf
->
data_list
);
}
}
mutex_enter
(
&
ibuf_mutex
);
ut_ad
(
data
);
if
(
ibuf
->
empty
)
{
mutex_exit
(
&
ibuf_mutex
);
space
=
data
->
index
->
space
;
return
(
0
);
}
ut_a
(
space
==
0
);
/* We currently only have an ibuf tree in
space 0 */
mtr_start
(
&
mtr
);
ibuf_enter
();
...
...
@@ -2235,13 +2366,16 @@ ibuf_contract_ext(
/* Open a cursor to a randomly chosen leaf of the tree, at a random
position within the leaf */
btr_pcur_open_at_rnd_pos
(
data
->
index
,
BTR_SEARCH_LEAF
,
&
pcur
,
&
mtr
);
btr_pcur_open_at_rnd_pos
(
ibuf
->
index
,
BTR_SEARCH_LEAF
,
&
pcur
,
&
mtr
);
if
(
0
==
page_get_n_recs
(
btr_pcur_get_page
(
&
pcur
)))
{
if
(
page_get_n_recs
(
btr_pcur_get_page
(
&
pcur
))
==
0
)
{
/* When the ibuf tree is emptied completely, the last record
is removed using an optimistic delete and ibuf_size_update
is not called, causing ibuf->empty to remain FALSE. If we do
not reset it to TRUE here then database shutdown will hang
in the loop in ibuf_contract_for_n_pages. */
/* This tree is empty */
data
->
empty
=
TRUE
;
ibuf
->
empty
=
TRUE
;
ibuf_exit
();
...
...
@@ -2250,14 +2384,15 @@ ibuf_contract_ext(
mutex_exit
(
&
ibuf_mutex
);
goto
loop
;
return
(
0
)
;
}
mutex_exit
(
&
ibuf_mutex
);
sum_sizes
=
ibuf_get_merge_page_nos
(
TRUE
,
btr_pcur_get_rec
(
&
pcur
),
space_ids
,
space_versions
,
page_nos
,
&
n_stored
);
sum_sizes
=
ibuf_get_merge_page_nos
(
TRUE
,
btr_pcur_get_rec
(
&
pcur
),
space_ids
,
space_versions
,
page_nos
,
&
n_stored
);
#if 0 /* defined UNIV_IBUF_DEBUG */
fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
sync, n_stored, sum_sizes);
...
...
@@ -2441,13 +2576,18 @@ ibuf_get_volume_buffered(
}
{
buf_block_t
*
block
=
buf_page_get
(
0
,
0
,
prev_page_no
,
RW_X_LATCH
,
mtr
);
buf_block_t
*
block
;
block
=
buf_page_get
(
IBUF_SPACE_ID
,
0
,
prev_page_no
,
RW_X_LATCH
,
mtr
);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level
(
block
,
SYNC_TREE_NODE
);
#endif
/* UNIV_SYNC_DEBUG */
prev_page
=
buf_block_get_frame
(
block
);
}
#ifdef UNIV_BTR_DEBUG
ut_a
(
btr_page_get_next
(
prev_page
,
mtr
)
==
page_get_page_no
(
page
));
...
...
@@ -2511,16 +2651,20 @@ ibuf_get_volume_buffered(
}
{
buf_block_t
*
block
=
buf_page_get
(
0
,
0
,
next_page_no
,
RW_X_LATCH
,
mtr
);
buf_block_t
*
block
;
block
=
buf_page_get
(
IBUF_SPACE_ID
,
0
,
next_page_no
,
RW_X_LATCH
,
mtr
);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level
(
block
,
SYNC_TREE_NODE
);
#endif
/* UNIV_SYNC_DEBUG */
next_page
=
buf_block_get_frame
(
block
);
}
#ifdef UNIV_BTR_DEBUG
ut_a
(
btr_page_get_prev
(
next_page
,
mtr
)
==
page_get_page_no
(
page
));
ut_a
(
btr_page_get_prev
(
next_page
,
mtr
)
==
page_get_page_no
(
page
));
#endif
/* UNIV_BTR_DEBUG */
rec
=
page_get_infimum_rec
(
next_page
);
...
...
@@ -2558,22 +2702,18 @@ ibuf_update_max_tablespace_id(void)
const
rec_t
*
rec
;
const
byte
*
field
;
ulint
len
;
ibuf_data_t
*
ibuf_data
;
dict_index_t
*
ibuf_index
;
btr_pcur_t
pcur
;
mtr_t
mtr
;
ibuf_data
=
fil_space_get_ibuf_data
(
0
);
ibuf_index
=
ibuf_data
->
index
;
ut_a
(
!
dict_table_is_comp
(
ibuf_index
->
table
));
ut_a
(
!
dict_table_is_comp
(
ibuf
->
index
->
table
));
ibuf_enter
();
mtr_start
(
&
mtr
);
btr_pcur_open_at_index_side
(
FALSE
,
ibuf_index
,
BTR_SEARCH_LEAF
,
&
pcur
,
TRUE
,
&
mtr
);
btr_pcur_open_at_index_side
(
FALSE
,
ibuf
->
index
,
BTR_SEARCH_LEAF
,
&
pcur
,
TRUE
,
&
mtr
);
btr_pcur_move_to_prev
(
&
pcur
,
&
mtr
);
if
(
btr_pcur_is_before_first_on_page
(
&
pcur
))
{
...
...
@@ -2598,6 +2738,165 @@ ibuf_update_max_tablespace_id(void)
fil_set_max_space_id_if_bigger
(
max_space_id
);
}
/********************************************************************
Helper function for ibuf_set_entry_counter. Checks if rec is for (space,
page_no), and if so, reads counter value from it and returns that + 1.
Otherwise, returns 0. */
static
ulint
ibuf_set_entry_counter_low
(
/*=======================*/
/* out: new counter value */
rec_t
*
rec
,
/* in: record */
ulint
space
,
/* in: space id */
ulint
page_no
)
/* in: page number */
{
ulint
counter
;
if
(
ibuf_rec_get_space
(
rec
)
==
space
&&
ibuf_rec_get_page_no
(
rec
)
==
page_no
)
{
ibuf_rec_get_info
(
rec
,
NULL
,
NULL
,
NULL
,
&
counter
);
ut_a
(
counter
<
0xFFFF
);
counter
++
;
}
else
{
/* No entries in ibuf tree for (space, page_no). */
counter
=
0
;
}
return
(
counter
);
}
/********************************************************************
Set the counter field in entry to the correct value based on the current
last record in ibuf for (space, page_no). */
static
ibool
ibuf_set_entry_counter
(
/*===================*/
/* out: FALSE if we should abort
this insertion to ibuf */
dtuple_t
*
entry
,
/* in: entry to patch */
ulint
space
,
/* in: space id of entry */
ulint
page_no
,
/* in: page number of entry */
btr_pcur_t
*
pcur
,
/* in: pcur positioned on the record
found by btr_pcur_open(.., entry,
PAGE_CUR_LE, ..., pcur, ...) */
ibool
is_optimistic
,
/* in: is this an optimistic insert */
mtr_t
*
mtr
)
/* in: mtr */
{
ulint
counter
=
0xFFFF
+
1
;
dfield_t
*
field
;
void
*
data
;
/* FIXME: if pcur (or the previous rec if we're on infimum) points
to a record that has no counter field, return FALSE since we can't
mix records with counters with records without counters. */
/* pcur points to either a user rec or to a page's infimum record. */
if
(
btr_pcur_is_on_user_rec
(
pcur
))
{
counter
=
ibuf_set_entry_counter_low
(
btr_pcur_get_rec
(
pcur
),
space
,
page_no
);
}
else
if
(
btr_pcur_is_before_first_in_tree
(
pcur
,
mtr
))
{
/* Ibuf tree is either completely empty, or the insert
position is at the very first record of a non-empty tree. In
either case we have no previous records for (space,
page_no). */
counter
=
0
;
}
else
if
(
btr_pcur_is_before_first_on_page
(
pcur
))
{
btr_cur_t
*
cursor
=
btr_pcur_get_btr_cur
(
pcur
);
if
(
cursor
->
low_match
<
3
)
{
/* If low_match < 3, we know that the father node
pointer did not contain the searched for (space,
page_no), which means that the search ended on the
right page regardless of the counter value, and
since we're at the infimum record, there are no
existing records. */
counter
=
0
;
}
else
{
rec_t
*
rec
;
page_t
*
page
;
buf_block_t
*
block
;
page_t
*
prev_page
;
ulint
prev_page_no
;
ut_a
(
cursor
->
ibuf_cnt
!=
ULINT_UNDEFINED
);
page
=
btr_pcur_get_page
(
pcur
);
prev_page_no
=
btr_page_get_prev
(
page
,
mtr
);
ut_ad
(
prev_page_no
!=
FIL_NULL
);
block
=
buf_page_get
(
IBUF_SPACE_ID
,
0
,
prev_page_no
,
RW_X_LATCH
,
mtr
);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level
(
block
,
SYNC_TREE_NODE
);
#endif
/* UNIV_SYNC_DEBUG */
prev_page
=
buf_block_get_frame
(
block
);
rec
=
page_rec_get_prev
(
page_get_supremum_rec
(
prev_page
));
ut_ad
(
page_rec_is_user_rec
(
rec
));
counter
=
ibuf_set_entry_counter_low
(
rec
,
space
,
page_no
);
if
(
counter
<
cursor
->
ibuf_cnt
)
{
/* Search ended on the wrong page. */
if
(
is_optimistic
)
{
/* In an optimistic insert, we can
shift the insert position to the left
page, since it only needs an X-latch
on the page itself, which the
original search acquired for us. */
btr_cur_position
(
ibuf
->
index
,
rec
,
block
,
btr_pcur_get_btr_cur
(
pcur
));
}
else
{
/* We can't shift the insert
position to the left page in a
pessimistic insert since it would
require an X-latch on the left
page's left page, so we have to
abort. */
return
(
FALSE
);
}
}
else
{
/* The counter field in the father node is
the same as we would insert; we don't know
whether the insert should go to this page or
the left page (the later fields can differ),
so refuse the insert. */
return
(
FALSE
);
}
}
}
/* Patch counter value in already built entry. */
field
=
dtuple_get_nth_field
(
entry
,
3
);
data
=
dfield_get_data
(
field
);
mach_write_to_2
((
byte
*
)
data
+
IBUF_REC_OFFSET_COUNTER
,
counter
);
return
(
TRUE
);
}
/*************************************************************************
Makes an index insert to the insert buffer, instead of directly to the disk
page, if this is possible. */
...
...
@@ -2607,6 +2906,7 @@ ibuf_insert_low(
/*============*/
/* out: DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */
ulint
mode
,
/* in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
ibuf_op_t
op
,
/* in: operation type */
const
dtuple_t
*
entry
,
/* in: index entry to insert */
ulint
entry_size
,
/* in: rec_get_converted_size(index, entry) */
...
...
@@ -2626,8 +2926,6 @@ ibuf_insert_low(
rec_t
*
ins_rec
;
ibool
old_bit_value
;
page_t
*
bitmap_page
;
ibuf_data_t
*
ibuf_data
;
dict_index_t
*
ibuf_index
;
page_t
*
root
;
ulint
err
;
ibool
do_merge
;
...
...
@@ -2642,18 +2940,12 @@ ibuf_insert_low(
ut_a
(
!
dict_index_is_clust
(
index
));
ut_ad
(
dtuple_check_typed
(
entry
));
ut_ad
(
ut_is_2pow
(
zip_size
));
ut_a
(
op
<
IBUF_OP_COUNT
);
ut_a
(
trx_sys_multiple_tablespace_format
);
do_merge
=
FALSE
;
/* Currently the insert buffer of space 0 takes care of inserts to all
tablespaces */
ibuf_data
=
fil_space_get_ibuf_data
(
0
);
ibuf_index
=
ibuf_data
->
index
;
mutex_enter
(
&
ibuf_mutex
);
if
(
ibuf
->
size
>=
ibuf
->
max_size
+
IBUF_CONTRACT_DO_NOT_INSERT
)
{
...
...
@@ -2680,7 +2972,7 @@ ibuf_insert_low(
mutex_enter
(
&
ibuf_mutex
);
while
(
!
ibuf_data_enough_free_for_insert
(
ibuf_data
))
{
while
(
!
ibuf_data_enough_free_for_insert
())
{
mutex_exit
(
&
ibuf_mutex
);
...
...
@@ -2688,7 +2980,7 @@ ibuf_insert_low(
mutex_exit
(
&
ibuf_pessimistic_insert_mutex
);
err
=
ibuf_add_free_page
(
0
,
ibuf_data
);
err
=
ibuf_add_free_page
();
if
(
err
==
DB_STRONG_FAIL
)
{
...
...
@@ -2707,11 +2999,16 @@ ibuf_insert_low(
heap
=
mem_heap_create
(
512
);
/* Build the entry which contains the space id and the page number as
the first fields and the type information for other fields, and which
will be inserted to the insert buffer. */
/* Build the entry which contains the space id and the page number
as the first fields and the type information for other fields, and
which will be inserted to the insert buffer. Using a counter value
of 0xFFFF we find the last record for (space, page_no), from which
we can then read the counter value N and use N + 1 in the record we
insert. (We patch the ibuf_entry's counter field to the correct
value just before actually inserting the entry.) */
ibuf_entry
=
ibuf_entry_build
(
index
,
entry
,
space
,
page_no
,
heap
);
ibuf_entry
=
ibuf_entry_build
(
op
,
index
,
entry
,
space
,
page_no
,
0xFFFF
,
heap
);
/* Open a cursor to the insert buffer tree to calculate if we can add
the new entry to it without exceeding the free space limit for the
...
...
@@ -2719,7 +3016,15 @@ ibuf_insert_low(
mtr_start
(
&
mtr
);
btr_pcur_open
(
ibuf_index
,
ibuf_entry
,
PAGE_CUR_LE
,
mode
,
&
pcur
,
&
mtr
);
btr_pcur_open
(
ibuf
->
index
,
ibuf_entry
,
PAGE_CUR_LE
,
mode
,
&
pcur
,
&
mtr
);
/* Don't buffer deletes if the page has been read in to the buffer
pool. */
if
(
op
==
IBUF_OP_DELETE
&&
buf_pool_watch_happened
(
space
,
page_no
))
{
err
=
DB_STRONG_FAIL
;
goto
function_exit
;
}
/* Find out the volume of already buffered inserts for the same index
page */
...
...
@@ -2730,8 +3035,8 @@ ibuf_insert_low(
#endif
mtr_start
(
&
bitmap_mtr
);
bitmap_page
=
ibuf_bitmap_get_map_page
(
space
,
page_no
,
zip_size
,
&
bitmap_mtr
);
bitmap_page
=
ibuf_bitmap_get_map_page
(
space
,
page_no
,
zip_size
,
&
bitmap_mtr
);
/* We check if the index page is suitable for buffered entries */
...
...
@@ -2744,21 +3049,35 @@ ibuf_insert_low(
goto
function_exit
;
}
bits
=
ibuf_bitmap_page_get_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_FREE
,
&
bitmap_mtr
);
bits
=
ibuf_bitmap_page_get_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_FREE
,
&
bitmap_mtr
);
if
(
buffered
+
entry_size
+
page_dir_calc_reserved_space
(
1
)
>
ibuf_index_page_calc_free_from_bits
(
zip_size
,
bits
))
{
mtr_commit
(
&
bitmap_mtr
);
/* It may not fit */
err
=
DB_STRONG_FAIL
;
mtr_commit
(
&
bitmap_mtr
);
do_merge
=
TRUE
;
ibuf_get_merge_page_nos
(
FALSE
,
btr_pcur_get_rec
(
&
pcur
),
space_ids
,
space_versions
,
page_nos
,
&
n_stored
);
ibuf_get_merge_page_nos
(
FALSE
,
btr_pcur_get_rec
(
&
pcur
),
space_ids
,
space_versions
,
page_nos
,
&
n_stored
);
goto
function_exit
;
}
/* Patch correct counter value to the entry to insert. This can
change the insert position, which can result in the need to abort in
some cases. */
if
(
!
ibuf_set_entry_counter
(
ibuf_entry
,
space
,
page_no
,
&
pcur
,
mode
==
BTR_MODIFY_PREV
,
&
mtr
))
{
err
=
DB_STRONG_FAIL
;
mtr_commit
(
&
bitmap_mtr
);
goto
function_exit
;
}
...
...
@@ -2768,6 +3087,7 @@ ibuf_insert_low(
old_bit_value
=
ibuf_bitmap_page_get_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_BUFFERED
,
&
bitmap_mtr
);
if
(
!
old_bit_value
)
{
ibuf_bitmap_page_set_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_BUFFERED
,
TRUE
,
...
...
@@ -2795,7 +3115,7 @@ ibuf_insert_low(
which would cause the x-latching of the root after that to
break the latching order. */
root
=
ibuf_tree_root_get
(
ibuf_data
,
0
,
&
mtr
);
root
=
ibuf_tree_root_get
(
&
mtr
);
err
=
btr_cur_pessimistic_insert
(
BTR_NO_LOCKING_FLAG
|
BTR_NO_UNDO_LOG_FLAG
,
...
...
@@ -2808,7 +3128,7 @@ ibuf_insert_low(
thr_get_trx
(
thr
)
->
id
);
}
ibuf_
data_sizes_update
(
ibuf_data
,
root
,
&
mtr
);
ibuf_
size_update
(
root
,
&
mtr
);
}
function_exit:
...
...
@@ -2824,7 +3144,6 @@ ibuf_insert_low(
}
#endif
if
(
mode
==
BTR_MODIFY_TREE
)
{
ut_ad
(
ibuf_validate_low
());
mutex_exit
(
&
ibuf_mutex
);
mutex_exit
(
&
ibuf_pessimistic_insert_mutex
);
...
...
@@ -2839,8 +3158,7 @@ ibuf_insert_low(
mutex_enter
(
&
ibuf_mutex
);
if
(
err
==
DB_SUCCESS
)
{
ibuf_data
->
empty
=
FALSE
;
ibuf_data
->
n_inserts
++
;
ibuf
->
empty
=
FALSE
;
}
mutex_exit
(
&
ibuf_mutex
);
...
...
@@ -2861,14 +3179,15 @@ ibuf_insert_low(
}
/*************************************************************************
Makes an index insert to the insert buffer, instead of directly to the disk
page, if this is possible. Does not do insert if the index is clustered
or unique. */
Buffer an operation in the insert/delete buffer, instead of doing it
directly to the disk page, if this is possible. Does not do it if the index
is clustered
or unique. */
UNIV_INTERN
ibool
ibuf_insert
(
/*========*/
/* out: TRUE if success */
ibuf_op_t
op
,
/* in: operation type */
const
dtuple_t
*
entry
,
/* in: index entry to insert */
dict_index_t
*
index
,
/* in: index where to insert */
ulint
space
,
/* in: space id where to insert */
...
...
@@ -2878,25 +3197,26 @@ ibuf_insert(
{
ulint
err
;
ulint
entry_size
;
ibool
comp
=
dict_table_is_comp
(
index
->
table
);
ut_a
(
trx_sys_multiple_tablespace_format
);
ut_ad
(
dtuple_check_typed
(
entry
));
ut_ad
(
ut_is_2pow
(
zip_size
));
ut_a
(
op
<
IBUF_OP_COUNT
);
ut_a
(
!
dict_index_is_clust
(
index
));
entry_size
=
rec_get_converted_size
(
index
,
entry
,
0
);
if
(
entry_size
>=
(
page_get_free_space_of_empty
(
dict_table_is_comp
(
index
->
table
))
/
2
))
{
if
(
entry_size
>=
(
page_get_free_space_of_empty
(
comp
)
/
2
))
{
return
(
FALSE
);
}
err
=
ibuf_insert_low
(
BTR_MODIFY_PREV
,
entry
,
entry_size
,
err
=
ibuf_insert_low
(
BTR_MODIFY_PREV
,
op
,
entry
,
entry_size
,
index
,
space
,
zip_size
,
page_no
,
thr
);
if
(
err
==
DB_FAIL
)
{
err
=
ibuf_insert_low
(
BTR_MODIFY_TREE
,
entry
,
entry_size
,
err
=
ibuf_insert_low
(
BTR_MODIFY_TREE
,
op
,
entry
,
entry_size
,
index
,
space
,
zip_size
,
page_no
,
thr
);
}
...
...
@@ -2970,8 +3290,8 @@ ibuf_insert_to_index_page(
return
;
}
low_match
=
page_cur_search
(
block
,
index
,
entry
,
PAGE_CUR_LE
,
&
page_cur
);
low_match
=
page_cur_search
(
block
,
index
,
entry
,
PAGE_CUR_LE
,
&
page_cur
);
if
(
low_match
==
dtuple_get_n_fields
(
entry
))
{
buf_block_t
*
block
;
...
...
@@ -2981,7 +3301,7 @@ ibuf_insert_to_index_page(
block
=
page_cur_get_block
(
&
page_cur
);
page_zip
=
buf_block_get_page_zip
(
block
);
btr_cur_
del_unmark_for_ibuf
(
rec
,
page_zip
,
mtr
);
btr_cur_
set_deleted_flag_for_ibuf
(
rec
,
page_zip
,
FALSE
,
mtr
);
}
else
{
rec
=
page_cur_tuple_insert
(
&
page_cur
,
entry
,
index
,
0
,
mtr
);
...
...
@@ -3043,6 +3363,100 @@ ibuf_insert_to_index_page(
}
}
/********************************************************************
During merge, sets the delete mark on a record for a secondary index
entry. */
static
void
ibuf_set_del_mark
(
/*==============*/
dtuple_t
*
entry
,
/* in: entry */
buf_block_t
*
block
,
/* in: block */
dict_index_t
*
index
,
/* in: record descriptor */
mtr_t
*
mtr
)
/* in: mtr */
{
page_cur_t
page_cur
;
ulint
low_match
;
ut_ad
(
ibuf_inside
());
ut_ad
(
dtuple_check_typed
(
entry
));
low_match
=
page_cur_search
(
block
,
index
,
entry
,
PAGE_CUR_LE
,
&
page_cur
);
if
(
low_match
==
dtuple_get_n_fields
(
entry
))
{
rec_t
*
rec
;
page_zip_des_t
*
page_zip
;
rec
=
page_cur_get_rec
(
&
page_cur
);
block
=
page_cur_get_block
(
&
page_cur
);
page_zip
=
buf_block_get_page_zip
(
block
);
btr_cur_set_deleted_flag_for_ibuf
(
rec
,
page_zip
,
TRUE
,
mtr
);
}
else
{
/* This can happen benignly in some situations. */
}
}
/********************************************************************
During merge, delete a record for a secondary index entry. */
static
void
ibuf_delete
(
/*========*/
dtuple_t
*
entry
,
/* in: entry */
buf_block_t
*
block
,
/* in: block */
dict_index_t
*
index
,
/* in: record descriptor */
mtr_t
*
mtr
)
/* in: mtr */
{
page_cur_t
page_cur
;
ulint
low_match
;
ut_ad
(
ibuf_inside
());
ut_ad
(
dtuple_check_typed
(
entry
));
low_match
=
page_cur_search
(
block
,
index
,
entry
,
PAGE_CUR_LE
,
&
page_cur
);
if
(
low_match
==
dtuple_get_n_fields
(
entry
))
{
page_t
*
page
;
rec_t
*
rec
=
page_cur_get_rec
(
&
page_cur
);
/* TODO: the below should probably be a separate function,
it's a bastardized version of btr_cur_optimistic_delete. */
ulint
offsets_
[
REC_OFFS_NORMAL_SIZE
];
ulint
*
offsets
=
offsets_
;
mem_heap_t
*
heap
=
NULL
;
ulint
max_ins_size
;
rec_offs_init
(
offsets_
);
offsets
=
rec_get_offsets
(
rec
,
index
,
offsets
,
ULINT_UNDEFINED
,
&
heap
);
lock_update_delete
(
block
,
rec
);
page
=
buf_block_get_frame
(
block
);
max_ins_size
=
page_get_max_insert_size_after_reorganize
(
page
,
1
);
page_cur_delete_rec
(
&
page_cur
,
index
,
offsets
,
mtr
);
ibuf_update_free_bits_low
(
block
,
max_ins_size
,
mtr
);
if
(
UNIV_LIKELY_NULL
(
heap
))
{
mem_heap_free
(
heap
);
}
}
else
{
/* This can happen benignly in some situations: either when
we crashed at just the right time, or on database startup
when we redo some old log entries (due to worse stored
position granularity on disk than in memory). */
}
}
/*************************************************************************
Deletes from ibuf the record on which pcur is positioned. If we have to
resort to a pessimistic delete, this function commits mtr and closes
...
...
@@ -3063,7 +3477,6 @@ ibuf_delete_rec(
mtr_t
*
mtr
)
/* in: mtr */
{
ibool
success
;
ibuf_data_t
*
ibuf_data
;
page_t
*
root
;
ulint
err
;
...
...
@@ -3088,11 +3501,6 @@ ibuf_delete_rec(
btr_pcur_commit_specify_mtr
(
pcur
,
mtr
);
/* Currently the insert buffer of space 0 takes care of inserts to all
tablespaces */
ibuf_data
=
fil_space_get_ibuf_data
(
0
);
mutex_enter
(
&
ibuf_mutex
);
mtr_start
(
mtr
);
...
...
@@ -3119,7 +3527,7 @@ ibuf_delete_rec(
btr_pcur_commit_specify_mtr
(
pcur
,
mtr
);
fputs
(
"InnoDB: Validating insert buffer tree:
\n
"
,
stderr
);
if
(
!
btr_validate_index
(
ibuf
_data
->
index
,
NULL
))
{
if
(
!
btr_validate_index
(
ibuf
->
index
,
NULL
))
{
ut_error
;
}
...
...
@@ -3133,7 +3541,7 @@ ibuf_delete_rec(
return
(
TRUE
);
}
root
=
ibuf_tree_root_get
(
ibuf_data
,
0
,
mtr
);
root
=
ibuf_tree_root_get
(
mtr
);
btr_cur_pessimistic_delete
(
&
err
,
TRUE
,
btr_pcur_get_btr_cur
(
pcur
),
FALSE
,
mtr
);
...
...
@@ -3144,9 +3552,7 @@ ibuf_delete_rec(
#else
UT_NOT_USED
(
space
);
#endif
ibuf_data_sizes_update
(
ibuf_data
,
root
,
mtr
);
ut_ad
(
ibuf_validate_low
());
ibuf_size_update
(
root
,
mtr
);
btr_pcur_commit_specify_mtr
(
pcur
,
mtr
);
...
...
@@ -3159,11 +3565,11 @@ ibuf_delete_rec(
/*************************************************************************
When an index page is read from a disk to the buffer pool, this function
inserts to the page the possible index entries buffered in the insert buffer.
The entries are deleted from the insert buffer. If the page is not read, but
created in the buffer pool, this function deletes its buffered entries from
the insert buffer; there can exist entries for such a page if the page
belonged to an index which
subsequently was dropped. */
applies any buffered operations to the page and deletes the entries from the
insert buffer. If the page is not read, but created in the buffer pool, this
function deletes its buffered entries from the insert buffer; there can
exist entries for such a page if the page belonged to an index which
subsequently was dropped. */
UNIV_INTERN
void
ibuf_merge_or_delete_for_page
(
...
...
@@ -3183,12 +3589,7 @@ ibuf_merge_or_delete_for_page(
{
mem_heap_t
*
heap
;
btr_pcur_t
pcur
;
dtuple_t
*
entry
;
dtuple_t
*
search_tuple
;
rec_t
*
ibuf_rec
;
page_t
*
bitmap_page
;
ibuf_data_t
*
ibuf_data
;
ulint
n_inserts
;
#ifdef UNIV_IBUF_DEBUG
ulint
volume
;
#endif
...
...
@@ -3197,6 +3598,10 @@ ibuf_merge_or_delete_for_page(
ibool
corruption_noticed
=
FALSE
;
mtr_t
mtr
;
/* Counts for merged & discarded operations. */
ulint
mops
[
IBUF_OP_COUNT
];
ulint
dops
[
IBUF_OP_COUNT
];
ut_ad
(
!
block
||
buf_block_get_space
(
block
)
==
space
);
ut_ad
(
!
block
||
buf_block_get_page_no
(
block
)
==
page_no
);
ut_ad
(
!
block
||
buf_block_get_zip_size
(
block
)
==
zip_size
);
...
...
@@ -3204,24 +3609,24 @@ ibuf_merge_or_delete_for_page(
if
(
srv_force_recovery
>=
SRV_FORCE_NO_IBUF_MERGE
)
{
return
;
}
}
else
if
(
trx_sys_hdr_page
(
space
,
page_no
))
{
if
(
trx_sys_hdr_page
(
space
,
page_no
))
{
return
;
}
}
else
if
(
ibuf_fixed_addr_page
(
space
,
0
,
page_no
)
||
fsp_descr_page
(
0
,
page_no
))
{
/* The following assumes that the uncompressed page size
is a power-of-2 multiple of zip_size. */
if
(
ibuf_fixed_addr_page
(
space
,
0
,
page_no
)
||
fsp_descr_page
(
0
,
page_no
))
{
/* This assumes that the uncompressed page size
is a power-of-2 multiple of zip_size. */
return
;
}
if
(
UNIV_LIKELY
(
update_ibuf_bitmap
))
{
ut_a
(
ut_is_2pow
(
zip_size
));
if
(
ibuf_fixed_addr_page
(
space
,
zip_size
,
page_no
)
||
fsp_descr_page
(
zip_size
,
page_no
))
{
return
;
}
...
...
@@ -3239,9 +3644,12 @@ ibuf_merge_or_delete_for_page(
block
=
NULL
;
update_ibuf_bitmap
=
FALSE
;
}
else
{
page_t
*
bitmap_page
;
mtr_start
(
&
mtr
);
bitmap_page
=
ibuf_bitmap_get_map_page
(
space
,
page_no
,
zip_size
,
&
mtr
);
bitmap_page
=
ibuf_bitmap_get_map_page
(
space
,
page_no
,
zip_size
,
&
mtr
);
if
(
!
ibuf_bitmap_page_get_bits
(
bitmap_page
,
page_no
,
zip_size
,
...
...
@@ -3258,17 +3666,12 @@ ibuf_merge_or_delete_for_page(
}
mtr_commit
(
&
mtr
);
}
}
else
if
(
block
)
{
if
(
ibuf_fixed_addr_page
(
space
,
zip_size
,
page_no
)
||
fsp_descr_page
(
zip_size
,
page_no
))
{
return
;
}
}
}
else
if
(
block
&&
(
ibuf_fixed_addr_page
(
space
,
zip_size
,
page_no
)
||
fsp_descr_page
(
zip_size
,
page_no
)))
{
/* Currently the insert buffer of space 0 takes care of inserts to all
tablespaces */
ibuf_data
=
fil_space_get_ibuf_data
(
0
);
return
;
}
ibuf_enter
();
...
...
@@ -3294,6 +3697,8 @@ ibuf_merge_or_delete_for_page(
if
(
UNIV_UNLIKELY
(
fil_page_get_type
(
block
->
frame
)
!=
FIL_PAGE_INDEX
))
{
page_t
*
bitmap_page
;
corruption_noticed
=
TRUE
;
ut_print_timestamp
(
stderr
);
...
...
@@ -3334,7 +3739,9 @@ ibuf_merge_or_delete_for_page(
}
}
n_inserts
=
0
;
memset
(
mops
,
0
,
sizeof
(
mops
));
memset
(
dops
,
0
,
sizeof
(
dops
));
#ifdef UNIV_IBUF_DEBUG
volume
=
0
;
#endif
...
...
@@ -3342,11 +3749,14 @@ ibuf_merge_or_delete_for_page(
mtr_start
(
&
mtr
);
if
(
block
)
{
ibool
success
=
buf_page_get_known_nowait
(
RW_X_LATCH
,
block
,
BUF_KEEP_OLD
,
__FILE__
,
__LINE__
,
&
mtr
);
ibool
success
;
success
=
buf_page_get_known_nowait
(
RW_X_LATCH
,
block
,
BUF_KEEP_OLD
,
__FILE__
,
__LINE__
,
&
mtr
);
ut_a
(
success
);
#ifdef UNIV_SYNC_DEBUG
buf_block_dbg_add_level
(
block
,
SYNC_TREE_NODE
);
#endif
/* UNIV_SYNC_DEBUG */
...
...
@@ -3354,8 +3764,10 @@ ibuf_merge_or_delete_for_page(
/* Position pcur in the insert buffer at the first entry for this
index page */
btr_pcur_open_on_user_rec
(
ibuf_data
->
index
,
search_tuple
,
PAGE_CUR_GE
,
BTR_MODIFY_LEAF
,
&
pcur
,
&
mtr
);
btr_pcur_open_on_user_rec
(
ibuf
->
index
,
search_tuple
,
PAGE_CUR_GE
,
BTR_MODIFY_LEAF
,
&
pcur
,
&
mtr
);
if
(
!
btr_pcur_is_on_user_rec
(
&
pcur
))
{
ut_ad
(
btr_pcur_is_after_last_in_tree
(
&
pcur
,
&
mtr
));
...
...
@@ -3363,50 +3775,82 @@ ibuf_merge_or_delete_for_page(
}
for
(;;)
{
rec_t
*
rec
;
ut_ad
(
btr_pcur_is_on_user_rec
(
&
pcur
));
ibuf_
rec
=
btr_pcur_get_rec
(
&
pcur
);
rec
=
btr_pcur_get_rec
(
&
pcur
);
/* Check if the entry is for this index page */
if
(
ibuf_rec_get_page_no
(
ibuf_rec
)
!=
page_no
||
ibuf_rec_get_space
(
ibuf_rec
)
!=
space
)
{
if
(
ibuf_rec_get_page_no
(
rec
)
!=
page_no
||
ibuf_rec_get_space
(
rec
)
!=
space
)
{
if
(
block
)
{
page_header_reset_last_insert
(
block
->
frame
,
page_zip
,
&
mtr
);
}
goto
reset_bit
;
}
if
(
UNIV_UNLIKELY
(
corruption_noticed
))
{
fputs
(
"InnoDB: Discarding record
\n
"
,
stderr
);
rec_print_old
(
stderr
,
ibuf_
rec
);
rec_print_old
(
stderr
,
rec
);
fputs
(
"
\n
InnoDB: from the insert buffer!
\n\n
"
,
stderr
);
}
else
if
(
block
)
{
/* Now we have at pcur a record which should be
inserted to the index page; NOTE that the call below
copies pointers to fields in
ibuf_
rec, and we must
keep the latch to the
ibuf_
rec page until the
copies pointers to fields in rec, and we must
keep the latch to the rec page until the
insertion is finished! */
dtuple_t
*
entry
;
dulint
max_trx_id
;
dict_index_t
*
dummy_index
;
dulint
max_trx_id
=
page_get_max_trx_id
(
page_align
(
ibuf_rec
));
ibuf_op_t
op
=
ibuf_rec_get_op_type
(
rec
);
max_trx_id
=
page_get_max_trx_id
(
page_align
(
rec
));
page_update_max_trx_id
(
block
,
page_zip
,
max_trx_id
);
entry
=
ibuf_build_entry_from_ibuf_rec
(
ibuf_
rec
,
heap
,
&
dummy_index
);
rec
,
heap
,
&
dummy_index
);
#ifdef UNIV_IBUF_DEBUG
volume
+=
rec_get_converted_size
(
dummy_index
,
entry
,
0
)
+
page_dir_calc_reserved_space
(
1
);
ut_a
(
volume
<=
4
*
UNIV_PAGE_SIZE
/
IBUF_PAGE_SIZE_PER_FREE_SPACE
);
if
(
op
==
IBUF_OP_INSERT
)
{
volume
+=
rec_get_converted_size
(
dummy_index
,
entry
,
0
);
volume
+=
page_dir_calc_reserved_space
(
1
);
ut_a
(
volume
<=
4
*
UNIV_PAGE_SIZE
/
IBUF_PAGE_SIZE_PER_FREE_SPACE
);
}
#endif
ibuf_insert_to_index_page
(
entry
,
block
,
dummy_index
,
&
mtr
);
switch
(
op
)
{
case
IBUF_OP_INSERT
:
ibuf_insert_to_index_page
(
entry
,
block
,
dummy_index
,
&
mtr
);
break
;
case
IBUF_OP_DELETE_MARK
:
ibuf_set_del_mark
(
entry
,
block
,
dummy_index
,
&
mtr
);
break
;
case
IBUF_OP_DELETE
:
ibuf_delete
(
entry
,
block
,
dummy_index
,
&
mtr
);
break
;
default:
ut_error
;
}
mops
[
op
]
++
;
ibuf_dummy_index_free
(
dummy_index
);
}
else
{
dops
[
ibuf_rec_get_op_type
(
rec
)]
++
;
}
n_inserts
++
;
/* Delete the record from ibuf */
if
(
ibuf_delete_rec
(
space
,
page_no
,
&
pcur
,
search_tuple
,
&
mtr
))
{
...
...
@@ -3414,9 +3858,7 @@ ibuf_merge_or_delete_for_page(
we start from the beginning again */
goto
loop
;
}
if
(
btr_pcur_is_after_last_on_page
(
&
pcur
))
{
}
else
if
(
btr_pcur_is_after_last_on_page
(
&
pcur
))
{
mtr_commit
(
&
mtr
);
btr_pcur_close
(
&
pcur
);
...
...
@@ -3425,43 +3867,32 @@ ibuf_merge_or_delete_for_page(
}
reset_bit:
#ifdef UNIV_IBUF_COUNT_DEBUG
if
(
ibuf_count_get
(
space
,
page_no
)
>
0
)
{
/* btr_print_tree(ibuf_data->index->tree, 100);
ibuf_print(); */
}
#endif
if
(
UNIV_LIKELY
(
update_ibuf_bitmap
))
{
bitmap_page
=
ibuf_bitmap_get_map_page
(
space
,
page_no
,
zip_size
,
&
mtr
);
ibuf_bitmap_page_set_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_BUFFERED
,
FALSE
,
&
mtr
);
page_t
*
bitmap_page
;
bitmap_page
=
ibuf_bitmap_get_map_page
(
space
,
page_no
,
zip_size
,
&
mtr
);
ibuf_bitmap_page_set_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_BUFFERED
,
FALSE
,
&
mtr
);
if
(
block
)
{
ulint
old_bits
=
ibuf_bitmap_page_get_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_FREE
,
&
mtr
);
ulint
new_bits
=
ibuf_index_page_calc_free
(
zip_size
,
block
);
#if 0 /* defined UNIV_IBUF_DEBUG */
fprintf(stderr, "Old bits %lu new bits %lu"
" max size %lu\n",
old_bits, new_bits,
page_get_max_insert_size_after_reorganize(
page, 1));
#endif
if
(
old_bits
!=
new_bits
)
{
ibuf_bitmap_page_set_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_FREE
,
new_bits
,
&
mtr
);
ibuf_bitmap_page_set_bits
(
bitmap_page
,
page_no
,
zip_size
,
IBUF_BITMAP_FREE
,
new_bits
,
&
mtr
);
}
}
}
#if 0 /* defined UNIV_IBUF_DEBUG */
fprintf(stderr,
"Ibuf merge %lu records volume %lu to page no %lu\n",
n_inserts, volume, page_no);
#endif
mtr_commit
(
&
mtr
);
btr_pcur_close
(
&
pcur
);
mem_heap_free
(
heap
);
...
...
@@ -3469,8 +3900,9 @@ ibuf_merge_or_delete_for_page(
/* Protect our statistics keeping from race conditions */
mutex_enter
(
&
ibuf_mutex
);
ibuf_data
->
n_merges
++
;
ibuf_data
->
n_merged_recs
+=
n_inserts
;
ibuf
->
n_merges
++
;
ibuf_add_ops
(
ibuf
->
n_merged_ops
,
mops
);
ibuf_add_ops
(
ibuf
->
n_discarded_ops
,
dops
);
mutex_exit
(
&
ibuf_mutex
);
...
...
@@ -3480,6 +3912,7 @@ ibuf_merge_or_delete_for_page(
}
ibuf_exit
();
#ifdef UNIV_IBUF_COUNT_DEBUG
ut_a
(
ibuf_count_get
(
space
,
page_no
)
==
0
);
#endif
...
...
@@ -3502,14 +3935,10 @@ ibuf_delete_for_discarded_space(
rec_t
*
ibuf_rec
;
ulint
page_no
;
ibool
closed
;
ibuf_data_t
*
ibuf_data
;
ulint
n_inserts
;
mtr_t
mtr
;
/* Currently the insert buffer of space 0 takes care of inserts to all
tablespaces */
ibuf_data
=
fil_space_get_ibuf_data
(
0
);
/* Counts for discarded operations. */
ulint
dops
[
IBUF_OP_COUNT
];
heap
=
mem_heap_create
(
512
);
...
...
@@ -3518,7 +3947,7 @@ ibuf_delete_for_discarded_space(
search_tuple
=
ibuf_new_search_tuple_build
(
space
,
0
,
heap
);
n_inserts
=
0
;
memset
(
dops
,
0
,
sizeof
(
dops
))
;
loop:
ibuf_enter
();
...
...
@@ -3526,8 +3955,10 @@ ibuf_delete_for_discarded_space(
/* Position pcur in the insert buffer at the first entry for the
space */
btr_pcur_open_on_user_rec
(
ibuf_data
->
index
,
search_tuple
,
PAGE_CUR_GE
,
BTR_MODIFY_LEAF
,
&
pcur
,
&
mtr
);
btr_pcur_open_on_user_rec
(
ibuf
->
index
,
search_tuple
,
PAGE_CUR_GE
,
BTR_MODIFY_LEAF
,
&
pcur
,
&
mtr
);
if
(
!
btr_pcur_is_on_user_rec
(
&
pcur
))
{
ut_ad
(
btr_pcur_is_after_last_in_tree
(
&
pcur
,
&
mtr
));
...
...
@@ -3547,7 +3978,7 @@ ibuf_delete_for_discarded_space(
page_no
=
ibuf_rec_get_page_no
(
ibuf_rec
);
n_inserts
++
;
dops
[
ibuf_rec_get_op_type
(
ibuf_rec
)]
++
;
/* Delete the record from ibuf */
closed
=
ibuf_delete_rec
(
space
,
page_no
,
&
pcur
,
search_tuple
,
...
...
@@ -3577,51 +4008,14 @@ ibuf_delete_for_discarded_space(
/* Protect our statistics keeping from race conditions */
mutex_enter
(
&
ibuf_mutex
);
ibuf_data
->
n_merges
++
;
ibuf_data
->
n_merged_recs
+=
n_inserts
;
ibuf_add_ops
(
ibuf
->
n_discarded_ops
,
dops
);
mutex_exit
(
&
ibuf_mutex
);
/*
fprintf(stderr,
"InnoDB: Discarded %lu ibuf entries for space %lu\n",
(ulong) n_inserts, (ulong) space);
*/
ibuf_exit
();
mem_heap_free
(
heap
);
}
#ifdef UNIV_DEBUG
/**********************************************************************
Validates the ibuf data structures when the caller owns ibuf_mutex. */
static
ibool
ibuf_validate_low
(
void
)
/*===================*/
/* out: TRUE if ok */
{
ibuf_data_t
*
data
;
ulint
sum_sizes
;
ut_ad
(
mutex_own
(
&
ibuf_mutex
));
sum_sizes
=
0
;
data
=
UT_LIST_GET_FIRST
(
ibuf
->
data_list
);
while
(
data
)
{
sum_sizes
+=
data
->
size
;
data
=
UT_LIST_GET_NEXT
(
data_list
,
data
);
}
ut_a
(
sum_sizes
==
ibuf
->
size
);
return
(
TRUE
);
}
#endif
/* UNIV_DEBUG */
/**********************************************************************
Looks if the insert buffer is empty. */
UNIV_INTERN
...
...
@@ -3630,7 +4024,6 @@ ibuf_is_empty(void)
/*===============*/
/* out: TRUE if empty */
{
ibuf_data_t
*
data
;
ibool
is_empty
;
const
page_t
*
root
;
mtr_t
mtr
;
...
...
@@ -3639,17 +4032,15 @@ ibuf_is_empty(void)
mutex_enter
(
&
ibuf_mutex
);
data
=
UT_LIST_GET_FIRST
(
ibuf
->
data_list
);
mtr_start
(
&
mtr
);
root
=
ibuf_tree_root_get
(
data
,
0
,
&
mtr
);
root
=
ibuf_tree_root_get
(
&
mtr
);
if
(
page_get_n_recs
(
root
)
==
0
)
{
is_empty
=
TRUE
;
if
(
data
->
empty
==
FALSE
)
{
if
(
ibuf
->
empty
==
FALSE
)
{
fprintf
(
stderr
,
"InnoDB: Warning: insert buffer tree is empty"
" but the data struct does not
\n
"
...
...
@@ -3658,15 +4049,13 @@ ibuf_is_empty(void)
"InnoDB: run to completion.
\n
"
);
}
}
else
{
ut_a
(
data
->
empty
==
FALSE
);
ut_a
(
ibuf
->
empty
==
FALSE
);
is_empty
=
FALSE
;
}
mtr_commit
(
&
mtr
);
ut_a
(
data
->
space
==
0
);
mutex_exit
(
&
ibuf_mutex
);
ibuf_exit
();
...
...
@@ -3682,39 +4071,42 @@ ibuf_print(
/*=======*/
FILE
*
file
)
/* in: file where to print */
{
ibuf_data_t
*
data
;
#ifdef UNIV_IBUF_COUNT_DEBUG
ulint
i
;
#endif
mutex_enter
(
&
ibuf_mutex
);
data
=
UT_LIST_GET_FIRST
(
ibuf
->
data_list
);
while
(
data
)
{
fprintf
(
file
,
"Ibuf: size %lu, free list len %lu, seg size %lu,
\n
"
"%lu inserts, %lu merged recs, %lu merges
\n
"
,
(
ulong
)
data
->
size
,
(
ulong
)
data
->
free_list_len
,
(
ulong
)
data
->
seg_size
,
(
ulong
)
data
->
n_inserts
,
(
ulong
)
data
->
n_merged_recs
,
(
ulong
)
data
->
n_merges
);
fprintf
(
file
,
"Ibuf: size %lu, free list len %lu, seg size %lu, %lu merges
\n
"
"total operations:
\n
"
,
(
ulong
)
ibuf
->
size
,
(
ulong
)
ibuf
->
free_list_len
,
(
ulong
)
ibuf
->
seg_size
,
(
ulong
)
ibuf
->
n_merges
);
ibuf_print_ops
(
ibuf
->
n_ops
,
file
);
fprintf
(
file
,
"
\n
merged operations:
\n
"
);
ibuf_print_ops
(
ibuf
->
n_merged_ops
,
file
);
fprintf
(
file
,
"
\n
discarded operations:
\n
"
);
ibuf_print_ops
(
ibuf
->
n_discarded_ops
,
file
);
fputs
(
"
\n
"
,
file
);
#ifdef UNIV_IBUF_COUNT_DEBUG
for
(
i
=
0
;
i
<
IBUF_COUNT_N_PAGES
;
i
++
)
{
if
(
ibuf_count_get
(
data
->
space
,
i
)
>
0
)
{
for
(
i
=
0
;
i
<
IBUF_COUNT_N_SPACES
;
i
++
)
{
for
(
j
=
0
;
j
<
IBUF_COUNT_N_PAGES
;
j
++
)
{
ulint
count
=
ibuf_count_get
(
i
,
j
);
if
(
count
>
0
)
{
fprintf
(
stderr
,
"Ibuf count for page %lu is %lu
\n
"
,
(
ulong
)
i
,
(
ulong
)
ibuf_count_get
(
data
->
space
,
i
));
"Ibuf count for space/page %lu/%lu"
" is %lu
\n
"
,
(
ulong
)
i
,
(
ulong
)
j
,
(
ulong
)
count
);
}
}
#endif
data
=
UT_LIST_GET_NEXT
(
data_list
,
data
);
}
#endif
/* UNIV_IBUF_COUNT_DEBUG */
mutex_exit
(
&
ibuf_mutex
);
}
include/btr0btr.h
View file @
35d626f0
...
...
@@ -42,6 +42,8 @@ failure. */
#define BTR_SEARCH_PREV 35
#define BTR_MODIFY_PREV 36
/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */
/* If this is ORed to the latch mode, it means that the search tuple will be
inserted to the index, at the searched position */
#define BTR_INSERT 512
...
...
@@ -55,6 +57,19 @@ UNIQUE definition on secondary indexes when we decide if we can use the
insert buffer to speed up inserts */
#define BTR_IGNORE_SEC_UNIQUE 2048
/* Try to delete mark the record at the searched position using the
insert/delete buffer. */
#define BTR_DELETE_MARK 4096
/* Try to delete the record at the searched position using the insert/delete
buffer. */
#define BTR_DELETE 8192
/* If the leaf page is not in the buffer pool: don't read it in, set
cursor->leaf_in_buf_pool to FALSE, and set buf_pool_t::watch_* that
watches for the page to get read in. */
#define BTR_WATCH_LEAF 16384
/******************************************************************
Gets the root node of a tree and x-latches it. */
UNIV_INTERN
...
...
include/btr0btr.ic
View file @
35d626f0
...
...
@@ -118,7 +118,7 @@ btr_page_get_level(
/*===============*/
/* out: level, leaf level == 0 */
const page_t* page, /* in: index page */
mtr_t* mtr
__attribute__((unused))
)
mtr_t* mtr
UNIV_UNUSED
)
/* in: mini-transaction handle */
{
ut_ad(page && mtr);
...
...
@@ -160,7 +160,7 @@ btr_page_get_next(
/*==============*/
/* out: next page number */
const page_t* page, /* in: index page */
mtr_t* mtr
__attribute__((unused))
)
mtr_t* mtr
UNIV_UNUSED
)
/* in: mini-transaction handle */
{
ut_ad(page && mtr);
...
...
@@ -200,7 +200,7 @@ btr_page_get_prev(
/*==============*/
/* out: prev page number */
const page_t* page, /* in: index page */
mtr_t* mtr
__attribute__((unused))
) /* in: mini-transaction handle */
mtr_t* mtr
UNIV_UNUSED
) /* in: mini-transaction handle */
{
ut_ad(page && mtr);
...
...
include/btr0cur.h
View file @
35d626f0
...
...
@@ -312,8 +312,8 @@ btr_cur_del_mark_set_sec_rec(
que_thr_t
*
thr
,
/* in: query thread */
mtr_t
*
mtr
);
/* in: mtr */
/***************************************************************
Sets a secondary index record delete mark to
FALSE. This function
is
only used by the insert buffer insert merge mechanism. */
Sets a secondary index record delete mark to
the given value. Th
is
function is
only used by the insert buffer insert merge mechanism. */
UNIV_INTERN
void
btr_cur_del_unmark_for_ibuf
(
...
...
@@ -323,6 +323,7 @@ btr_cur_del_unmark_for_ibuf(
corresponding to rec, or NULL
when the tablespace is
uncompressed */
ibool
val
,
/* value to set */
mtr_t
*
mtr
);
/* in: mtr */
/*****************************************************************
Tries to compress a page of the tree if it seems useful. It is assumed
...
...
@@ -572,7 +573,20 @@ btr_push_update_extern_fields(
const
upd_t
*
update
,
/* in: update vector */
mem_heap_t
*
heap
)
/* in: memory heap */
__attribute__
((
nonnull
));
/***************************************************************
Sets a secondary index record's delete mark to the given value. This
function is only used by the insert buffer merge mechanism. */
void
btr_cur_set_deleted_flag_for_ibuf
(
/*==============================*/
rec_t
*
rec
,
/* in: record */
page_zip_des_t
*
page_zip
,
/* in/out: compressed page
corresponding to rec, or NULL
when the tablespace is
uncompressed */
ibool
val
,
/* in: value to set */
mtr_t
*
mtr
);
/* in: mtr */
/*######################################################################*/
/* In the pessimistic delete, if the page data size drops below this
...
...
@@ -657,6 +671,28 @@ struct btr_cur_struct {
NULL */
ulint
fold
;
/* fold value used in the search if
flag is BTR_CUR_HASH */
/*----- Delete buffering -------*/
ulint
ibuf_cnt
;
/* in searches done on insert buffer
trees, this contains the "counter"
value (the first two bytes of the
fourth field) extracted from the
page above the leaf page, from the
father node pointer that pointed to
the leaf page. in other words, it
contains the minimum counter value
for records to be inserted on the
chosen leaf page. If for some reason
this can't be read, or if the search
ended on the leftmost leaf page in
the tree (in which case the father
node pointer had the 'minimum
record' flag set), this is
ULINT_UNDEFINED. */
ibool
leaf_in_buf_pool
;
/* in: in searches done with
BTR_CHECK_LEAF, this is TRUE if the
leaf page is in the buffer pool,
FALSE otherwise. */
/*------------------------------*/
btr_path_t
*
path_arr
;
/* in estimating the number of
rows in range, we store in this array
...
...
@@ -675,6 +711,13 @@ struct btr_cur_struct {
#define BTR_CUR_BINARY 3
/* success using the binary search */
#define BTR_CUR_INSERT_TO_IBUF 4
/* performed the intended insert to
the insert buffer */
#define BTR_CUR_DEL_MARK_IBUF 5
/* performed the intended delete
mark in the insert/delete buffer */
#define BTR_CUR_DELETE_IBUF 6
/* performed the intended delete in
the insert/delete buffer */
#define BTR_CUR_ABORTED 7
/* search with BTR_CHECK_LEAF
aborted due to leaf page not being
in buffer pool */
/* If pessimistic delete fails because of lack of file space,
there is still a good change of success a little later: try this many times,
...
...
include/btr0pcur.h
View file @
35d626f0
...
...
@@ -79,6 +79,16 @@ btr_pcur_open(
btr_pcur_t
*
cursor
,
/* in: memory buffer for persistent cursor */
mtr_t
*
mtr
);
/* in: mtr */
/******************************************************************
Check if an operation was buffered. */
UNIV_INLINE
ibool
btr_pcur_was_buffered
(
/*==================*/
/* out: TRUE if the operation was buffered
in the insert/delete buffer */
const
btr_pcur_t
*
cursor
);
/* in: persistent cursor */
/******************************************************************
Opens an persistent cursor to an index tree without initializing the
cursor. */
UNIV_INLINE
...
...
include/btr0pcur.ic
View file @
35d626f0
...
...
@@ -506,6 +506,28 @@ btr_pcur_open(
cursor->trx_if_known = NULL;
}
/******************************************************************
Check if an operation was buffered. */
UNIV_INLINE
ibool
btr_pcur_was_buffered(
/*==================*/
/* out: TRUE if the operation was buffered
in the insert/delete buffer */
const btr_pcur_t* cursor)
/* in: persistent cursor */
{
const btr_cur_t* btr_cursor;
/* Look in the tree cursor */
btr_cursor = btr_pcur_get_btr_cur(cursor);
return((btr_cursor->flag == BTR_CUR_DEL_MARK_IBUF)
|| (btr_cursor->flag == BTR_CUR_DELETE_IBUF)
|| (btr_cursor->flag == BTR_CUR_INSERT_TO_IBUF));
}
/******************************************************************
Opens an persistent cursor to an index tree without initializing the
cursor. */
...
...
include/buf0buf.h
View file @
35d626f0
...
...
@@ -43,6 +43,10 @@ Created 11/5/1995 Heikki Tuuri
it is error-prone programming not to
set a latch, and it should be used
with care */
#define BUF_GET_IF_IN_POOL_OR_WATCH 15
/* Get the page only if it's in the
buffer pool, if not then set a watch
on the page. */
/* Modes for buf_page_get_known_nowait */
#define BUF_MAKE_YOUNG 51
#define BUF_KEEP_OLD 52
...
...
@@ -165,20 +169,22 @@ read the contents of the page unless you know it is safe. Do not modify
the contents of the page! We have separated this case, because it is
error-prone programming not to set a latch, and it should be used
with care. */
#define buf_page_get_with_no_latch(SP, ZS, OF, MTR)
buf_page_get_gen(\
#define buf_page_get_with_no_latch(SP, ZS, OF, MTR) buf_page_get_gen(\
SP, ZS, OF, RW_NO_LATCH, NULL,\
BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
BUF_GET_NO_LATCH, \
__FILE__, __LINE__, MTR)
/******************************************************************
NOTE! The following macros should be used instead of buf_page_get_gen, to
improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */
#define buf_page_get_nowait(SP, ZS, OF, LA, MTR) buf_page_get_gen(\
SP, ZS, OF, LA, NULL,\
BUF_GET_NOWAIT, __FILE__, __LINE__, MTR)
BUF_GET_NOWAIT, \
__FILE__, __LINE__, MTR)
/******************************************************************
NOTE! The following macros should be used instead of
buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
RW_X_LATCH are allowed as LA! */
#define buf_page_optimistic_get(LA, BL, MC, MTR)
\
#define buf_page_optimistic_get(LA, BL, MC, MTR) \
buf_page_optimistic_get_func(LA, BL, MC, __FILE__, __LINE__, MTR)
/************************************************************************
This is the general function used to get optimistic access to a database
...
...
@@ -258,7 +264,8 @@ buf_page_get_gen(
ulint
rw_latch
,
/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t
*
guess
,
/* in: guessed block or NULL */
ulint
mode
,
/* in: BUF_GET, BUF_GET_IF_IN_POOL,
BUF_GET_NO_LATCH */
BUF_GET_NO_LATCH, BUF_GET_NOWAIT or
BUF_GET_IF_IN_POOL_WATCH*/
const
char
*
file
,
/* in: file name */
ulint
line
,
/* in: line where called */
mtr_t
*
mtr
);
/* in: mini-transaction */
...
...
@@ -952,8 +959,23 @@ UNIV_INTERN
ulint
buf_get_free_list_len
(
void
);
/*=======================*/
/********************************************************************
Stop watching if the marked page is read in. */
void
buf_pool_remove_watch
(
void
);
/*=======================*/
/********************************************************************
Check if the given page is being watched and has been read to the buffer
pool. */
ibool
buf_pool_watch_happened
(
/*====================*/
/* out: TRUE if the given page is being
watched and it has been read in */
ulint
space
,
/* in: space id */
ulint
page_no
);
/* in: page number */
/* The common buffer control block structure
for compressed and uncompressed frames */
...
...
@@ -1186,6 +1208,16 @@ struct buf_pool_struct{
buf_block_t file pages,
buf_page_in_file() == TRUE,
indexed by (space_id, offset) */
/*--------------------------*/
/* Delete buffering data */
ibool
watch_active
;
/* if TRUE, set watch_happened to
TRUE when page watch_space/
watch_page_no is read in. */
ulint
watch_space
;
/* space id of watched page */
ulint
watch_page_no
;
/* page number of watched page */
ibool
watch_happened
;
/* has watched page been read in */
/*--------------------------*/
hash_table_t
*
zip_hash
;
/* hash table of buf_block_t blocks
whose frames are allocated to the
zip buddy system,
...
...
include/fil0fil.h
View file @
35d626f0
...
...
@@ -158,14 +158,6 @@ fil_space_get_type(
/* out: FIL_TABLESPACE or FIL_LOG */
ulint
id
);
/* in: space id */
/***********************************************************************
Returns the ibuf data of a file space. */
UNIV_INTERN
ibuf_data_t
*
fil_space_get_ibuf_data
(
/*====================*/
/* out: ibuf data for this space */
ulint
id
);
/* in: space id */
/***********************************************************************
Appends a new file to the chain of files of a space. File must be closed. */
UNIV_INTERN
void
...
...
@@ -274,14 +266,6 @@ fil_set_max_space_id_if_bigger(
/*===========================*/
ulint
max_id
);
/* in: maximum known id */
/********************************************************************
Initializes the ibuf data structure for space 0 == the system tablespace.
This can be called after the file space headers have been created and the
dictionary system has been initialized. */
UNIV_INTERN
void
fil_ibuf_init_at_db_start
(
void
);
/*===========================*/
/********************************************************************
Writes the flushed lsn and the latest archived log number to the page
header of the first page of each data file in the system tablespace. */
UNIV_INTERN
...
...
include/ibuf0ibuf.h
View file @
35d626f0
...
...
@@ -18,23 +18,21 @@ Created 7/19/1997 Heikki Tuuri
#include "ibuf0types.h"
#include "fsp0fsp.h"
/* Possible operations buffered in the insert/whatever buffer. See
ibuf_insert(). DO NOT CHANGE THE VALUES OF THESE, THEY ARE STORED ON DISK. */
typedef
enum
{
IBUF_OP_INSERT
=
0
,
IBUF_OP_DELETE_MARK
=
1
,
IBUF_OP_DELETE
=
2
,
/* Number of different operation types. */
IBUF_OP_COUNT
=
3
,
}
ibuf_op_t
;
extern
ibuf_t
*
ibuf
;
/**********************************************************************
Creates the insert buffer data struct for a single tablespace. Reads the
root page of the insert buffer tree in the tablespace. This function can
be called only after the dictionary system has been initialized, as this
creates also the insert buffer table and index for this tablespace. */
UNIV_INTERN
ibuf_data_t
*
ibuf_data_init_for_space
(
/*=====================*/
/* out, own: ibuf data struct, linked to the list
in ibuf control structure. */
ulint
space
);
/* in: space id */
/**********************************************************************
Creates the insert buffer data structure at a database startup and
initializes the data structures for the insert buffer of each tablespace. */
Creates the insert buffer data structure at a database startup. */
UNIV_INTERN
void
ibuf_init_at_db_start
(
void
);
...
...
@@ -165,38 +163,29 @@ ibuf_page(
/* out: TRUE if level 2 or level 3 page */
ulint
space
,
/* in: space id */
ulint
zip_size
,
/* in: compressed page size in bytes, or 0 */
ulint
page_no
);
/* in: page number */
/***************************************************************************
Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */
UNIV_INTERN
ibool
ibuf_page_low
(
/*==========*/
/* out: TRUE if level 2 or level 3 page */
ulint
space
,
/* in: space id */
ulint
zip_size
,
/* in: compressed page size in bytes, or 0 */
ulint
page_no
,
/* in: page number */
mtr_t
*
mtr
);
/* in: mtr which will contain an x-latch to the
bitmap page if the page is not one of the fixed
address ibuf pages */
address ibuf pages, or NULL, in which case a new
transaction is created. */
/***************************************************************************
Frees excess pages from the ibuf free list. This function is called when an OS
thread calls fsp services to allocate a new file segment, or a new page to a
file segment, and the thread did not own the fsp latch before this call. */
UNIV_INTERN
void
ibuf_free_excess_pages
(
/*===================*/
ulint
space
);
/* in: space id */
ibuf_free_excess_pages
(
void
);
/*========================*/
/*************************************************************************
Makes an index insert to the insert buffer, instead of directly to the disk
page, if this is possible. Does not do insert if the index is clustered
or unique. */
Buffer an operation in the insert/delete buffer, instead of doing it
directly to the disk page, if this is possible. Does not do it if the index
is clustered
or unique. */
UNIV_INTERN
ibool
ibuf_insert
(
/*========*/
/* out: TRUE if success */
ibuf_op_t
op
,
/* in: operation type */
const
dtuple_t
*
entry
,
/* in: index entry to insert */
dict_index_t
*
index
,
/* in: index where to insert */
ulint
space
,
/* in: space id where to insert */
...
...
@@ -205,11 +194,11 @@ ibuf_insert(
que_thr_t
*
thr
);
/* in: query thread */
/*************************************************************************
When an index page is read from a disk to the buffer pool, this function
inserts to the page the possible index entries buffered in the insert buffer.
The entries are deleted from the insert buffer. If the page is not read, but
created in the buffer pool, this function deletes its buffered entries from
the insert buffer; there can exist entries for such a page if the page
belonged to an index which
subsequently was dropped. */
applies any buffered operations to the page and deletes the entries from the
insert buffer. If the page is not read, but created in the buffer pool, this
function deletes its buffered entries from the insert buffer; there can
exist entries for such a page if the page belonged to an index which
subsequently was dropped. */
UNIV_INTERN
void
ibuf_merge_or_delete_for_page
(
...
...
@@ -300,6 +289,16 @@ void
ibuf_print
(
/*=======*/
FILE
*
file
);
/* in: file where to print */
/********************************************************************
Read the first two bytes from a record's fourth field (counter field in new
records; something else in older records). */
ulint
ibuf_rec_get_fake_counter
(
/*======================*/
/* out: "counter" field, or ULINT_UNDEFINED if for
some reason it can't be read*/
rec_t
*
rec
);
/* in: ibuf record */
#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
...
...
@@ -309,6 +308,9 @@ for the file segment from which the pages for the ibuf tree are allocated */
#define IBUF_HEADER PAGE_DATA
#define IBUF_TREE_SEG_HEADER 0
/* fseg header for ibuf tree */
/* The insert buffer tree itself is always located in space 0. */
#define IBUF_SPACE_ID 0
#ifndef UNIV_NONINL
#include "ibuf0ibuf.ic"
#endif
...
...
include/ibuf0ibuf.ic
View file @
35d626f0
...
...
@@ -18,36 +18,37 @@ If there is this much of free space, the corresponding bits are set in the
ibuf bitmap. */
#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32
/* Insert buffer data struct for a single tablespace */
struct ibuf_data_struct{
ulint space; /* space id */
ulint seg_size;/* allocated pages if the file segment
containing ibuf header and tree */
ulint size; /* size of the insert buffer tree in pages */
ibool empty; /* after an insert to the ibuf tree is
performed, this is set to FALSE, and if a
contract operation finds the tree empty, this
is set to TRUE */
ulint free_list_len;
/* length of the free list */
ulint height; /* tree height */
dict_index_t* index; /* insert buffer index */
UT_LIST_NODE_T(ibuf_data_t) data_list;
/* list of ibuf data structs */
ulint n_inserts;/* number of inserts made to the insert
buffer */
ulint n_merges;/* number of pages merged */
ulint n_merged_recs;/* number of records merged */
};
/* Insert buffer struct */
struct ibuf_struct{
ulint size; /* current size of the ibuf index
trees in pages */
ulint max_size; /* recommended maximum size in pages
for the ibuf index tree */
UT_LIST_BASE_NODE_T(ibuf_data_t) data_list;
/* list of ibuf data structs for
each tablespace */
tree, in pages */
ulint max_size; /* recommended maximum size of the
ibuf index tree, in pages */
ulint seg_size; /* allocated pages of the file
segment containing ibuf header and
tree */
ibool empty; /* after an insert to the ibuf tree
is performed, this is set to FALSE,
and if a contract operation finds
the tree empty, this is set to
TRUE */
ulint free_list_len; /* length of the free list */
ulint height; /* tree height */
dict_index_t* index; /* insert buffer index */
ulint n_ops[IBUF_OP_COUNT];
/* number of operations of each type
done */
ulint n_merges; /* number of pages merged */
ulint n_merged_ops[IBUF_OP_COUNT];
/* number of operations of each type
merged to index pages */
ulint n_discarded_ops[IBUF_OP_COUNT];
/* number of operations of each type
discarded without merging due to the
tablespace being deleted or the
index being dropped */
};
/****************************************************************************
...
...
include/ibuf0types.h
View file @
35d626f0
...
...
@@ -9,7 +9,6 @@ Created 7/29/1997 Heikki Tuuri
#ifndef ibuf0types_h
#define ibuf0types_h
typedef
struct
ibuf_data_struct
ibuf_data_t
;
typedef
struct
ibuf_struct
ibuf_t
;
#endif
include/row0row.h
View file @
35d626f0
...
...
@@ -268,6 +268,9 @@ ibool
row_search_index_entry
(
/*===================*/
/* out: TRUE if found */
ibool
*
was_buffered
,
/* out: TRUE if the operation was buffered
in the insert/delete buffer. Can be NULL. */
dict_index_t
*
index
,
/* in: index */
const
dtuple_t
*
entry
,
/* in: index entry */
ulint
mode
,
/* in: BTR_MODIFY_LEAF, ... */
...
...
include/univ.i
View file @
35d626f0
...
...
@@ -137,6 +137,9 @@ operations (very slow); also UNIV_DEBUG must be defined */
for compressed pages */
#
endif
//#define UNIV_DEBUG
//#define UNIV_SYNC_DEBUG
//#define UNIV_IBUF_DEBUG
#
define
UNIV_BTR_DEBUG
/* check B-tree links */
#
define
UNIV_LIGHT_MEM_DEBUG
/* light memory debugging */
...
...
@@ -316,8 +319,11 @@ it is read. */
/* Minimize cache-miss latency by moving data at addr into a cache before
it is read or written. */
#
define
UNIV_PREFETCH_RW
(
addr
)
__builtin_prefetch
(
addr
,
1
,
3
)
/* Tell the compiler that variable/function is unused. */
#
define
UNIV_UNUSED
__attribute__
((
unused
))
#
else
/* Dummy versions of the macros */
#
define
UNIV_UNUSED
#
define
UNIV_EXPECT
(
expr
,
value
)
(
expr
)
#
define
UNIV_LIKELY_NULL
(
expr
)
(
expr
)
#
define
UNIV_PREFETCH_R
(
addr
)
((
void
)
0
)
...
...
row/row0purge.c
View file @
35d626f0
...
...
@@ -197,11 +197,12 @@ row_purge_remove_clust_if_poss(
}
/***************************************************************
Removes a secondary index entry if possible. */
Removes a secondary index entry if possible, without trying to use the
insert/delete buffer. */
static
ibool
row_purge_remove_sec_if_poss_low
(
/*=============================*/
row_purge_remove_sec_if_poss_low
_nonbuffered
(
/*=============================
============
*/
/* out: TRUE if success or if not found */
purge_node_t
*
node
,
/* in: row purge node */
dict_index_t
*
index
,
/* in: index */
...
...
@@ -212,7 +213,7 @@ row_purge_remove_sec_if_poss_low(
btr_pcur_t
pcur
;
btr_cur_t
*
btr_cur
;
ibool
success
;
ibool
old_has
=
0
;
/* remove warning */
ibool
old_has
=
FALSE
;
/* remove warning */
ibool
found
;
ulint
err
;
mtr_t
mtr
;
...
...
@@ -221,13 +222,13 @@ row_purge_remove_sec_if_poss_low(
log_free_check
();
mtr_start
(
&
mtr
);
found
=
row_search_index_entry
(
index
,
entry
,
mode
,
&
pcur
,
&
mtr
);
found
=
row_search_index_entry
(
NULL
,
index
,
entry
,
mode
,
&
pcur
,
&
mtr
);
if
(
!
found
)
{
/* Not found */
/* fputs("PURGE:........sec entry not found\n", stderr); */
/* dtuple_print(
stderr,
entry); */
/* dtuple_print(entry); */
btr_pcur_close
(
&
pcur
);
mtr_commit
(
&
mtr
);
...
...
@@ -266,8 +267,13 @@ row_purge_remove_sec_if_poss_low(
ut_ad
(
mode
==
BTR_MODIFY_TREE
);
btr_cur_pessimistic_delete
(
&
err
,
FALSE
,
btr_cur
,
FALSE
,
&
mtr
);
success
=
err
==
DB_SUCCESS
;
ut_a
(
success
||
err
==
DB_OUT_OF_FILE_SPACE
);
if
(
err
==
DB_SUCCESS
)
{
success
=
TRUE
;
}
else
if
(
err
==
DB_OUT_OF_FILE_SPACE
)
{
success
=
FALSE
;
}
else
{
ut_error
;
}
}
}
...
...
@@ -277,6 +283,117 @@ row_purge_remove_sec_if_poss_low(
return
(
success
);
}
/***************************************************************
Removes a secondary index entry if possible. */
static
ibool
row_purge_remove_sec_if_poss_low(
/*=============================*/
				/* out: TRUE if success or if not found */
	purge_node_t*	node,	/* in: row purge node */
	dict_index_t*	index,	/* in: index */
	dtuple_t*	entry,	/* in: index entry */
	ulint		mode)	/* in: latch mode BTR_MODIFY_LEAF or
				BTR_MODIFY_TREE */
{
	mtr_t		mtr;
	btr_pcur_t	pcur;
	btr_cur_t*	btr_cur;
	ibool		found;
	ibool		success;
	ibool		was_buffered;
	ibool		old_has	= FALSE;
	ibool		leaf_in_buf_pool;

	ut_a((mode == BTR_MODIFY_TREE) || (mode == BTR_MODIFY_LEAF));

	if (mode == BTR_MODIFY_TREE) {
		/* Can't use the insert/delete buffer if we potentially
		need to split pages. */

		return(row_purge_remove_sec_if_poss_low_nonbuffered(
			       node, index, entry, mode));
	}

	log_free_check();

	/* Probe for the entry with BTR_WATCH_LEAF: this does not read
	the leaf page into the buffer pool if it is not already there,
	but installs a watch on it instead. */
	mtr_start(&mtr);

	found = row_search_index_entry(
		NULL, index, entry, BTR_SEARCH_LEAF | BTR_WATCH_LEAF,
		&pcur, &mtr);

	btr_cur = btr_pcur_get_btr_cur(&pcur);

	leaf_in_buf_pool = btr_cur->leaf_in_buf_pool;

	/* If the leaf was not in the buffer pool we cannot have
	positioned on a record, so "found" implies the leaf was there. */
	ut_a(!(found && !leaf_in_buf_pool));

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	if (leaf_in_buf_pool) {

		if (found) {
			/* Index entry exists and is in the buffer pool,
			no need to use the insert/delete buffer. */

			return(row_purge_remove_sec_if_poss_low_nonbuffered(
				       node, index, entry, BTR_MODIFY_LEAF));
		} else {
			/* Index entry does not exist, nothing to do. */

			return(TRUE);
		}
	}

	/* We should remove the index record if no later version of the
	row, which cannot be purged yet, requires its existence. If some
	requires, we should do nothing. */

	mtr_start(&mtr);

	success = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, &mtr);

	if (success) {
		old_has = row_vers_old_has_index_entry(
			TRUE, btr_pcur_get_rec(&node->pcur),
			&mtr, index, entry);
	}

	btr_pcur_commit_specify_mtr(&node->pcur, &mtr);

	if (success && old_has) {
		/* Can't remove the index record yet: a later version
		still needs it. Drop the page watch and give up. */

		buf_pool_remove_watch();

		return(TRUE);
	}

	/* Try to buffer the delete in the insert/delete buffer. */

	mtr_start(&mtr);

	btr_cur->thr = que_node_get_parent(node);

	row_search_index_entry(
		&was_buffered, index, entry, BTR_MODIFY_LEAF | BTR_DELETE,
		&pcur, &mtr);

	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	buf_pool_remove_watch();

	if (!was_buffered) {
		/* Page read into buffer pool or delete-buffering
		failed: fall back to the non-buffered path. */

		return(row_purge_remove_sec_if_poss_low_nonbuffered(
			       node, index, entry, BTR_MODIFY_LEAF));
	}

	return(TRUE);
}
/***************************************************************
Removes a secondary index entry if possible. */
UNIV_INLINE
...
...
row/row0row.c
View file @
35d626f0
...
...
@@ -789,6 +789,9 @@ ibool
row_search_index_entry
(
/*===================*/
/* out: TRUE if found */
ibool
*
was_buffered
,
/* out: TRUE if the operation was buffered
in the insert/delete buffer. Can be NULL. */
dict_index_t
*
index
,
/* in: index */
const
dtuple_t
*
entry
,
/* in: index entry */
ulint
mode
,
/* in: BTR_MODIFY_LEAF, ... */
...
...
@@ -799,17 +802,48 @@ row_search_index_entry(
ulint
n_fields
;
ulint
low_match
;
rec_t
*
rec
;
ibool
ret
;
ut_ad
(
dtuple_check_typed
(
entry
));
btr_pcur_open
(
index
,
entry
,
PAGE_CUR_LE
,
mode
,
pcur
,
mtr
);
ret
=
btr_pcur_was_buffered
(
pcur
);
if
(
was_buffered
)
{
*
was_buffered
=
ret
;
}
if
(
ret
)
{
/* Operation was buffered in the insert/delete buffer;
pretend that we found the record. */
return
(
TRUE
);
}
else
if
((
mode
&
BTR_WATCH_LEAF
)
&&
!
btr_pcur_get_btr_cur
(
pcur
)
->
leaf_in_buf_pool
)
{
/* We did not read in the leaf page, thus we can't have
found anything. */
return
(
FALSE
);
}
low_match
=
btr_pcur_get_low_match
(
pcur
);
rec
=
btr_pcur_get_rec
(
pcur
);
n_fields
=
dtuple_get_n_fields
(
entry
);
return
(
!
page_rec_is_infimum
(
rec
)
&&
low_match
==
n_fields
);
if
(
page_rec_is_infimum
(
rec
))
{
return
(
FALSE
);
}
else
if
(
low_match
!=
n_fields
)
{
/* Not found */
return
(
FALSE
);
}
return
(
TRUE
);
}
#ifndef UNIV_HOTBACKUP
...
...
row/row0uins.c
View file @
35d626f0
...
...
@@ -136,7 +136,7 @@ row_undo_ins_remove_sec_low(
log_free_check
();
mtr_start
(
&
mtr
);
found
=
row_search_index_entry
(
index
,
entry
,
mode
,
&
pcur
,
&
mtr
);
found
=
row_search_index_entry
(
NULL
,
index
,
entry
,
mode
,
&
pcur
,
&
mtr
);
btr_cur
=
btr_pcur_get_btr_cur
(
&
pcur
);
...
...
row/row0umod.c
View file @
35d626f0
...
...
@@ -307,7 +307,7 @@ row_undo_mod_del_mark_or_remove_sec_low(
log_free_check
();
mtr_start
(
&
mtr
);
found
=
row_search_index_entry
(
index
,
entry
,
mode
,
&
pcur
,
&
mtr
);
found
=
row_search_index_entry
(
NULL
,
index
,
entry
,
mode
,
&
pcur
,
&
mtr
);
btr_cur
=
btr_pcur_get_btr_cur
(
&
pcur
);
...
...
@@ -432,7 +432,7 @@ row_undo_mod_del_unmark_sec_and_undo_update(
return
(
DB_SUCCESS
);
}
if
(
UNIV_UNLIKELY
(
!
row_search_index_entry
(
index
,
entry
,
if
(
UNIV_UNLIKELY
(
!
row_search_index_entry
(
NULL
,
index
,
entry
,
mode
,
&
pcur
,
&
mtr
)))
{
fputs
(
"InnoDB: error in sec index entry del undo in
\n
"
"InnoDB: "
,
stderr
);
...
...
row/row0upd.c
View file @
35d626f0
...
...
@@ -1451,21 +1451,23 @@ row_upd_sec_index_entry(
upd_node_t
*
node
,
/* in: row update node */
que_thr_t
*
thr
)
/* in: query thread */
{
ibool
check_ref
;
ibool
found
;
dict_index_t
*
index
;
dtuple_t
*
entry
;
mtr_t
mtr
;
rec_t
*
rec
;
btr_pcur_t
pcur
;
btr_cur_t
*
btr_cur
;
mem_heap_t
*
heap
;
rec_t
*
rec
;
ulint
err
=
DB_SUCCESS
;
mtr_t
mtr
;
trx_t
*
trx
=
thr_get_trx
(
thr
);
dtuple_t
*
entry
;
dict_index_t
*
index
;
ibool
found
;
btr_cur_t
*
btr_cur
;
ibool
referenced
;
ibool
was_buffered
;
ulint
err
=
DB_SUCCESS
;
trx_t
*
trx
=
thr_get_trx
(
thr
);
ulint
mode
=
BTR_MODIFY_LEAF
;
index
=
node
->
index
;
check_ref
=
row_upd_index_is_referenced
(
index
,
trx
);
referenced
=
row_upd_index_is_referenced
(
index
,
trx
);
heap
=
mem_heap_create
(
1024
);
...
...
@@ -1476,8 +1478,24 @@ row_upd_sec_index_entry(
log_free_check
();
mtr_start
(
&
mtr
);
found
=
row_search_index_entry
(
index
,
entry
,
BTR_MODIFY_LEAF
,
&
pcur
,
&
mtr
);
btr_pcur_get_btr_cur
(
&
pcur
)
->
thr
=
thr
;
/* We can only try to use the insert/delete buffer to buffer
delete-mark operations if the index we're modifying has no foreign
key constraints referring to it. */
if
(
!
referenced
)
{
mode
|=
BTR_DELETE_MARK
;
}
found
=
row_search_index_entry
(
&
was_buffered
,
index
,
entry
,
BTR_MODIFY_LEAF
,
&
pcur
,
&
mtr
);
if
(
was_buffered
)
{
/* Entry was delete marked already. */
goto
close_cur
;
}
btr_cur
=
btr_pcur_get_btr_cur
(
&
pcur
);
rec
=
btr_cur_get_rec
(
btr_cur
);
...
...
@@ -1504,15 +1522,20 @@ row_upd_sec_index_entry(
delete marked if we return after a lock wait in
row_ins_index_entry below */
if
(
!
rec_get_deleted_flag
(
rec
,
dict_table_is_comp
(
index
->
table
)))
{
err
=
btr_cur_del_mark_set_sec_rec
(
0
,
btr_cur
,
TRUE
,
thr
,
&
mtr
);
if
(
err
==
DB_SUCCESS
&&
check_ref
)
{
if
(
!
rec_get_deleted_flag
(
rec
,
dict_table_is_comp
(
index
->
table
)))
{
err
=
btr_cur_del_mark_set_sec_rec
(
0
,
btr_cur
,
TRUE
,
thr
,
&
mtr
);
if
(
err
==
DB_SUCCESS
&&
referenced
)
{
ulint
*
offsets
;
offsets
=
rec_get_offsets
(
rec
,
index
,
NULL
,
ULINT_UNDEFINED
,
&
heap
);
ulint
*
offsets
=
rec_get_offsets
(
rec
,
index
,
NULL
,
ULINT_UNDEFINED
,
&
heap
);
/* NOTE that the following call loses
the position of pcur ! */
err
=
row_upd_check_references_constraints
(
...
...
@@ -1522,6 +1545,7 @@ row_upd_sec_index_entry(
}
}
close_cur:
btr_pcur_close
(
&
pcur
);
mtr_commit
(
&
mtr
);
...
...
@@ -1583,7 +1607,7 @@ row_upd_clust_rec_by_insert(
upd_node_t
*
node
,
/* in: row update node */
dict_index_t
*
index
,
/* in: clustered index of the record */
que_thr_t
*
thr
,
/* in: query thread */
ibool
check_ref
,
/* in: TRUE if index may be referenced in
ibool
referenced
,
/* in: TRUE if index may be referenced in
a foreign key constraint */
mtr_t
*
mtr
)
/* in: mtr; gets committed here */
{
...
...
@@ -1629,16 +1653,21 @@ row_upd_clust_rec_by_insert(
btr_cur_mark_extern_inherited_fields
(
btr_cur_get_page_zip
(
btr_cur
),
rec
,
index
,
offsets
,
node
->
update
,
mtr
);
if
(
check_ref
)
{
if
(
referenced
)
{
/* NOTE that the following call loses
the position of pcur ! */
err
=
row_upd_check_references_constraints
(
node
,
pcur
,
table
,
index
,
offsets
,
thr
,
mtr
);
if
(
err
!=
DB_SUCCESS
)
{
mtr_commit
(
mtr
);
if
(
UNIV_LIKELY_NULL
(
heap
))
{
mem_heap_free
(
heap
);
}
return
(
err
);
}
}
...
...
@@ -1794,7 +1823,8 @@ row_upd_del_mark_clust_rec(
ulint
*
offsets
,
/* in/out: rec_get_offsets() for the
record under the cursor */
que_thr_t
*
thr
,
/* in: query thread */
ibool
check_ref
,
/* in: TRUE if index may be referenced in
ibool
referenced
,
/* in: TRUE if index may be referenced in
a foreign key constraint */
mtr_t
*
mtr
)
/* in: mtr; gets committed here */
{
...
...
@@ -1819,13 +1849,11 @@ row_upd_del_mark_clust_rec(
err
=
btr_cur_del_mark_set_clust_rec
(
BTR_NO_LOCKING_FLAG
,
btr_cur
,
TRUE
,
thr
,
mtr
);
if
(
err
==
DB_SUCCESS
&&
check_ref
)
{
if
(
err
==
DB_SUCCESS
&&
referenced
)
{
/* NOTE that the following call loses the position of pcur ! */
err
=
row_upd_check_references_constraints
(
node
,
pcur
,
index
->
table
,
index
,
offsets
,
thr
,
mtr
);
err
=
row_upd_check_references_constraints
(
node
,
pcur
,
index
->
table
,
index
,
offsets
,
thr
,
mtr
);
}
mtr_commit
(
mtr
);
...
...
@@ -1848,7 +1876,6 @@ row_upd_clust_step(
dict_index_t
*
index
;
btr_pcur_t
*
pcur
;
ibool
success
;
ibool
check_ref
;
ulint
err
;
mtr_t
*
mtr
;
mtr_t
mtr_buf
;
...
...
@@ -1856,11 +1883,12 @@ row_upd_clust_step(
mem_heap_t
*
heap
=
NULL
;
ulint
offsets_
[
REC_OFFS_NORMAL_SIZE
];
ulint
*
offsets
;
ibool
referenced
;
rec_offs_init
(
offsets_
);
index
=
dict_table_get_first_index
(
node
->
table
);
check_ref
=
row_upd_index_is_referenced
(
index
,
thr_get_trx
(
thr
));
referenced
=
row_upd_index_is_referenced
(
index
,
thr_get_trx
(
thr
));
pcur
=
node
->
pcur
;
...
...
@@ -1930,8 +1958,9 @@ row_upd_clust_step(
/* NOTE: the following function calls will also commit mtr */
if
(
node
->
is_delete
)
{
err
=
row_upd_del_mark_clust_rec
(
node
,
index
,
offsets
,
thr
,
check_ref
,
mtr
);
err
=
row_upd_del_mark_clust_rec
(
node
,
index
,
offsets
,
thr
,
referenced
,
mtr
);
if
(
err
==
DB_SUCCESS
)
{
node
->
state
=
UPD_NODE_UPDATE_ALL_SEC
;
node
->
index
=
dict_table_get_next_index
(
index
);
...
...
@@ -1979,8 +2008,9 @@ row_upd_clust_step(
choosing records to update. MySQL solves now the problem
externally! */
err
=
row_upd_clust_rec_by_insert
(
node
,
index
,
thr
,
check_ref
,
mtr
);
err
=
row_upd_clust_rec_by_insert
(
node
,
index
,
thr
,
referenced
,
mtr
);
if
(
err
!=
DB_SUCCESS
)
{
return
(
err
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment