Commit 3cc4babd authored by Michael Widenius's avatar Michael Widenius

Fix for Bug#36499 Maria: potential deadlock

This was done by introducing another mutex for handling the key_del link
I also renamed all key_del variables to start with key_del prefix


storage/maria/ma_close.c:
  Rename of key_del variables
storage/maria/ma_key_recover.c:
  Changed key_del to be protexted by it's own mutex: key_del_lock
  Rename of key_del variables
  Removed comment for old bug
storage/maria/ma_key_recover.h:
  Rename of key_del variables
storage/maria/ma_open.c:
  Initialization for new key_del_lock mutex
  Renamed intern_cond to key_del_cond as it was only used for protection of key_del
storage/maria/ma_page.c:
  Rename of key_del variables
storage/maria/ma_write.c:
  Rename of key_del variables
storage/maria/maria_def.h:
  Rename of key_del variables
  Added key_del_lock
parent a3af9a78
......@@ -33,7 +33,7 @@ int maria_close(register MARIA_HA *info)
(uint) share->tot_locks));
/* Check that we have unlocked key delete-links properly */
DBUG_ASSERT(info->used_key_del == 0);
DBUG_ASSERT(info->key_del_used == 0);
pthread_mutex_lock(&THR_LOCK_maria);
if (info->lock_type == F_EXTRA_LCK)
......
......@@ -203,16 +203,6 @@ my_bool write_hook_for_undo_key(enum translog_record_type type,
(struct st_msg_to_write_hook_for_undo_key *) hook_arg;
*msg->root= msg->value;
/**
@todo BUG
so we have log mutex and then intern_lock.
While in checkpoint we have intern_lock and then log mutex, like when we
flush bitmap (flushing bitmap pages can call hook which takes log mutex);
and in _ma_update_state_lsns_sub() this is the same.
So we can deadlock.
Another one is that in translog_assign_id_to_share() we have intern_lock
and then log mutex.
*/
_ma_fast_unlock_key_del(tbl_info);
return write_hook_for_undo(type, trn, tbl_info, lsn, 0);
}
......@@ -1209,14 +1199,14 @@ my_bool _ma_apply_undo_key_delete(MARIA_HA *info, LSN undo_lsn,
@note
To allow higher concurrency in the common case where we do inserts
and we don't have any linked blocks we do the following:
- Mark in info->used_key_del that we are not using key_del
- Mark in info->key_del_used that we are not using key_del
- Return at once (without marking key_del as used)
This is safe as we in this case don't write current_key_del into
This is safe as we in this case don't write key_del_current into
the redo log and during recover we are not updating key_del.
@retval 1 Use page at end of file
@retval 0 Use page at share->current_key_del
@retval 0 Use page at share->key_del_current
*/
my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end)
......@@ -1224,68 +1214,72 @@ my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end)
MARIA_SHARE *share= info->s;
/*
info->used_key_del is 0 initially.
info->key_del_used is 0 initially.
If the caller needs a block (_ma_new()), we look at the free list:
- looks empty? then caller will create a new block at end of file and
remember (through info->used_key_del==2) that it will not change
remember (through info->key_del_used==2) that it will not change
state.key_del and does not need to wake up waiters as nobody will wait for
it.
- non-empty? then we wait for other users of the state.key_del list to
have finished, then we lock this list (through share->used_key_del==1)
have finished, then we lock this list (through share->key_del_used==1)
because we need to prevent some other thread to also read state.key_del
and use the same page as ours. We remember through info->used_key_del==1
and use the same page as ours. We remember through info->key_del_used==1
that we will have to set state.key_del at unlock time and wake up
waiters.
If the caller wants to free a block (_ma_dispose()), "empty" and
"non-empty" are treated as "non-empty" is treated above.
When we are ready to unlock, we copy share->current_key_del into
When we are ready to unlock, we copy share->key_del_current into
state.key_del. Unlocking happens when writing the UNDO log record, that
can make a long lock time.
Why we wrote "*looks* empty": because we are looking at state.key_del
which may be slightly old (share->current_key_del may be more recent and
which may be slightly old (share->key_del_current may be more recent and
exact): when we want a new page, we tolerate to treat "there was no free
page 1 millisecond ago" as "there is no free page". It's ok to non-pop
(_ma_new(), page will be found later anyway) but it's not ok to non-push
(_ma_dispose(), page would be lost).
When we leave this function, info->used_key_del is always 1 or 2.
When we leave this function, info->key_del_used is always 1 or 2.
*/
if (info->used_key_del != 1)
if (info->key_del_used != 1)
{
pthread_mutex_lock(&share->intern_lock);
pthread_mutex_lock(&share->key_del_lock);
if (share->state.key_del == HA_OFFSET_ERROR && insert_at_end)
{
pthread_mutex_unlock(&share->intern_lock);
info->used_key_del= 2; /* insert-with-append */
pthread_mutex_unlock(&share->key_del_lock);
info->key_del_used= 2; /* insert-with-append */
return 1;
}
#ifdef THREAD
while (share->used_key_del)
pthread_cond_wait(&share->intern_cond, &share->intern_lock);
while (share->key_del_used)
pthread_cond_wait(&share->key_del_cond, &share->key_del_lock);
#endif
info->used_key_del= 1;
share->used_key_del= 1;
share->current_key_del= share->state.key_del;
pthread_mutex_unlock(&share->intern_lock);
info->key_del_used= 1;
share->key_del_used= 1;
share->key_del_current= share->state.key_del;
pthread_mutex_unlock(&share->key_del_lock);
}
return share->current_key_del == HA_OFFSET_ERROR;
return share->key_del_current == HA_OFFSET_ERROR;
}
/**
@brief copy changes to key_del and unlock it
@notes
In case of many threads using the maria table, we always have a lock
on the translog when comming here.
*/
void _ma_unlock_key_del(MARIA_HA *info)
{
DBUG_ASSERT(info->used_key_del);
if (info->used_key_del == 1) /* Ignore insert-with-append */
DBUG_ASSERT(info->key_del_used);
if (info->key_del_used == 1) /* Ignore insert-with-append */
{
MARIA_SHARE *share= info->s;
pthread_mutex_lock(&share->intern_lock);
share->used_key_del= 0;
share->state.key_del= share->current_key_del;
pthread_mutex_unlock(&share->intern_lock);
pthread_cond_signal(&share->intern_cond);
pthread_mutex_lock(&share->key_del_lock);
share->key_del_used= 0;
share->state.key_del= share->key_del_current;
pthread_mutex_unlock(&share->key_del_lock);
pthread_cond_signal(&share->key_del_cond);
}
info->used_key_del= 0;
info->key_del_used= 0;
}
......@@ -114,6 +114,6 @@ extern my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end);
extern void _ma_unlock_key_del(MARIA_HA *info);
static inline void _ma_fast_unlock_key_del(MARIA_HA *info)
{
if (info->used_key_del)
if (info->key_del_used)
_ma_unlock_key_del(info);
}
......@@ -807,8 +807,9 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
}
#ifdef THREAD
thr_lock_init(&share->lock);
VOID(pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST));
VOID(pthread_cond_init(&share->intern_cond, 0));
pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST);
pthread_mutex_init(&share->key_del_lock, MY_MUTEX_INIT_FAST);
pthread_cond_init(&share->key_del_cond, 0);
for (i=0; i<keys; i++)
VOID(my_rwlock_init(&share->keyinfo[i].root_lock, NULL));
VOID(my_rwlock_init(&share->mmap_lock, NULL));
......@@ -1215,6 +1216,13 @@ uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite)
(should only be needed after ALTER TABLE
ENABLE/DISABLE KEYS, and REPAIR/OPTIMIZE).
@notes
For transactional multiuser tables, this function is called
with intern_lock & translog_lock or when the last thread who
is using the table is closing it.
Because of the translog_lock we don't need to have a lock on
key_del_lock.
@return Operation status
@retval 0 OK
@retval 1 Error
......
......@@ -223,8 +223,8 @@ int _ma_dispose(register MARIA_HA *info, my_off_t pos, my_bool page_not_read)
(void) _ma_lock_key_del(info, 0);
old_link= share->current_key_del;
share->current_key_del= pos;
old_link= share->key_del_current;
share->key_del_current= pos;
page_no= pos / block_size;
bzero(buff, share->keypage_header);
_ma_store_keynr(share, buff, (uchar) MARIA_DELETE_KEY_NR);
......@@ -347,7 +347,7 @@ my_off_t _ma_new(register MARIA_HA *info, int level,
else
{
uchar *buff;
pos= share->current_key_del; /* Protected */
pos= share->key_del_current; /* Protected */
DBUG_ASSERT(share->pagecache->block_size == block_size);
if (!(buff= pagecache_read(share->pagecache,
&share->kfile,
......@@ -362,15 +362,15 @@ my_off_t _ma_new(register MARIA_HA *info, int level,
(single linked list):
*/
#ifndef DBUG_OFF
my_off_t current_key_del;
my_off_t key_del_current;
#endif
share->current_key_del= mi_sizekorr(buff+share->keypage_header);
share->key_del_current= mi_sizekorr(buff+share->keypage_header);
#ifndef DBUG_OFF
current_key_del= share->current_key_del;
DBUG_ASSERT(current_key_del != share->state.key_del &&
(current_key_del != 0) &&
((current_key_del == HA_OFFSET_ERROR) ||
(current_key_del <=
key_del_current= share->key_del_current;
DBUG_ASSERT(key_del_current != share->state.key_del &&
(key_del_current != 0) &&
((key_del_current == HA_OFFSET_ERROR) ||
(key_del_current <=
(share->state.state.key_file_length - block_size))));
#endif
}
......
......@@ -1757,11 +1757,11 @@ my_bool _ma_log_new(MARIA_HA *info, my_off_t page, const uchar *buff,
page_store(log_data + FILEID_STORE_SIZE, page);
/* Store link to next unused page */
if (info->used_key_del == 2)
if (info->key_del_used == 2)
page= 0; /* key_del not changed */
else
page= ((share->current_key_del == HA_OFFSET_ERROR) ? IMPOSSIBLE_PAGE_NO :
share->current_key_del / share->block_size);
page= ((share->key_del_current == HA_OFFSET_ERROR) ? IMPOSSIBLE_PAGE_NO :
share->key_del_current / share->block_size);
page_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, page);
key_nr_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE*2, key_nr);
......
......@@ -336,7 +336,7 @@ typedef struct st_maria_share
size_t (*file_read)(MARIA_HA *, uchar *, size_t, my_off_t, myf);
size_t (*file_write)(MARIA_HA *, const uchar *, size_t, my_off_t, myf);
invalidator_by_filename invalidator; /* query cache invalidator */
my_off_t current_key_del; /* delete links for index pages */
my_off_t key_del_current; /* delete links for index pages */
ulong this_process; /* processid */
ulong last_process; /* For table-change-check */
ulong last_version; /* Version on start */
......@@ -380,12 +380,13 @@ typedef struct st_maria_share
*/
my_bool now_transactional;
my_bool have_versioning;
my_bool used_key_del; /* != 0 if key_del is locked */
my_bool key_del_used; /* != 0 if key_del is locked */
#ifdef THREAD
THR_LOCK lock;
void (*lock_restore_status)(void *);
pthread_mutex_t intern_lock; /* Locking for use with _locking */
pthread_cond_t intern_cond;
pthread_mutex_t key_del_lock;
pthread_cond_t key_del_cond;
#endif
my_off_t mmaped_length;
uint nonmmaped_inserts; /* counter of writing in
......@@ -534,7 +535,7 @@ struct st_maria_handler
int save_lastinx;
uint preload_buff_size; /* When preloading indexes */
uint16 last_used_keyseg; /* For MARIAMRG */
uint8 used_key_del; /* != 0 if key_del is used */
uint8 key_del_used; /* != 0 if key_del is used */
my_bool was_locked; /* Was locked in panic */
my_bool append_insert_at_end; /* Set if concurrent insert */
my_bool quick_mode;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment