Commit 3cc4babd authored by Michael Widenius's avatar Michael Widenius

Fix for Bug#36499 Maria: potential deadlock

This was done by introducing another mutex for handling the key_del link
I also renamed all key_del variables to start with key_del prefix


storage/maria/ma_close.c:
  Rename of key_del variables
storage/maria/ma_key_recover.c:
  Changed key_del to be protexted by it's own mutex: key_del_lock
  Rename of key_del variables
  Removed comment for old bug
storage/maria/ma_key_recover.h:
  Rename of key_del variables
storage/maria/ma_open.c:
  Initialization for new key_del_lock mutex
  Renamed intern_cond to key_del_cond as it was only used for protection of key_del
storage/maria/ma_page.c:
  Rename of key_del variables
storage/maria/ma_write.c:
  Rename of key_del variables
storage/maria/maria_def.h:
  Rename of key_del variables
  Added key_del_lock
parent a3af9a78
...@@ -33,7 +33,7 @@ int maria_close(register MARIA_HA *info) ...@@ -33,7 +33,7 @@ int maria_close(register MARIA_HA *info)
(uint) share->tot_locks)); (uint) share->tot_locks));
/* Check that we have unlocked key delete-links properly */ /* Check that we have unlocked key delete-links properly */
DBUG_ASSERT(info->used_key_del == 0); DBUG_ASSERT(info->key_del_used == 0);
pthread_mutex_lock(&THR_LOCK_maria); pthread_mutex_lock(&THR_LOCK_maria);
if (info->lock_type == F_EXTRA_LCK) if (info->lock_type == F_EXTRA_LCK)
......
...@@ -203,16 +203,6 @@ my_bool write_hook_for_undo_key(enum translog_record_type type, ...@@ -203,16 +203,6 @@ my_bool write_hook_for_undo_key(enum translog_record_type type,
(struct st_msg_to_write_hook_for_undo_key *) hook_arg; (struct st_msg_to_write_hook_for_undo_key *) hook_arg;
*msg->root= msg->value; *msg->root= msg->value;
/**
@todo BUG
so we have log mutex and then intern_lock.
While in checkpoint we have intern_lock and then log mutex, like when we
flush bitmap (flushing bitmap pages can call hook which takes log mutex);
and in _ma_update_state_lsns_sub() this is the same.
So we can deadlock.
Another one is that in translog_assign_id_to_share() we have intern_lock
and then log mutex.
*/
_ma_fast_unlock_key_del(tbl_info); _ma_fast_unlock_key_del(tbl_info);
return write_hook_for_undo(type, trn, tbl_info, lsn, 0); return write_hook_for_undo(type, trn, tbl_info, lsn, 0);
} }
...@@ -1209,14 +1199,14 @@ my_bool _ma_apply_undo_key_delete(MARIA_HA *info, LSN undo_lsn, ...@@ -1209,14 +1199,14 @@ my_bool _ma_apply_undo_key_delete(MARIA_HA *info, LSN undo_lsn,
@note @note
To allow higher concurrency in the common case where we do inserts To allow higher concurrency in the common case where we do inserts
and we don't have any linked blocks we do the following: and we don't have any linked blocks we do the following:
- Mark in info->used_key_del that we are not using key_del - Mark in info->key_del_used that we are not using key_del
- Return at once (without marking key_del as used) - Return at once (without marking key_del as used)
This is safe as we in this case don't write current_key_del into This is safe as we in this case don't write key_del_current into
the redo log and during recover we are not updating key_del. the redo log and during recover we are not updating key_del.
@retval 1 Use page at end of file @retval 1 Use page at end of file
@retval 0 Use page at share->current_key_del @retval 0 Use page at share->key_del_current
*/ */
my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end) my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end)
...@@ -1224,68 +1214,72 @@ my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end) ...@@ -1224,68 +1214,72 @@ my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end)
MARIA_SHARE *share= info->s; MARIA_SHARE *share= info->s;
/* /*
info->used_key_del is 0 initially. info->key_del_used is 0 initially.
If the caller needs a block (_ma_new()), we look at the free list: If the caller needs a block (_ma_new()), we look at the free list:
- looks empty? then caller will create a new block at end of file and - looks empty? then caller will create a new block at end of file and
remember (through info->used_key_del==2) that it will not change remember (through info->key_del_used==2) that it will not change
state.key_del and does not need to wake up waiters as nobody will wait for state.key_del and does not need to wake up waiters as nobody will wait for
it. it.
- non-empty? then we wait for other users of the state.key_del list to - non-empty? then we wait for other users of the state.key_del list to
have finished, then we lock this list (through share->used_key_del==1) have finished, then we lock this list (through share->key_del_used==1)
because we need to prevent some other thread to also read state.key_del because we need to prevent some other thread to also read state.key_del
and use the same page as ours. We remember through info->used_key_del==1 and use the same page as ours. We remember through info->key_del_used==1
that we will have to set state.key_del at unlock time and wake up that we will have to set state.key_del at unlock time and wake up
waiters. waiters.
If the caller wants to free a block (_ma_dispose()), "empty" and If the caller wants to free a block (_ma_dispose()), "empty" and
"non-empty" are treated as "non-empty" is treated above. "non-empty" are treated as "non-empty" is treated above.
When we are ready to unlock, we copy share->current_key_del into When we are ready to unlock, we copy share->key_del_current into
state.key_del. Unlocking happens when writing the UNDO log record, that state.key_del. Unlocking happens when writing the UNDO log record, that
can make a long lock time. can make a long lock time.
Why we wrote "*looks* empty": because we are looking at state.key_del Why we wrote "*looks* empty": because we are looking at state.key_del
which may be slightly old (share->current_key_del may be more recent and which may be slightly old (share->key_del_current may be more recent and
exact): when we want a new page, we tolerate to treat "there was no free exact): when we want a new page, we tolerate to treat "there was no free
page 1 millisecond ago" as "there is no free page". It's ok to non-pop page 1 millisecond ago" as "there is no free page". It's ok to non-pop
(_ma_new(), page will be found later anyway) but it's not ok to non-push (_ma_new(), page will be found later anyway) but it's not ok to non-push
(_ma_dispose(), page would be lost). (_ma_dispose(), page would be lost).
When we leave this function, info->used_key_del is always 1 or 2. When we leave this function, info->key_del_used is always 1 or 2.
*/ */
if (info->used_key_del != 1) if (info->key_del_used != 1)
{ {
pthread_mutex_lock(&share->intern_lock); pthread_mutex_lock(&share->key_del_lock);
if (share->state.key_del == HA_OFFSET_ERROR && insert_at_end) if (share->state.key_del == HA_OFFSET_ERROR && insert_at_end)
{ {
pthread_mutex_unlock(&share->intern_lock); pthread_mutex_unlock(&share->key_del_lock);
info->used_key_del= 2; /* insert-with-append */ info->key_del_used= 2; /* insert-with-append */
return 1; return 1;
} }
#ifdef THREAD #ifdef THREAD
while (share->used_key_del) while (share->key_del_used)
pthread_cond_wait(&share->intern_cond, &share->intern_lock); pthread_cond_wait(&share->key_del_cond, &share->key_del_lock);
#endif #endif
info->used_key_del= 1; info->key_del_used= 1;
share->used_key_del= 1; share->key_del_used= 1;
share->current_key_del= share->state.key_del; share->key_del_current= share->state.key_del;
pthread_mutex_unlock(&share->intern_lock); pthread_mutex_unlock(&share->key_del_lock);
} }
return share->current_key_del == HA_OFFSET_ERROR; return share->key_del_current == HA_OFFSET_ERROR;
} }
/** /**
@brief copy changes to key_del and unlock it @brief copy changes to key_del and unlock it
@notes
In case of many threads using the maria table, we always have a lock
on the translog when comming here.
*/ */
void _ma_unlock_key_del(MARIA_HA *info) void _ma_unlock_key_del(MARIA_HA *info)
{ {
DBUG_ASSERT(info->used_key_del); DBUG_ASSERT(info->key_del_used);
if (info->used_key_del == 1) /* Ignore insert-with-append */ if (info->key_del_used == 1) /* Ignore insert-with-append */
{ {
MARIA_SHARE *share= info->s; MARIA_SHARE *share= info->s;
pthread_mutex_lock(&share->intern_lock); pthread_mutex_lock(&share->key_del_lock);
share->used_key_del= 0; share->key_del_used= 0;
share->state.key_del= share->current_key_del; share->state.key_del= share->key_del_current;
pthread_mutex_unlock(&share->intern_lock); pthread_mutex_unlock(&share->key_del_lock);
pthread_cond_signal(&share->intern_cond); pthread_cond_signal(&share->key_del_cond);
} }
info->used_key_del= 0; info->key_del_used= 0;
} }
...@@ -114,6 +114,6 @@ extern my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end); ...@@ -114,6 +114,6 @@ extern my_bool _ma_lock_key_del(MARIA_HA *info, my_bool insert_at_end);
extern void _ma_unlock_key_del(MARIA_HA *info); extern void _ma_unlock_key_del(MARIA_HA *info);
static inline void _ma_fast_unlock_key_del(MARIA_HA *info) static inline void _ma_fast_unlock_key_del(MARIA_HA *info)
{ {
if (info->used_key_del) if (info->key_del_used)
_ma_unlock_key_del(info); _ma_unlock_key_del(info);
} }
...@@ -807,8 +807,9 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags) ...@@ -807,8 +807,9 @@ MARIA_HA *maria_open(const char *name, int mode, uint open_flags)
} }
#ifdef THREAD #ifdef THREAD
thr_lock_init(&share->lock); thr_lock_init(&share->lock);
VOID(pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST)); pthread_mutex_init(&share->intern_lock, MY_MUTEX_INIT_FAST);
VOID(pthread_cond_init(&share->intern_cond, 0)); pthread_mutex_init(&share->key_del_lock, MY_MUTEX_INIT_FAST);
pthread_cond_init(&share->key_del_cond, 0);
for (i=0; i<keys; i++) for (i=0; i<keys; i++)
VOID(my_rwlock_init(&share->keyinfo[i].root_lock, NULL)); VOID(my_rwlock_init(&share->keyinfo[i].root_lock, NULL));
VOID(my_rwlock_init(&share->mmap_lock, NULL)); VOID(my_rwlock_init(&share->mmap_lock, NULL));
...@@ -1215,6 +1216,13 @@ uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite) ...@@ -1215,6 +1216,13 @@ uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite)
(should only be needed after ALTER TABLE (should only be needed after ALTER TABLE
ENABLE/DISABLE KEYS, and REPAIR/OPTIMIZE). ENABLE/DISABLE KEYS, and REPAIR/OPTIMIZE).
@notes
For transactional multiuser tables, this function is called
with intern_lock & translog_lock or when the last thread who
is using the table is closing it.
Because of the translog_lock we don't need to have a lock on
key_del_lock.
@return Operation status @return Operation status
@retval 0 OK @retval 0 OK
@retval 1 Error @retval 1 Error
......
...@@ -223,8 +223,8 @@ int _ma_dispose(register MARIA_HA *info, my_off_t pos, my_bool page_not_read) ...@@ -223,8 +223,8 @@ int _ma_dispose(register MARIA_HA *info, my_off_t pos, my_bool page_not_read)
(void) _ma_lock_key_del(info, 0); (void) _ma_lock_key_del(info, 0);
old_link= share->current_key_del; old_link= share->key_del_current;
share->current_key_del= pos; share->key_del_current= pos;
page_no= pos / block_size; page_no= pos / block_size;
bzero(buff, share->keypage_header); bzero(buff, share->keypage_header);
_ma_store_keynr(share, buff, (uchar) MARIA_DELETE_KEY_NR); _ma_store_keynr(share, buff, (uchar) MARIA_DELETE_KEY_NR);
...@@ -347,7 +347,7 @@ my_off_t _ma_new(register MARIA_HA *info, int level, ...@@ -347,7 +347,7 @@ my_off_t _ma_new(register MARIA_HA *info, int level,
else else
{ {
uchar *buff; uchar *buff;
pos= share->current_key_del; /* Protected */ pos= share->key_del_current; /* Protected */
DBUG_ASSERT(share->pagecache->block_size == block_size); DBUG_ASSERT(share->pagecache->block_size == block_size);
if (!(buff= pagecache_read(share->pagecache, if (!(buff= pagecache_read(share->pagecache,
&share->kfile, &share->kfile,
...@@ -362,15 +362,15 @@ my_off_t _ma_new(register MARIA_HA *info, int level, ...@@ -362,15 +362,15 @@ my_off_t _ma_new(register MARIA_HA *info, int level,
(single linked list): (single linked list):
*/ */
#ifndef DBUG_OFF #ifndef DBUG_OFF
my_off_t current_key_del; my_off_t key_del_current;
#endif #endif
share->current_key_del= mi_sizekorr(buff+share->keypage_header); share->key_del_current= mi_sizekorr(buff+share->keypage_header);
#ifndef DBUG_OFF #ifndef DBUG_OFF
current_key_del= share->current_key_del; key_del_current= share->key_del_current;
DBUG_ASSERT(current_key_del != share->state.key_del && DBUG_ASSERT(key_del_current != share->state.key_del &&
(current_key_del != 0) && (key_del_current != 0) &&
((current_key_del == HA_OFFSET_ERROR) || ((key_del_current == HA_OFFSET_ERROR) ||
(current_key_del <= (key_del_current <=
(share->state.state.key_file_length - block_size)))); (share->state.state.key_file_length - block_size))));
#endif #endif
} }
......
...@@ -1757,11 +1757,11 @@ my_bool _ma_log_new(MARIA_HA *info, my_off_t page, const uchar *buff, ...@@ -1757,11 +1757,11 @@ my_bool _ma_log_new(MARIA_HA *info, my_off_t page, const uchar *buff,
page_store(log_data + FILEID_STORE_SIZE, page); page_store(log_data + FILEID_STORE_SIZE, page);
/* Store link to next unused page */ /* Store link to next unused page */
if (info->used_key_del == 2) if (info->key_del_used == 2)
page= 0; /* key_del not changed */ page= 0; /* key_del not changed */
else else
page= ((share->current_key_del == HA_OFFSET_ERROR) ? IMPOSSIBLE_PAGE_NO : page= ((share->key_del_current == HA_OFFSET_ERROR) ? IMPOSSIBLE_PAGE_NO :
share->current_key_del / share->block_size); share->key_del_current / share->block_size);
page_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, page); page_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE, page);
key_nr_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE*2, key_nr); key_nr_store(log_data + FILEID_STORE_SIZE + PAGE_STORE_SIZE*2, key_nr);
......
...@@ -336,7 +336,7 @@ typedef struct st_maria_share ...@@ -336,7 +336,7 @@ typedef struct st_maria_share
size_t (*file_read)(MARIA_HA *, uchar *, size_t, my_off_t, myf); size_t (*file_read)(MARIA_HA *, uchar *, size_t, my_off_t, myf);
size_t (*file_write)(MARIA_HA *, const uchar *, size_t, my_off_t, myf); size_t (*file_write)(MARIA_HA *, const uchar *, size_t, my_off_t, myf);
invalidator_by_filename invalidator; /* query cache invalidator */ invalidator_by_filename invalidator; /* query cache invalidator */
my_off_t current_key_del; /* delete links for index pages */ my_off_t key_del_current; /* delete links for index pages */
ulong this_process; /* processid */ ulong this_process; /* processid */
ulong last_process; /* For table-change-check */ ulong last_process; /* For table-change-check */
ulong last_version; /* Version on start */ ulong last_version; /* Version on start */
...@@ -380,12 +380,13 @@ typedef struct st_maria_share ...@@ -380,12 +380,13 @@ typedef struct st_maria_share
*/ */
my_bool now_transactional; my_bool now_transactional;
my_bool have_versioning; my_bool have_versioning;
my_bool used_key_del; /* != 0 if key_del is locked */ my_bool key_del_used; /* != 0 if key_del is locked */
#ifdef THREAD #ifdef THREAD
THR_LOCK lock; THR_LOCK lock;
void (*lock_restore_status)(void *); void (*lock_restore_status)(void *);
pthread_mutex_t intern_lock; /* Locking for use with _locking */ pthread_mutex_t intern_lock; /* Locking for use with _locking */
pthread_cond_t intern_cond; pthread_mutex_t key_del_lock;
pthread_cond_t key_del_cond;
#endif #endif
my_off_t mmaped_length; my_off_t mmaped_length;
uint nonmmaped_inserts; /* counter of writing in uint nonmmaped_inserts; /* counter of writing in
...@@ -534,7 +535,7 @@ struct st_maria_handler ...@@ -534,7 +535,7 @@ struct st_maria_handler
int save_lastinx; int save_lastinx;
uint preload_buff_size; /* When preloading indexes */ uint preload_buff_size; /* When preloading indexes */
uint16 last_used_keyseg; /* For MARIAMRG */ uint16 last_used_keyseg; /* For MARIAMRG */
uint8 used_key_del; /* != 0 if key_del is used */ uint8 key_del_used; /* != 0 if key_del is used */
my_bool was_locked; /* Was locked in panic */ my_bool was_locked; /* Was locked in panic */
my_bool append_insert_at_end; /* Set if concurrent insert */ my_bool append_insert_at_end; /* Set if concurrent insert */
my_bool quick_mode; my_bool quick_mode;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment