• Jan Lindström's avatar
    MDEV-24704 : Galera test failure on galera.galera_nopk_unicode · 75546dfb
    Jan Lindström authored
    Analysis:
    =========
    
    Reason for test failure was a mutex deadlock between DeadlockChecker with stack
    
    Thread 6 (Thread 0xffff70066070 (LWP 24667)):
    0  0x0000ffff784e850c in __lll_lock_wait (futex=futex@entry=0xffff04002258, private=0) at lowlevellock.c:46
    1  0x0000ffff784e19f0 in __GI___pthread_mutex_lock (mutex=mutex@entry=0xffff04002258) at pthread_mutex_lock.c:135
    2  0x0000aaaaac8cd014 in inline_mysql_mutex_lock (src_file=0xaaaaacea0f28 "/home/buildbot/buildbot/build/mariadb-10.2.37/sql/wsrep_thd.cc", src_line=762, that=0xffff04002258) at /home/buildbot/buildbot/build/mariadb-10.2.37/include/mysql/psi/mysql_thread.h:675
    3  wsrep_thd_is_BF (thd=0xffff040009a8, sync=sync@entry=1 '\001') at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/wsrep_thd.cc:762
    4  0x0000aaaaacadce68 in lock_rec_has_to_wait (for_locking=false, lock_is_on_supremum=<optimized out>, lock2=0xffff628952d0, type_mode=291, trx=0xffff62894070) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:826
    5  lock_has_to_wait (lock1=<optimized out>, lock2=0xffff628952d0) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:873
    6  0x0000aaaaacadd0b0 in DeadlockChecker::search (this=this@entry=0xffff70061fe8) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:7142
    7  0x0000aaaaacae2dd8 in DeadlockChecker::check_and_resolve (lock=lock@entry=0xffff62894120, trx=trx@entry=0xffff62894070) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:7286
    8  0x0000aaaaacae3070 in lock_rec_enqueue_waiting (c_lock=0xffff628952d0, type_mode=type_mode@entry=3, block=block@entry=0xffff62076c40, heap_no=heap_no@entry=2, index=index@entry=0xffff4c076f28, thr=thr@entry=0xffff4c078810, prdt=prdt@entry=0x0) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:1796
    9  0x0000aaaaacae3900 in lock_rec_lock_slow (thr=0xffff4c078810, index=0xffff4c076f28, heap_no=2, block=0xffff62076c40, mode=3, impl=0) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:2106
    10 lock_rec_lock (impl=false, mode=3, block=0xffff62076c40, heap_no=2, index=0xffff4c076f28, thr=0xffff4c078810) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:2168
    11 0x0000aaaaacae3ee8 in lock_sec_rec_read_check_and_lock (flags=flags@entry=0, block=block@entry=0xffff62076c40, rec=rec@entry=0xffff6240407f "\303\221\342\200\232\303\220\302\265\303\220\302\272\303\221\302\201\303\221\342\200\232", index=index@entry=0xffff4c076f28, offsets=0xffff4c080690, offsets@entry=0xffff70062a30, mode=LOCK_X, mode@entry=1653162096, gap_mode=0, gap_mode@entry=281470749427104, thr=thr@entry=0xffff4c078810) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:6082
    12 0x0000aaaaacb684c4 in sel_set_rec_lock (pcur=0xaaaac841c270, pcur@entry=0xffff4c077d58, rec=0xffff6240407f "\303\221\342\200\232\303\220\302\265\303\220\302\272\303\221\302\201\303\221\342\200\232", rec@entry=0x28 <error: Cannot access memory at address 0x28>, index=index@entry=0xffff4c076f28, offsets=0xffff70062a30, mode=281472334905456, type=281470749427104, thr=0xffff4c078810, thr@entry=0x9f, mtr=0x0, mtr@entry=0xffff70063928) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/row/row0sel.cc:1270
    13 0x0000aaaaacb6bb64 in row_search_mvcc (buf=buf@entry=0xffff4c080690 "\376\026", mode=mode@entry=PAGE_CUR_GE, prebuilt=0xffff4c077b98, match_mode=match_mode@entry=1, direction=direction@entry=0) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/row/row0sel.cc:5181
    14 0x0000aaaaacaae568 in ha_innobase::index_read (this=0xffff4c038a80, buf=0xffff4c080690 "\376\026", key_ptr=<optimized out>, key_len=768, find_flag=<optimized out>) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/handler/ha_innodb.cc:9393
    15 0x0000aaaaac9201cc in handler::ha_index_read_map (this=0xffff4c038a80, buf=0xffff4c080690 "\376\026", key=0xffff4c07ccf8 "", keypart_map=keypart_map@entry=18446744073709551615, find_flag=find_flag@entry=HA_READ_KEY_EXACT) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/handler.cc:2718
    16 0x0000aaaaac9f36b0 in Rows_log_event::find_row (this=this@entry=0xffff4c030098, rgi=rgi@entry=0xffff4c01b510) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/log_event.cc:13461
    17 0x0000aaaaac9f3e44 in Update_rows_log_event::do_exec_row (this=0xffff4c030098, rgi=0xffff4c01b510) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/log_event.cc:13936
    18 0x0000aaaaac9e7ee8 in Rows_log_event::do_apply_event (this=0xffff4c030098, rgi=0xffff4c01b510) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/log_event.cc:11101
    19 0x0000aaaaac8ca4e8 in Log_event::apply_event (rgi=0xffff4c01b510, this=0xffff4c030098) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/log_event.h:1454
    20 wsrep_apply_events (buf_len=0, events_buf=0x1, thd=0xffff4c0009a8) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/wsrep_applier.cc:164
    21 wsrep_apply_cb (ctx=0xffff4c0009a8, buf=0x1, buf_len=18446743528248705000, flags=<optimized out>, meta=<optimized out>) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/wsrep_applier.cc:267
    22 0x0000ffff7322d29c in galera::TrxHandle::apply (this=this@entry=0xffff4c027960, recv_ctx=recv_ctx@entry=0xffff4c0009a8, apply_cb=apply_cb@entry=0xaaaaac8c9fe8 <wsrep_apply_cb(void*, void const*, size_t, uint32_t, wsrep_trx_meta_t const*)>, meta=...) at /home/buildbot/buildbot/build/galera/src/trx_handle.cpp:317
    23 0x0000ffff73239664 in apply_trx_ws (recv_ctx=recv_ctx@entry=0xffff4c0009a8, apply_cb=0xaaaaac8c9fe8 <wsrep_apply_cb(void*, void const*, size_t, uint32_t, wsrep_trx_meta_t const*)>, commit_cb=0xaaaaac8ca8d0 <wsrep_commit_cb(void*, uint32_t, wsrep_trx_meta_t const*, wsrep_bool_t*, bool)>, trx=..., meta=...) at /home/buildbot/buildbot/build/galera/src/replicator_smm.cpp:34
    24 0x0000ffff7323c0c4 in galera::ReplicatorSMM::apply_trx (this=this@entry=0xaaaac7c7ebc0, recv_ctx=recv_ctx@entry=0xffff4c0009a8, trx=trx@entry=0xffff4c027960) at /home/buildbot/buildbot/build/galera/src/replicator_smm.cpp:454
    25 0x0000ffff7323e8b8 in galera::ReplicatorSMM::process_trx (this=0xaaaac7c7ebc0, recv_ctx=0xffff4c0009a8, trx=0xffff4c027960) at /home/buildbot/buildbot/build/galera/src/replicator_smm.cpp:1258
    26 0x0000ffff73268f68 in galera::GcsActionSource::dispatch (this=this@entry=0xaaaac7c7f348, recv_ctx=recv_ctx@entry=0xffff4c0009a8, act=..., exit_loop=@0xffff7006535f: false) at /home/buildbot/buildbot/build/galera/src/gcs_action_source.cpp:115
    27 0x0000ffff73269dd0 in galera::GcsActionSource::process (this=0xaaaac7c7f348, recv_ctx=0xffff4c0009a8, exit_loop=@0xffff7006535f: false) at /home/buildbot/buildbot/build/galera/src/gcs_action_source.cpp:180
    28 0x0000ffff7323ef5c in galera::ReplicatorSMM::async_recv (this=0xaaaac7c7ebc0, recv_ctx=0xffff4c0009a8) at /home/buildbot/buildbot/build/galera/src/replicator_smm.cpp:362
    29 0x0000ffff73217760 in galera_recv (gh=<optimized out>, recv_ctx=<optimized out>) at /home/buildbot/buildbot/build/galera/src/wsrep_provider.cpp:244
    30 0x0000aaaaac8cb344 in wsrep_replication_process (thd=0xffff4c0009a8) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/wsrep_thd.cc:486
    31 0x0000aaaaac8bc3a0 in start_wsrep_THD (arg=arg@entry=0xaaaac7cb3e38) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/wsrep_mysqld.cc:2173
    32 0x0000aaaaaca89198 in pfs_spawn_thread (arg=<optimized out>) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/perfschema/pfs.cc:1869
    33 0x0000ffff784defc4 in start_thread (arg=0xaaaaaca890d8 <pfs_spawn_thread(void*)>) at pthread_create.c:335
    34 0x0000ffff7821c3f0 in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:89
    
    and background victim transaction kill with stack
    
    Thread 28 (Thread 0xffff485fa070 (LWP 24870)):
    0  0x0000ffff784e530c in __pthread_cond_wait (cond=cond@entry=0xaaaac83e98e0, mutex=mutex@entry=0xaaaac83e98b0) at pthread_cond_wait.c:186
    1  0x0000aaaaacb10788 in os_event::wait (this=0xaaaac83e98a0) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/os/os0event.cc:158
    2  os_event::wait_low (reset_sig_count=2, this=0xaaaac83e98a0) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/os/os0event.cc:325
    3  os_event_wait_low (event=0xaaaac83e98a0, reset_sig_count=<optimized out>) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/os/os0event.cc:507
    4  0x0000aaaaacb98480 in sync_array_wait_event (arr=arr@entry=0xaaaac7dbb450, cell=@0xffff485f96e8: 0xaaaac7dbb560) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/sync/sync0arr.cc:471
    5  0x0000aaaaacab53c8 in TTASEventMutex<GenericPolicy>::enter (line=19524, filename=0xaaaaacf2ce40 "/home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/handler/ha_innodb.cc", max_delay=<optimized out>, max_spins=0, this=0xaaaac83cc8c0) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/include/ib0mutex.h:516
    6  PolicyMutex<TTASEventMutex<GenericPolicy> >::enter (this=0xaaaac83cc8c0, n_spins=<optimized out>, n_delay=<optimized out>, name=0xaaaaacf2ce40 "/home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/handler/ha_innodb.cc", line=19524) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/include/ib0mutex.h:637
    7  0x0000aaaaacaaa52c in bg_wsrep_kill_trx (void_arg=0xffff4c057430) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/handler/ha_innodb.cc:19524
    8  0x0000aaaaac79e7f0 in handle_manager (arg=arg@entry=0x0) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/sql_manager.cc:112
    9  0x0000aaaaaca89198 in pfs_spawn_thread (arg=<optimized out>) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/perfschema/pfs.cc:1869
    10 0x0000ffff784defc4 in start_thread (arg=0xaaaaaca890d8 <pfs_spawn_thread(void*)>) at pthread_create.c:335
    11 0x0000ffff7821c3f0 in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:89
    
    Fix:
    ====
    
    Do not use THD::LOCK_thd_data mutex if we already hold lock_sys->mutex because it
    will cause mutexing order violation. Victim transaction holding conflicting
    locks can't be committed or rolled back while we hold lock_sys->mutex. Thus,
    it is safe to do wsrep_thd_is_BF call with no additional mutexes.
    75546dfb
lock0lock.cc 199 KB