1. 27 Jan, 2021 4 commits
    • Jan Lindström's avatar
      MDEV-24704 : Galera test failure on galera.galera_nopk_unicode · 75546dfb
      Jan Lindström authored
      Analysis:
      =========
      
      Reason for test failure was a mutex deadlock between DeadlockChecker with stack
      
      Thread 6 (Thread 0xffff70066070 (LWP 24667)):
      0  0x0000ffff784e850c in __lll_lock_wait (futex=futex@entry=0xffff04002258, private=0) at lowlevellock.c:46
      1  0x0000ffff784e19f0 in __GI___pthread_mutex_lock (mutex=mutex@entry=0xffff04002258) at pthread_mutex_lock.c:135
      2  0x0000aaaaac8cd014 in inline_mysql_mutex_lock (src_file=0xaaaaacea0f28 "/home/buildbot/buildbot/build/mariadb-10.2.37/sql/wsrep_thd.cc", src_line=762, that=0xffff04002258) at /home/buildbot/buildbot/build/mariadb-10.2.37/include/mysql/psi/mysql_thread.h:675
      3  wsrep_thd_is_BF (thd=0xffff040009a8, sync=sync@entry=1 '\001') at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/wsrep_thd.cc:762
      4  0x0000aaaaacadce68 in lock_rec_has_to_wait (for_locking=false, lock_is_on_supremum=<optimized out>, lock2=0xffff628952d0, type_mode=291, trx=0xffff62894070) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:826
      5  lock_has_to_wait (lock1=<optimized out>, lock2=0xffff628952d0) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:873
      6  0x0000aaaaacadd0b0 in DeadlockChecker::search (this=this@entry=0xffff70061fe8) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:7142
      7  0x0000aaaaacae2dd8 in DeadlockChecker::check_and_resolve (lock=lock@entry=0xffff62894120, trx=trx@entry=0xffff62894070) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:7286
      8  0x0000aaaaacae3070 in lock_rec_enqueue_waiting (c_lock=0xffff628952d0, type_mode=type_mode@entry=3, block=block@entry=0xffff62076c40, heap_no=heap_no@entry=2, index=index@entry=0xffff4c076f28, thr=thr@entry=0xffff4c078810, prdt=prdt@entry=0x0) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:1796
      9  0x0000aaaaacae3900 in lock_rec_lock_slow (thr=0xffff4c078810, index=0xffff4c076f28, heap_no=2, block=0xffff62076c40, mode=3, impl=0) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:2106
      10 lock_rec_lock (impl=false, mode=3, block=0xffff62076c40, heap_no=2, index=0xffff4c076f28, thr=0xffff4c078810) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:2168
      11 0x0000aaaaacae3ee8 in lock_sec_rec_read_check_and_lock (flags=flags@entry=0, block=block@entry=0xffff62076c40, rec=rec@entry=0xffff6240407f "\303\221\342\200\232\303\220\302\265\303\220\302\272\303\221\302\201\303\221\342\200\232", index=index@entry=0xffff4c076f28, offsets=0xffff4c080690, offsets@entry=0xffff70062a30, mode=LOCK_X, mode@entry=1653162096, gap_mode=0, gap_mode@entry=281470749427104, thr=thr@entry=0xffff4c078810) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/lock/lock0lock.cc:6082
      12 0x0000aaaaacb684c4 in sel_set_rec_lock (pcur=0xaaaac841c270, pcur@entry=0xffff4c077d58, rec=0xffff6240407f "\303\221\342\200\232\303\220\302\265\303\220\302\272\303\221\302\201\303\221\342\200\232", rec@entry=0x28 <error: Cannot access memory at address 0x28>, index=index@entry=0xffff4c076f28, offsets=0xffff70062a30, mode=281472334905456, type=281470749427104, thr=0xffff4c078810, thr@entry=0x9f, mtr=0x0, mtr@entry=0xffff70063928) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/row/row0sel.cc:1270
      13 0x0000aaaaacb6bb64 in row_search_mvcc (buf=buf@entry=0xffff4c080690 "\376\026", mode=mode@entry=PAGE_CUR_GE, prebuilt=0xffff4c077b98, match_mode=match_mode@entry=1, direction=direction@entry=0) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/row/row0sel.cc:5181
      14 0x0000aaaaacaae568 in ha_innobase::index_read (this=0xffff4c038a80, buf=0xffff4c080690 "\376\026", key_ptr=<optimized out>, key_len=768, find_flag=<optimized out>) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/handler/ha_innodb.cc:9393
      15 0x0000aaaaac9201cc in handler::ha_index_read_map (this=0xffff4c038a80, buf=0xffff4c080690 "\376\026", key=0xffff4c07ccf8 "", keypart_map=keypart_map@entry=18446744073709551615, find_flag=find_flag@entry=HA_READ_KEY_EXACT) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/handler.cc:2718
      16 0x0000aaaaac9f36b0 in Rows_log_event::find_row (this=this@entry=0xffff4c030098, rgi=rgi@entry=0xffff4c01b510) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/log_event.cc:13461
      17 0x0000aaaaac9f3e44 in Update_rows_log_event::do_exec_row (this=0xffff4c030098, rgi=0xffff4c01b510) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/log_event.cc:13936
      18 0x0000aaaaac9e7ee8 in Rows_log_event::do_apply_event (this=0xffff4c030098, rgi=0xffff4c01b510) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/log_event.cc:11101
      19 0x0000aaaaac8ca4e8 in Log_event::apply_event (rgi=0xffff4c01b510, this=0xffff4c030098) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/log_event.h:1454
      20 wsrep_apply_events (buf_len=0, events_buf=0x1, thd=0xffff4c0009a8) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/wsrep_applier.cc:164
      21 wsrep_apply_cb (ctx=0xffff4c0009a8, buf=0x1, buf_len=18446743528248705000, flags=<optimized out>, meta=<optimized out>) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/wsrep_applier.cc:267
      22 0x0000ffff7322d29c in galera::TrxHandle::apply (this=this@entry=0xffff4c027960, recv_ctx=recv_ctx@entry=0xffff4c0009a8, apply_cb=apply_cb@entry=0xaaaaac8c9fe8 <wsrep_apply_cb(void*, void const*, size_t, uint32_t, wsrep_trx_meta_t const*)>, meta=...) at /home/buildbot/buildbot/build/galera/src/trx_handle.cpp:317
      23 0x0000ffff73239664 in apply_trx_ws (recv_ctx=recv_ctx@entry=0xffff4c0009a8, apply_cb=0xaaaaac8c9fe8 <wsrep_apply_cb(void*, void const*, size_t, uint32_t, wsrep_trx_meta_t const*)>, commit_cb=0xaaaaac8ca8d0 <wsrep_commit_cb(void*, uint32_t, wsrep_trx_meta_t const*, wsrep_bool_t*, bool)>, trx=..., meta=...) at /home/buildbot/buildbot/build/galera/src/replicator_smm.cpp:34
      24 0x0000ffff7323c0c4 in galera::ReplicatorSMM::apply_trx (this=this@entry=0xaaaac7c7ebc0, recv_ctx=recv_ctx@entry=0xffff4c0009a8, trx=trx@entry=0xffff4c027960) at /home/buildbot/buildbot/build/galera/src/replicator_smm.cpp:454
      25 0x0000ffff7323e8b8 in galera::ReplicatorSMM::process_trx (this=0xaaaac7c7ebc0, recv_ctx=0xffff4c0009a8, trx=0xffff4c027960) at /home/buildbot/buildbot/build/galera/src/replicator_smm.cpp:1258
      26 0x0000ffff73268f68 in galera::GcsActionSource::dispatch (this=this@entry=0xaaaac7c7f348, recv_ctx=recv_ctx@entry=0xffff4c0009a8, act=..., exit_loop=@0xffff7006535f: false) at /home/buildbot/buildbot/build/galera/src/gcs_action_source.cpp:115
      27 0x0000ffff73269dd0 in galera::GcsActionSource::process (this=0xaaaac7c7f348, recv_ctx=0xffff4c0009a8, exit_loop=@0xffff7006535f: false) at /home/buildbot/buildbot/build/galera/src/gcs_action_source.cpp:180
      28 0x0000ffff7323ef5c in galera::ReplicatorSMM::async_recv (this=0xaaaac7c7ebc0, recv_ctx=0xffff4c0009a8) at /home/buildbot/buildbot/build/galera/src/replicator_smm.cpp:362
      29 0x0000ffff73217760 in galera_recv (gh=<optimized out>, recv_ctx=<optimized out>) at /home/buildbot/buildbot/build/galera/src/wsrep_provider.cpp:244
      30 0x0000aaaaac8cb344 in wsrep_replication_process (thd=0xffff4c0009a8) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/wsrep_thd.cc:486
      31 0x0000aaaaac8bc3a0 in start_wsrep_THD (arg=arg@entry=0xaaaac7cb3e38) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/wsrep_mysqld.cc:2173
      32 0x0000aaaaaca89198 in pfs_spawn_thread (arg=<optimized out>) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/perfschema/pfs.cc:1869
      33 0x0000ffff784defc4 in start_thread (arg=0xaaaaaca890d8 <pfs_spawn_thread(void*)>) at pthread_create.c:335
      34 0x0000ffff7821c3f0 in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:89
      
      and background victim transaction kill with stack
      
      Thread 28 (Thread 0xffff485fa070 (LWP 24870)):
      0  0x0000ffff784e530c in __pthread_cond_wait (cond=cond@entry=0xaaaac83e98e0, mutex=mutex@entry=0xaaaac83e98b0) at pthread_cond_wait.c:186
      1  0x0000aaaaacb10788 in os_event::wait (this=0xaaaac83e98a0) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/os/os0event.cc:158
      2  os_event::wait_low (reset_sig_count=2, this=0xaaaac83e98a0) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/os/os0event.cc:325
      3  os_event_wait_low (event=0xaaaac83e98a0, reset_sig_count=<optimized out>) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/os/os0event.cc:507
      4  0x0000aaaaacb98480 in sync_array_wait_event (arr=arr@entry=0xaaaac7dbb450, cell=@0xffff485f96e8: 0xaaaac7dbb560) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/sync/sync0arr.cc:471
      5  0x0000aaaaacab53c8 in TTASEventMutex<GenericPolicy>::enter (line=19524, filename=0xaaaaacf2ce40 "/home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/handler/ha_innodb.cc", max_delay=<optimized out>, max_spins=0, this=0xaaaac83cc8c0) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/include/ib0mutex.h:516
      6  PolicyMutex<TTASEventMutex<GenericPolicy> >::enter (this=0xaaaac83cc8c0, n_spins=<optimized out>, n_delay=<optimized out>, name=0xaaaaacf2ce40 "/home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/handler/ha_innodb.cc", line=19524) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/include/ib0mutex.h:637
      7  0x0000aaaaacaaa52c in bg_wsrep_kill_trx (void_arg=0xffff4c057430) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/innobase/handler/ha_innodb.cc:19524
      8  0x0000aaaaac79e7f0 in handle_manager (arg=arg@entry=0x0) at /home/buildbot/buildbot/build/mariadb-10.2.37/sql/sql_manager.cc:112
      9  0x0000aaaaaca89198 in pfs_spawn_thread (arg=<optimized out>) at /home/buildbot/buildbot/build/mariadb-10.2.37/storage/perfschema/pfs.cc:1869
      10 0x0000ffff784defc4 in start_thread (arg=0xaaaaaca890d8 <pfs_spawn_thread(void*)>) at pthread_create.c:335
      11 0x0000ffff7821c3f0 in thread_start () at ../sysdeps/unix/sysv/linux/aarch64/clone.S:89
      
      Fix:
      ====
      
      Do not use THD::LOCK_thd_data mutex if we already hold lock_sys->mutex because it
      will cause mutexing order violation. Victim transaction holding conflicting
      locks can't be committed or rolled back while we hold lock_sys->mutex. Thus,
      it is safe to do wsrep_thd_is_BF call with no additional mutexes.
      75546dfb
    • Rucha Deodhar's avatar
      MDEV-20939: Race condition between mysqldump import and InnoDB persistent · cbc75e99
      Rucha Deodhar authored
      statistics calculation
      
       Analysis: When --replace or --insert-ignore is not given, dumping of
      mysql.innodb_index_stats and mysql.innodb_table_stats will result into race
      condition.
      Fix: Check if these options are present with --system=stats (because dumping
      under --system=stats is safe). Otherwise, dump only structure, ignoring data
      because innodb will recalculate data anyway.
      cbc75e99
    • Marko Mäkelä's avatar
      MDEV-24709 Assertion !recv_no_ibuf_operations failed in ibuf_page_low() · 5fd3c747
      Marko Mäkelä authored
      recv_recovery_from_checkpoint_start(): Clear the recv_no_ibuf_operations
      flag at the same time when we enabled writes to the log.
      
      The failure to clear the flag might have caused some missed
      change buffer merges, at least to the secondary index of SYS_TABLES
      that were accessed by trx_resurrect_table_locks() while the last
      recovery batch was in progress.
      
      Thanks to Thirunarayanan Balathandayuthapani for suggesting this fix.
      5fd3c747
    • Marko Mäkelä's avatar
      MDEV-24184 preparation: InnoDB RENAME TABLE recovery failure · 5b93a483
      Marko Mäkelä authored
      fil_rename_tablespace(): Do not write a redundant MLOG_FILE_RENAME2
      record.
      
      The recovery bug will be fixed later. The problem is that we are
      invoking fil_op_replay_rename() too often, while we should skip
      any 'intermediate' names of a tablespace and only apply the
      very last rename for each tablespace identifier, and only if
      the tablespace name is not already correct.
      5b93a483
  2. 26 Jan, 2021 2 commits
  3. 25 Jan, 2021 2 commits
  4. 24 Jan, 2021 6 commits
    • Sergei Golubchik's avatar
      mtr: --client-gdb='<gdb commands>' · 1a999585
      Sergei Golubchik authored
      1a999585
    • Sergei Golubchik's avatar
      MDEV-23328 Server hang due to Galera lock conflict resolution · 29bbcac0
      Sergei Golubchik authored
      mutex order violation here.
      when wsrep bf thread kills a conflicting trx, the stack is
      
        wsrep_thd_LOCK()
        wsrep_kill_victim()
        lock_rec_other_has_conflicting()
        lock_clust_rec_read_check_and_lock()
        row_search_mvcc()
        ha_innobase::index_read()
        ha_innobase::rnd_pos()
        handler::ha_rnd_pos()
        handler::rnd_pos_by_record()
        handler::ha_rnd_pos_by_record()
        Rows_log_event::find_row()
        Update_rows_log_event::do_exec_row()
        Rows_log_event::do_apply_event()
        Log_event::apply_event()
        wsrep_apply_events()
      
      and mutexes are taken in the order
      
        lock_sys->mutex -> victim_trx->mutex -> victim_thread->LOCK_thd_data
      
      When a normal KILL statement is executed, the stack is
      
        innobase_kill_query()
        kill_handlerton()
        plugin_foreach_with_mask()
        ha_kill_query()
        THD::awake()
        kill_one_thread()
      
      and mutexes are
      
        victim_thread->LOCK_thd_data -> lock_sys->mutex -> victim_trx->mutex
      
      To fix the mutex order violation we kill the victim thd asynchronously,
      from the manager thread
      29bbcac0
    • Sergei Golubchik's avatar
      cleanup: void hton::abort_transaction() · 5d1db345
      Sergei Golubchik authored
      and void wsrep_innobase_kill_one_trx()
      
      as their return values are never used.
      Also remove redundant cast and checks that are always true
      5d1db345
    • Sergei Golubchik's avatar
    • Sergei Golubchik's avatar
      cleanup: fix and generalize handle_manager thread · 990eb093
      Sergei Golubchik authored
      * provide an argument to the callback
      * don't ignore a callback request if it's already present in the queue
      * initialize mutex/cond/in_use flag before starting the thread,
        in case the first callback queueing request arrives before
        handle_manager had time to initialize
      * set/check abort_manager under a mutex, otherwise handle_manager
        thread might destroy LOCK_manager before stop_handle_manager
        released it
      * signal COND on queueing a callback, stop cond_wait on callback request
      * always start the thread, even if flush_time is 0
      * but keep the old behavior in embedded (no replication, no galera)
      * style cleanups (e.g. remove volatile for a variable protected by a mutex)
      990eb093
    • Sergei Golubchik's avatar
      don't allow `KILL QUERY ID USER xxx` · 4a7e6229
      Sergei Golubchik authored
      4a7e6229
  5. 23 Jan, 2021 1 commit
  6. 22 Jan, 2021 4 commits
  7. 21 Jan, 2021 4 commits
    • Daniel Black's avatar
      MDEV-10272: add master host/port info to slave thread exit messages · 29d9897f
      Daniel Black authored
      Sample log error message generated:
      
      2021-01-21  2:33:24 139912137520896 [Note] Slave SQL thread exiting, replication stopped in log 'master-bin.000001' at position 369
      33:24 139912137520896 [Note] master was 127.0.0.1:16400
      2021-01-21  2:33:24 139912137828096 [Note] Slave I/O thread exiting, read up to log 'master-bin.000001', position 369
      2021-01-21  2:33:24 139912137828096 [Note] master was 127.0.0.1:16400
      
      Based on work by Hartmut Holzgraefe.
      
      Reviewer: knielsen@knielsen-hq.org, Andrei, Sachin
      29d9897f
    • Sujatha's avatar
      MDEV-8134: The relay-log is not flushed after the slave-relay-log.999999 showed · eb75e870
      Sujatha authored
      Problem:
      ========
      Auto purge of relaylogs stops when relay-log-file is
      'slave-relay-log.999999' and slave_parallel_threads is enabled.
      
      Analysis:
      =========
      The problem is that in Relay_log_info::inc_group_relay_log_pos() function,
      when two log names are compared via strcmp() function, it gives correct
      result, when log name sequence numbers are of same digits(6 digits), But
      when the number goes to 7 digits, a 999999 compares greater than
      1000000, which is wrong, hence the bug.
      
      Fix:
      ====
      Extract the numeric extension part of the file name, convert it into
      unsigned long and compare.
      
      Thanks to David Zhao for the contribution.
      eb75e870
    • Daniel Black's avatar
      maria: ma_recovery cppcheck va_start called twice · 53acd1c1
      Daniel Black authored
      Per cppcheck, va_start is called twice which it is.
      
      Remove the second instance.
      53acd1c1
    • Daniel Black's avatar
      ucs2: cppcheck - add va_end · f2fea295
      Daniel Black authored
      f2fea295
  8. 19 Jan, 2021 8 commits
  9. 18 Jan, 2021 1 commit
    • sjaakola's avatar
      MDEV-23851 BF-BF Conflict issue because of UK GAP locks · beaea31a
      sjaakola authored
      Some DML operations on tables having unique secondary keys cause scanning
      in the secondary index, for instance to find potential unique key violations
      in the seconday index. This scanning may involve GAP locking in the index.
      As this locking happens also when applying replication events in high priority
      applier threads, there is a probabality for lock conflicts between two wsrep
      high priority threads.
      
      This PR avoids lock conflicts of high priority wsrep threads, which do
      secondary index scanning e.g. for duplicate key detection.
      
      The actual fix is the patch in sql_class.cc:thd_need_ordering_with(), where
      we allow relaxed GAP locking protocol between wsrep high priority threads.
      wsrep high priority threads (replication appliers, replayers and TOI processors)
      are ordered by the replication provider, and they will not need serializability
      support gained by secondary index GAP locks.
      
      PR contains also a mtr test, which exercises a scenario where two replication
      applier threads have a false positive conflict in GAP of unique secondary index.
      The conflicting local committing transaction has to replay, and the test verifies
      also that the replaying phase will not conflict with the latter repllication applier.
      Commit also contains new test scenario for galera.galera_UK_conflict.test,
      where replayer starts applying after a slave applier thread, with later seqno,
      has advanced to commit phase. The applier and replayer have false positive GAP
      lock conflict on secondary unique index, and replayer should ignore this.
      This test scenario caused crash with earlier version in this PR, and to fix this,
      the secondary index uniquenes checking has been relaxed even further.
      
      Now innodb trx_t structure has new member: bool wsrep_UK_scan, which is set to
      true, when high priority thread is performing unique secondary index scanning.
      The member trx_t::wsrep_UK_scan is defined inside WITH_WSREP directive, to make
      it possible to prepare a MariaDB build where this additional trx_t member is
      not present and is not used in the code base. trx->wsrep_UK_scan is set to true
      only for the duration of function call for: lock_rec_lock() trx->wsrep_UK_scan
      is used only in lock_rec_has_to_wait() function to relax the need to wait if
      wsrep_UK_scan is set and conflicting transaction is also high priority.
      Reviewed-by: default avatarJan Lindström <jan.lindstrom@mariadb.com>
      beaea31a
  10. 15 Jan, 2021 2 commits
  11. 14 Jan, 2021 2 commits
  12. 13 Jan, 2021 3 commits
  13. 12 Jan, 2021 1 commit