MDEV-6680: Performance of domain_parallel replication is disappointing

The code that handles free lists of various objects passed to worker threads in parallel replication handles freeing in batches, to avoid taking and releasing LOCK_rpl_thread too often. However, it was possible for freeing to be delayed to the point where one thread could stall the SQL driver thread due to a full queue, while other worker threads might be idle. This could significantly degrade possible parallelism and thus performance. Clean up the batch freeing code so that it is more robust and now able to regularly free batches of objects, so that normally the queue will not run full unless the SQL driver thread is really far ahead of the worker threads.

MDEV-6680: Performance of domain_parallel replication is disappointing
The code that handles free lists of various objects passed to worker threads in parallel replication handles freeing in batches, to avoid taking and releasing LOCK_rpl_thread too often. However, it was possible for freeing to be delayed to the point where one thread could stall the SQL driver thread due to a full queue, while other worker threads might be idle. This could significantly degrade possible parallelism and thus performance. Clean up the batch freeing code so that it is more robust and now able to regularly free batches of objects, so that normally the queue will not run full unless the SQL driver thread is really far ahead of the worker threads.
eec04fb4 · Kristian Nielsen · 8a3e2f29 · eec04fb4 · eec04fb4 · eec04fb4
Commit eec04fb4 authored Nov 13, 2014 by Kristian Nielsen
Showing with 191 additions and 91 deletions

sql/rpl_parallel.cc sql/rpl_parallel.cc +140 -90

sql/rpl_parallel.h sql/rpl_parallel.h +49 -1

sql/rpl_rli.cc sql/rpl_rli.cc +1 -0

sql/rpl_rli.h sql/rpl_rli.h +1 -0

No files found.
--- a/sql/rpl_parallel.cc
+++ b/sql/rpl_parallel.cc
--- a/sql/rpl_parallel.h
+++ b/sql/rpl_parallel.h
@@ -96,9 +96,28 @@ struct rpl_parallel_thread {
    size_t event_size;
  } *event_queue, *last_in_queue;
  uint64 queued_size;
+  /* These free lists are protected by LOCK_rpl_thread. */
  queued_event *qev_free_list;
  rpl_group_info *rgi_free_list;
  group_commit_orderer *gco_free_list;
+  /*
+    These free lists are local to the thread, so need not be protected by any
+    lock. They are moved to the global free lists in batches in the function
+    batch_free(), to reduce LOCK_rpl_thread contention.
+    The lists are not NULL-terminated (as we do not need to traverse them).
+    Instead, if they are non-NULL, the loc_XXX_last_ptr_ptr points to the
+    `next' pointer of the last element, which is used to link into the front
+    of the global freelists.
+  */
+  queued_event *loc_qev_list, **loc_qev_last_ptr_ptr;
+  size_t loc_qev_size;
+  uint64 qev_free_pending;
+  rpl_group_info *loc_rgi_list, **loc_rgi_last_ptr_ptr;
+  group_commit_orderer *loc_gco_list, **loc_gco_last_ptr_ptr;
+  /* These keep track of batch update of inuse_relaylog refcounts. */
+  inuse_relaylog *accumulated_ir_last;
+  uint64 accumulated_ir_count;
  void enqueue(queued_event *qev)
  {
@@ -127,12 +146,41 @@ struct rpl_parallel_thread {
  queued_event *retry_get_qev(Log_event *ev, queued_event *orig_qev,
                              const char *relay_log_name,
                              ulonglong event_pos, ulonglong event_size);
+  /*
+    Put a qev on the local free list, to be later released to the global free
+    list by batch_free().
+  */
+  void loc_free_qev(queued_event *qev);
+  /*
+    Release a qev immediately to the global free list. Requires holding the
+    LOCK_rpl_thread mutex.
+  */
  void free_qev(queued_event *qev);
  rpl_group_info *get_rgi(Relay_log_info *rli, Gtid_log_event *gtid_ev,
                          rpl_parallel_entry *e, ulonglong event_size);
+  /*
+    Put an rgi on the local free list, to be later released to the global free
+    list by batch_free().
+  */
+  void loc_free_rgi(rpl_group_info *rgi);
+  /*
+    Release an rgi immediately to the global free list. Requires holding the
+    LOCK_rpl_thread mutex.
+  */
  void free_rgi(rpl_group_info *rgi);
  group_commit_orderer *get_gco(uint64 wait_count, group_commit_orderer *prev);
-  void free_gco(group_commit_orderer *gco);
+  /*
+    Put a gco on the local free list, to be later released to the global free
+    list by batch_free().
+  */
+  void loc_free_gco(group_commit_orderer *gco);
+  /*
+    Move all local free lists to the global ones. Requires holding
+    LOCK_rpl_thread.
+  */
+  void batch_free();
+  /* Update inuse_relaylog refcounts with what we have accumulated so far. */
+  void inuse_relaylog_refcount_update();
 };

--- a/sql/rpl_rli.cc
+++ b/sql/rpl_rli.cc
@@ -1390,6 +1390,7 @@ Relay_log_info::alloc_inuse_relaylog(const char *name)
    my_error(ER_OUTOFMEMORY, MYF(0), (int)sizeof(*ir));
    return 1;
  }
+  ir->rli= this;
  strmake_buf(ir->name, name);
  if (!inuse_relaylog_list)

--- a/sql/rpl_rli.h
+++ b/sql/rpl_rli.h
@@ -496,6 +496,7 @@ class Relay_log_info : public Slave_reporting_capability
 */
 struct inuse_relaylog {
  inuse_relaylog *next;
+  Relay_log_info *rli;
  /* Number of events in this relay log queued for worker threads. */
  int64 queued_count;
  /* Number of events completed by worker threads. */