Merge remote-tracking branch 'origin/master' into t

* origin/master:
  storage: start replicating the partition which is furthest behind
  master: fix possibly wrong knowledge of cells' backup_tid when resuming backup
  Minor comment/doc changes

Merge remote-tracking branch 'origin/master' into t
* origin/master:
  storage: start replicating the partition which is furthest behind
  master: fix possibly wrong knowledge of cells' backup_tid when resuming backup
  Minor comment/doc changes
2c0b400e · Kirill Smelkov · d68e25a6 · 4d3f3723 · 2c0b400e · 2c0b400e
Commit 2c0b400e authored Dec 22, 2016 by Kirill Smelkov
5 changed files
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -42,7 +42,6 @@ Change History
 - Plus a few other changes for debugging and developers, as well as small
  optimizations.

-
 1.6.3 (2016-06-15)
 ------------------


--- a/neo/master/backup_app.py
+++ b/neo/master/backup_app.py
@@ -57,9 +57,9 @@ for that partition. It means only this node will fetch data from the upstream
 cluster, to minimize bandwidth between clusters. Other replicas will
 synchronize from the primary node.

-There is no UUID conflict between the 2 clusters:
+There is no conflict of node id between the 2 clusters:
 - Storage nodes connect anonymously to upstream.
- Master node receives a new from upstream master and uses it only when
+- The master node gets an id from the upstream master and uses it only when
  communicating with it.
 """

@@ -197,9 +197,8 @@ class BackupApplication(object):

    # NOTE called by backup_app.invalidateObjects() when it has info that
    # partitions in partition_set were updated in upstream cluster (up to `tid`)
-    def invalidatePartitions(self, tid, partition_set):
+    def invalidatePartitions(self, tid, prev_tid, partition_set):
        app = self.app
-        prev_tid = app.getLastTransaction()
        app.setLastTransaction(tid)
        pt = app.pt
        trigger_set = set()
@@ -315,8 +314,9 @@ class BackupApplication(object):
        logging.debug("partition %u: updating backup_tid of %r to %s",
                      offset, cell, dump(tid))
        cell.backup_tid = tid
-        # TODO provide invalidation feedback about new txns to read-only clients connected to backup cluster
-        # NOTE ^^^ not only here but also hooked to in-progress feedback from fetchObjects (storage)
+        # TODO: Provide invalidation feedback about new txns to read-only
+        #       clients connected to backup cluster. Not only here but also
+        #       hooked to in-progress feedback from fetchObjects (storage).
        # Forget tids we won't need anymore.
        cell_list = app.pt.getCellList(offset, readable=True)
        del tid_list[:bisect(tid_list, min(x.backup_tid for x in cell_list))]   # XXX not only for primary Sb ?

--- a/neo/master/handlers/backup.py
+++ b/neo/master/handlers/backup.py
@@ -35,10 +35,18 @@ class BackupHandler(EventHandler):
    # NOTE invalidation from M -> Mb (all partitions)
    def answerLastTransaction(self, conn, tid):
        app = self.app
-        if tid != ZERO_TID:
-            app.invalidatePartitions(tid, set(xrange(app.pt.getPartitions())))
-        else: # upstream DB is empty
-            assert app.app.getLastTransaction() == tid
+        prev_tid = app.app.getLastTransaction()
+        if prev_tid < tid:
+            # Since we don't know which partitions were modified during our
+            # absence, we must force replication on all storages. As long as
+            # they haven't done this first check, our backup tid will remain
+            # inferior to this 'tid'. We don't know the real prev_tid, which is:
+            #   >= app.app.getLastTransaction()
+            #   < tid
+            # but passing 'tid' is good enough.
+            app.invalidatePartitions(tid, tid, xrange(app.pt.getPartitions()))
+        elif prev_tid != tid:
+            raise RuntimeError("upstream DB truncated")
        app.ignore_invalidations = False

    # NOTE invalidation from M -> Mb
@@ -49,4 +57,5 @@ class BackupHandler(EventHandler):
        getPartition = app.app.pt.getPartition
        partition_set = set(map(getPartition, oid_list))
        partition_set.add(getPartition(tid))
-        app.invalidatePartitions(tid, partition_set)
+        prev_tid = app.app.getLastTransaction()
+        app.invalidatePartitions(tid, prev_tid, partition_set)
--- a/neo/storage/replicator.py
+++ b/neo/storage/replicator.py
@@ -214,6 +214,10 @@ class Replicator(object):
            self.updateBackupTID()
        self._nextPartition()

+    def _nextPartitionSortKey(self, offset):
+        p = self.partition_dict[offset]
+        return p.next_obj, bool(p.max_ttid)
+
    def _nextPartition(self):
        # XXX: One connection to another storage may remain open forever.
        #      All other previous connections are automatically closed
@@ -227,12 +231,12 @@ class Replicator(object):
        if self.current_partition is not None or not self.replicate_dict:
            return
        app = self.app
-        # Choose a partition with no unfinished transaction if possible.
+        # Start replicating the partition which is furthest behind,
+        # to increase the overall backup_tid as soon as possible.
+        # Then prefer a partition with no unfinished transaction.
        # XXX: When leaving backup mode, we should only consider UP_TO_DATE
        #      cells.
-        for offset in self.replicate_dict:
-            if not self.partition_dict[offset].max_ttid:
-                break
+        offset = min(self.replicate_dict, key=self._nextPartitionSortKey)
        try:
            addr, name = self.source_dict[offset]
        except KeyError:

--- a/neo/tests/threaded/testReplication.py
+++ b/neo/tests/threaded/testReplication.py
@@ -394,6 +394,29 @@ class ReplicationTests(NEOThreadedTest):
        finally:
            upstream.stop()

+    @backup_test()
+    def testBackupTid(self, backup):
+        """
+        Check that the backup cluster does not claim it has all the data just
+        after it came back whereas new transactions were committed during its
+        absence.
+        """
+        importZODB = backup.upstream.importZODB()
+        importZODB(1)
+        self.tic()
+        last_tid = backup.upstream.last_tid
+        self.assertEqual(last_tid, backup.backup_tid)
+        backup.stop()
+        importZODB(1)
+        backup.reset()
+        with ConnectionFilter() as f:
+            f.add(lambda conn, packet:
+                isinstance(packet, Packets.AskFetchTransactions))
+            backup.start()
+            self.assertEqual(last_tid, backup.backup_tid)
+        self.tic()
+        self.assertEqual(1, self.checkBackup(backup))
+
    def testSafeTweak(self):
        """
        Check that tweak always tries to keep a minimum of (replicas + 1)