Commit c615061f authored by Mike Kozono's avatar Mike Kozono

Automatically resync verification failures

Changelog: added
EE: true
parent 2b2c4e7e
......@@ -104,7 +104,7 @@ module Geo::ReplicableRegistry
end
event :failed do
transition [:started] => :failed
transition [:started, :synced] => :failed
end
event :resync do
......@@ -120,6 +120,7 @@ module Geo::ReplicableRegistry
def failed!(message, error = nil)
self.last_sync_failure = message
self.last_sync_failure += ": #{error.message}" if error.respond_to?(:message)
self.last_sync_failure.truncate(255)
super()
end
......
......@@ -86,5 +86,23 @@ module Geo
def after_synced
self.verification_pending!
end
override :before_verification_failed
def before_verification_failed
# Let verification failure fields get set as usual
super
# When this registry became synced, retry_count was set to 0. This line
# ensures that resyncs due to verification failures use progressive
# backoff behavior. One is subtracted to compensate for 1 being
# automatically added to `retry_count` on transition to failed.
self.retry_count = self.verification_retry_count - 1
self.last_sync_failure = "Verification failed with: #{verification_failure}".truncate(255)
# This registry was successfully synced, and now it has failed
# verification. This line makes Geo automatically resync it.
self.failed
end
end
end
......@@ -67,10 +67,7 @@ module Gitlab
end
before_transition any => :verification_failed do |instance, _|
instance.verification_retry_count ||= 0
instance.verification_retry_count += 1
instance.verification_retry_at = instance.next_retry_time(instance.verification_retry_count)
instance.verified_at = Time.current
instance.before_verification_failed
end
before_transition any => :verification_succeeded do |instance, _|
......@@ -301,6 +298,14 @@ module Gitlab
self.verification_failure = nil
end
# Overridden by Geo::VerifiableRegistry
def before_verification_failed
self.verification_retry_count ||= 0
self.verification_retry_count += 1
self.verification_retry_at = self.next_retry_time(self.verification_retry_count)
self.verified_at = Time.current
end
# Provides a safe and easy way to manage the verification state for a
# synchronous checksum calculation.
#
......
......@@ -19,6 +19,21 @@ RSpec.shared_examples 'a Geo verifiable registry' do
end
end
context 'verification_state machine' do
context 'when transitioning to verification_failed' do
it 'changes state from synced to failed' do
registry = create(registry_class_factory, :synced)
registry.verification_failed_with_message!('foo')
expect(registry.reload).to be_failed
expect(registry.verification_failure).to eq('foo')
expect(registry.last_sync_failure).to eq('Verification failed with: foo')
expect(registry.retry_count).to eq(1)
end
end
end
describe '.verification_pending_batch' do
before do
subject.save!
......@@ -51,13 +66,22 @@ RSpec.shared_examples 'a Geo verifiable registry' do
describe '.verification_failed_batch' do
before do
subject.verification_failed_with_message!('foo')
# The setup is unusual because we don't want
# `before_verification_failure` to set the sync state to `:failed`.
# This lets us test things that are synced but failed verification, which
# should not happen anymore, but may exist from before we implemented
# automatic resync of verification failures.
subject.synced
subject.verification_state = verification_state_value(:verification_failed)
subject.verification_failure = 'foo'
subject.verification_retry_count = 1
subject.verified_at = Time.current
subject.verification_retry_at = verification_retry_at
subject.save!
end
context 'with a failed record with retry due' do
before do
subject.update!(verification_retry_at: 1.minute.ago)
end
let(:verification_retry_at) { 1.minute.ago }
it 'returns IDs of rows which are synced and have failed verification' do
expect(described_class.verification_failed_batch(batch_size: 4)).to match_array([subject.model_record_id])
......@@ -85,9 +109,9 @@ RSpec.shared_examples 'a Geo verifiable registry' do
end
context 'when verification_retry_at is in the future' do
it 'does not return the row which failed verification' do
subject.update!(verification_retry_at: 1.minute.from_now)
let(:verification_retry_at) { 1.minute.from_now }
it 'does not return the row which failed verification' do
expect(subject.class.verification_failed_batch(batch_size: 4)).not_to include(subject.model_record_id)
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment