Commit da2bb775 authored by Rémy Coutable's avatar Rémy Coutable

Merge branch 'mk/resync-verification-failures' into 'master'

Geo: Resync verification failures

See merge request gitlab-org/gitlab!66851
parents 3a8dd9a1 c615061f
...@@ -104,7 +104,7 @@ module Geo::ReplicableRegistry ...@@ -104,7 +104,7 @@ module Geo::ReplicableRegistry
end end
event :failed do event :failed do
transition [:started] => :failed transition [:started, :synced] => :failed
end end
event :resync do event :resync do
...@@ -120,6 +120,7 @@ module Geo::ReplicableRegistry ...@@ -120,6 +120,7 @@ module Geo::ReplicableRegistry
def failed!(message, error = nil) def failed!(message, error = nil)
self.last_sync_failure = message self.last_sync_failure = message
self.last_sync_failure += ": #{error.message}" if error.respond_to?(:message) self.last_sync_failure += ": #{error.message}" if error.respond_to?(:message)
self.last_sync_failure.truncate(255)
super() super()
end end
......
...@@ -86,5 +86,23 @@ module Geo ...@@ -86,5 +86,23 @@ module Geo
def after_synced def after_synced
self.verification_pending! self.verification_pending!
end end
override :before_verification_failed
def before_verification_failed
# Let verification failure fields get set as usual
super
# When this registry became synced, retry_count was set to 0. This line
# ensures that resyncs due to verification failures use progressive
# backoff behavior. One is subtracted to compensate for 1 being
# automatically added to `retry_count` on transition to failed.
self.retry_count = self.verification_retry_count - 1
self.last_sync_failure = "Verification failed with: #{verification_failure}".truncate(255)
# This registry was successfully synced, and now it has failed
# verification. This line makes Geo automatically resync it.
self.failed
end
end end
end end
...@@ -67,10 +67,7 @@ module Gitlab ...@@ -67,10 +67,7 @@ module Gitlab
end end
before_transition any => :verification_failed do |instance, _| before_transition any => :verification_failed do |instance, _|
instance.verification_retry_count ||= 0 instance.before_verification_failed
instance.verification_retry_count += 1
instance.verification_retry_at = instance.next_retry_time(instance.verification_retry_count)
instance.verified_at = Time.current
end end
before_transition any => :verification_succeeded do |instance, _| before_transition any => :verification_succeeded do |instance, _|
...@@ -301,6 +298,14 @@ module Gitlab ...@@ -301,6 +298,14 @@ module Gitlab
self.verification_failure = nil self.verification_failure = nil
end end
# Overridden by Geo::VerifiableRegistry
def before_verification_failed
self.verification_retry_count ||= 0
self.verification_retry_count += 1
self.verification_retry_at = self.next_retry_time(self.verification_retry_count)
self.verified_at = Time.current
end
# Provides a safe and easy way to manage the verification state for a # Provides a safe and easy way to manage the verification state for a
# synchronous checksum calculation. # synchronous checksum calculation.
# #
......
...@@ -19,6 +19,21 @@ RSpec.shared_examples 'a Geo verifiable registry' do ...@@ -19,6 +19,21 @@ RSpec.shared_examples 'a Geo verifiable registry' do
end end
end end
context 'verification_state machine' do
context 'when transitioning to verification_failed' do
it 'changes state from synced to failed' do
registry = create(registry_class_factory, :synced)
registry.verification_failed_with_message!('foo')
expect(registry.reload).to be_failed
expect(registry.verification_failure).to eq('foo')
expect(registry.last_sync_failure).to eq('Verification failed with: foo')
expect(registry.retry_count).to eq(1)
end
end
end
describe '.verification_pending_batch' do describe '.verification_pending_batch' do
before do before do
subject.save! subject.save!
...@@ -51,13 +66,22 @@ RSpec.shared_examples 'a Geo verifiable registry' do ...@@ -51,13 +66,22 @@ RSpec.shared_examples 'a Geo verifiable registry' do
describe '.verification_failed_batch' do describe '.verification_failed_batch' do
before do before do
subject.verification_failed_with_message!('foo') # The setup is unusual because we don't want
# `before_verification_failure` to set the sync state to `:failed`.
# This lets us test things that are synced but failed verification, which
# should not happen anymore, but may exist from before we implemented
# automatic resync of verification failures.
subject.synced
subject.verification_state = verification_state_value(:verification_failed)
subject.verification_failure = 'foo'
subject.verification_retry_count = 1
subject.verified_at = Time.current
subject.verification_retry_at = verification_retry_at
subject.save!
end end
context 'with a failed record with retry due' do context 'with a failed record with retry due' do
before do let(:verification_retry_at) { 1.minute.ago }
subject.update!(verification_retry_at: 1.minute.ago)
end
it 'returns IDs of rows which are synced and have failed verification' do it 'returns IDs of rows which are synced and have failed verification' do
expect(described_class.verification_failed_batch(batch_size: 4)).to match_array([subject.model_record_id]) expect(described_class.verification_failed_batch(batch_size: 4)).to match_array([subject.model_record_id])
...@@ -85,9 +109,9 @@ RSpec.shared_examples 'a Geo verifiable registry' do ...@@ -85,9 +109,9 @@ RSpec.shared_examples 'a Geo verifiable registry' do
end end
context 'when verification_retry_at is in the future' do context 'when verification_retry_at is in the future' do
it 'does not return the row which failed verification' do let(:verification_retry_at) { 1.minute.from_now }
subject.update!(verification_retry_at: 1.minute.from_now)
it 'does not return the row which failed verification' do
expect(subject.class.verification_failed_batch(batch_size: 4)).not_to include(subject.model_record_id) expect(subject.class.verification_failed_batch(batch_size: 4)).not_to include(subject.model_record_id)
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment