Actively try to correct verification failures on the secondary

parent 474186b6
...@@ -17,9 +17,9 @@ it from the primary to resolve the issue. ...@@ -17,9 +17,9 @@ it from the primary to resolve the issue.
If verification succeeds on the **primary** but fails on the **secondary**, If verification succeeds on the **primary** but fails on the **secondary**,
this indicates that the object was corrupted during the replication process. this indicates that the object was corrupted during the replication process.
Until [issue #5195][ee-5195] is implemented, Geo won't automatically resolve Geo actively try to correct verification failures marking the repository to
verification failures of this kind, so you should follow be resynced with a backoff period. If you want to reset the verification for
[these instructions][reset-verification] these failures, so you should follow [these instructions][reset-verification].
If verification is lagging significantly behind replication, consider giving If verification is lagging significantly behind replication, consider giving
the node more time before scheduling a planned failover. the node more time before scheduling a planned failover.
...@@ -88,9 +88,10 @@ for every node after every update to make sure that they are all in sync. ...@@ -88,9 +88,10 @@ for every node after every update to make sure that they are all in sync.
# Reset verification for projects where verification has failed # Reset verification for projects where verification has failed
Until [issue #5195][ee-5195] is implemented, Geo won't automatically resolve Geo actively try to correct verification failures marking the repository to
verification failures, so you should reset them manually. This rake task marks be resynced with a backoff period. If you want to reset them manually, this
projects where verification has failed or the checksum mismatch to be resynced: rake task marks projects where verification has failed or the checksum mismatch
to be resynced without the backoff period:
#### For repositories: #### For repositories:
......
...@@ -355,12 +355,9 @@ module Geo ...@@ -355,12 +355,9 @@ module Geo
# @return [ActiveRecord::Relation<Geo::ProjectRegistry>] list of registries that need verification # @return [ActiveRecord::Relation<Geo::ProjectRegistry>] list of registries that need verification
def legacy_find_registries_to_verify(batch_size:) def legacy_find_registries_to_verify(batch_size:)
repo_condition = local_repo_condition
wiki_condition = local_wiki_condition
registries = Geo::ProjectRegistry registries = Geo::ProjectRegistry
.where(repo_condition.or(wiki_condition)) .where(local_repo_condition.or(local_wiki_condition))
.pluck(:project_id, repo_condition.to_sql, wiki_condition.to_sql) .pluck(:project_id, local_repo_condition.to_sql, local_wiki_condition.to_sql)
return Geo::ProjectRegistry.none if registries.empty? return Geo::ProjectRegistry.none if registries.empty?
...@@ -410,12 +407,22 @@ module Geo ...@@ -410,12 +407,22 @@ module Geo
local_registry_table[:repository_verification_checksum_sha].eq(nil) local_registry_table[:repository_verification_checksum_sha].eq(nil)
.and(local_registry_table[:last_repository_verification_failure].eq(nil)) .and(local_registry_table[:last_repository_verification_failure].eq(nil))
.and(local_registry_table[:resync_repository].eq(false)) .and(local_registry_table[:resync_repository].eq(false))
.and(repository_missing_on_primary_is_not_true)
end end
def local_wiki_condition def local_wiki_condition
local_registry_table[:wiki_verification_checksum_sha].eq(nil) local_registry_table[:wiki_verification_checksum_sha].eq(nil)
.and(local_registry_table[:last_wiki_verification_failure].eq(nil)) .and(local_registry_table[:last_wiki_verification_failure].eq(nil))
.and(local_registry_table[:resync_wiki].eq(false)) .and(local_registry_table[:resync_wiki].eq(false))
.and(wiki_missing_on_primary_is_not_true)
end
def repository_missing_on_primary_is_not_true
Arel::Nodes::SqlLiteral.new("project_registry.repository_missing_on_primary IS NOT TRUE")
end
def wiki_missing_on_primary_is_not_true
Arel::Nodes::SqlLiteral.new("project_registry.wiki_missing_on_primary IS NOT TRUE")
end end
end end
end end
...@@ -4,6 +4,7 @@ class Geo::ProjectRegistry < Geo::BaseRegistry ...@@ -4,6 +4,7 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
include ::IgnorableColumn include ::IgnorableColumn
include ::ShaAttribute include ::ShaAttribute
MAX_VERIFICATION_RETRIES = 5
REGISTRY_TYPES = %i{repository wiki}.freeze REGISTRY_TYPES = %i{repository wiki}.freeze
RETRIES_BEFORE_REDOWNLOAD = 5 RETRIES_BEFORE_REDOWNLOAD = 5
...@@ -115,7 +116,11 @@ class Geo::ProjectRegistry < Geo::BaseRegistry ...@@ -115,7 +116,11 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
"#{type}_verification_checksum_sha" => nil, "#{type}_verification_checksum_sha" => nil,
"#{type}_checksum_mismatch" => false, "#{type}_checksum_mismatch" => false,
"last_#{type}_verification_failure" => nil, "last_#{type}_verification_failure" => nil,
"resync_#{type}_was_scheduled_at" => scheduled_at) "#{type}_verification_retry_count" => nil,
"resync_#{type}_was_scheduled_at" => scheduled_at,
"#{type}_retry_count" => nil,
"#{type}_retry_at" => nil
)
end end
def repository_sync_due?(scheduled_time) def repository_sync_due?(scheduled_time)
...@@ -150,6 +155,14 @@ class Geo::ProjectRegistry < Geo::BaseRegistry ...@@ -150,6 +155,14 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
retry_count(type) > RETRIES_BEFORE_REDOWNLOAD retry_count(type) > RETRIES_BEFORE_REDOWNLOAD
end end
def should_be_reverified?(type)
verification_retry_count(type) <= MAX_VERIFICATION_RETRIES
end
def verification_retry_count(type)
public_send("#{type}_verification_retry_count").to_i # rubocop:disable GitlabSecurity/PublicSend
end
private private
def fetches_since_gc_redis_key def fetches_since_gc_redis_key
...@@ -178,15 +191,6 @@ class Geo::ProjectRegistry < Geo::BaseRegistry ...@@ -178,15 +191,6 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
last_wiki_synced_at && timestamp > last_wiki_synced_at last_wiki_synced_at && timestamp > last_wiki_synced_at
end end
# To prevent the retry time from storing invalid dates in the database,
# cap the max time to a week plus some random jitter value.
def next_retry_time(retry_count)
proposed_time = Time.now + delay(retry_count).seconds
max_future_time = Time.now + 7.days + delay(1).seconds
[proposed_time, max_future_time].min
end
def retry_count(type) def retry_count(type)
public_send("#{type}_retry_count") || -1 # rubocop:disable GitlabSecurity/PublicSend public_send("#{type}_retry_count") || -1 # rubocop:disable GitlabSecurity/PublicSend
end end
......
# frozen_string_literal: true
module Geo
class BaseRepositoryVerificationService
include Delay
include Gitlab::Geo::ProjectLogHelpers
def execute
raise NotImplementedError
end
private
def calculate_checksum(repository)
repository.checksum
rescue Gitlab::Git::Repository::NoRepository, Gitlab::Git::Repository::InvalidRepository
Gitlab::Git::Repository::EMPTY_REPOSITORY_CHECKSUM
end
def calculate_next_retry_attempt(resource, type)
retry_count = resource.public_send("#{type}_retry_count").to_i + 1 # rubocop:disable GitlabSecurity/PublicSend
[next_retry_time(retry_count), retry_count]
end
end
end
module Geo # frozen_string_literal: true
class RepositoryVerificationPrimaryService
include Delay
include Gitlab::Geo::ProjectLogHelpers
module Geo
class RepositoryVerificationPrimaryService < BaseRepositoryVerificationService
def initialize(project) def initialize(project)
@project = project @project = project
end end
def execute def execute
calculate_repository_checksum verify_checksum(:repository, project.repository)
calculate_wiki_checksum verify_checksum(:wiki, project.wiki.repository)
end end
private private
attr_reader :project attr_reader :project
def calculate_repository_checksum def verify_checksum(type, repository)
calculate_checksum(:repository, project.repository) checksum = calculate_checksum(repository)
end update_repository_state!(type, checksum: checksum)
def calculate_wiki_checksum
calculate_checksum(:wiki, project.wiki.repository)
end
def calculate_checksum(type, repository)
update_repository_state!(type, checksum: repository.checksum)
rescue Gitlab::Git::Repository::NoRepository, Gitlab::Git::Repository::InvalidRepository
update_repository_state!(type, checksum: Gitlab::Git::Repository::EMPTY_REPOSITORY_CHECKSUM)
rescue => e rescue => e
log_error('Error calculating the repository checksum', e, type: type) log_error("Error calculating the #{type} checksum", e, type: type)
update_repository_state!(type, failure: e.message) update_repository_state!(type, failure: e.message)
end end
def update_repository_state!(type, checksum: nil, failure: nil) def update_repository_state!(type, checksum: nil, failure: nil)
retry_at, retry_count = retry_at, retry_count =
if failure.present? if failure.present?
retry_count = repository_state.public_send("#{type}_retry_count").to_i + 1 # rubocop:disable GitlabSecurity/PublicSend calculate_next_retry_attempt(repository_state, type)
[next_retry_time(retry_count), retry_count]
end end
repository_state.update!( repository_state.update!(
...@@ -48,15 +37,6 @@ module Geo ...@@ -48,15 +37,6 @@ module Geo
) )
end end
# To prevent the retry time from storing invalid dates in the database,
# cap the max time to a week plus some random jitter value.
def next_retry_time(retry_count)
proposed_time = Time.now + delay(retry_count).seconds
max_future_time = Time.now + 7.days + delay(1).seconds
[proposed_time, max_future_time].min
end
def repository_state def repository_state
@repository_state ||= project.repository_state || project.build_repository_state @repository_state ||= project.repository_state || project.build_repository_state
end end
......
...@@ -47,7 +47,9 @@ module Geo ...@@ -47,7 +47,9 @@ module Geo
"resync_#{type}" => true, "resync_#{type}" => true,
"#{type}_verification_checksum_sha" => nil, "#{type}_verification_checksum_sha" => nil,
"#{type}_checksum_mismatch" => false, "#{type}_checksum_mismatch" => false,
"last_#{type}_verification_failure" => nil "last_#{type}_verification_failure" => nil,
"#{type}_verification_retry_count" => nil,
"#{type}_missing_on_primary" => nil
} }
end end
end end
......
module Geo # frozen_string_literal: true
class RepositoryVerificationSecondaryService
include Gitlab::Geo::ProjectLogHelpers
module Geo
class RepositoryVerificationSecondaryService < BaseRepositoryVerificationService
def initialize(registry, type) def initialize(registry, type)
@registry = registry @registry = registry
@type = type.to_sym @type = type.to_sym
...@@ -23,14 +23,22 @@ module Geo ...@@ -23,14 +23,22 @@ module Geo
def should_verify_checksum? def should_verify_checksum?
return false if resync? return false if resync?
return false unless primary_checksum.present?
primary_checksum.present? && primary_checksum != secondary_checksum mismatch?(secondary_checksum)
end end
def resync? def resync?
registry.public_send("resync_#{type}") # rubocop:disable GitlabSecurity/PublicSend registry.public_send("resync_#{type}") # rubocop:disable GitlabSecurity/PublicSend
end end
def reverify?
# We can have stale checksums in the database, in this case, even resyncing
# the project the checksum will not match. This guard clause prevents us to
# retry the verification for the same projects over and over.
registry.should_be_reverified?(type)
end
def primary_checksum def primary_checksum
project.repository_state.public_send("#{type}_verification_checksum") # rubocop:disable GitlabSecurity/PublicSend project.repository_state.public_send("#{type}_verification_checksum") # rubocop:disable GitlabSecurity/PublicSend
end end
...@@ -39,46 +47,45 @@ module Geo ...@@ -39,46 +47,45 @@ module Geo
registry.public_send("#{type}_verification_checksum_sha") # rubocop:disable GitlabSecurity/PublicSend registry.public_send("#{type}_verification_checksum_sha") # rubocop:disable GitlabSecurity/PublicSend
end end
def mismatch?(checksum)
primary_checksum != checksum
end
def verify_checksum def verify_checksum
checksum = calculate_checksum checksum = calculate_checksum(repository)
if mismatch?(checksum) if mismatch?(checksum)
update_registry!(mismatch: true, failure: "#{type.to_s.capitalize} checksum mismatch: #{repository.disk_path}") update_registry!(mismatch: true, failure: "#{type.to_s.capitalize} checksum mismatch")
else else
update_registry!(checksum: checksum) update_registry!(checksum: checksum)
end end
rescue ::Gitlab::Git::Repository::ChecksumError, Timeout::Error => e rescue => e
update_registry!(failure: "Error verifying #{type.to_s.capitalize} checksum: #{repository.disk_path}", exception: e) update_registry!(failure: "Error calculating #{type} checksum", exception: e)
end end
def calculate_checksum def update_registry!(checksum: nil, mismatch: false, failure: nil, exception: nil)
repository.checksum reverify, verification_retry_count =
rescue Gitlab::Git::Repository::NoRepository, Gitlab::Git::Repository::InvalidRepository if mismatch || failure.present?
Gitlab::Git::Repository::EMPTY_REPOSITORY_CHECKSUM log_error(failure, exception, type: type)
[reverify?, registry.verification_retry_count(type) + 1]
end end
def mismatch?(checksum) resync_retry_at, resync_retry_count =
primary_checksum != checksum if reverify
[*calculate_next_retry_attempt(registry, type)]
end end
def update_registry!(checksum: nil, mismatch: false, failure: nil, exception: nil, details: {}) registry.update!(
attrs = {
"#{type}_verification_checksum_sha" => checksum, "#{type}_verification_checksum_sha" => checksum,
"#{type}_checksum_mismatch" => mismatch, "#{type}_checksum_mismatch" => mismatch,
"last_#{type}_verification_failure" => failure "last_#{type}_verification_failure" => failure,
} "#{type}_verification_retry_count" => verification_retry_count,
"resync_#{type}" => reverify.present?,
if failure "#{type}_retry_at" => resync_retry_at,
log_error(failure, exception, "#{type}_retry_count" => resync_retry_count
type: type,
repository_shard: project.repository_storage,
repsitory_disk_path: repository.disk_path
) )
end end
registry.update!(attrs)
end
def repository def repository
@repository ||= @repository ||=
case type case type
......
# frozen_string_literal: true
class AddRetryVerificationFieldsToProjectRegistry < ActiveRecord::Migration
def change
add_column :project_registry, :repository_verification_retry_count, :integer
add_column :project_registry, :wiki_verification_retry_count, :integer
end
end
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180727221937) do ActiveRecord::Schema.define(version: 20180802215313) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
...@@ -80,6 +80,8 @@ ActiveRecord::Schema.define(version: 20180727221937) do ...@@ -80,6 +80,8 @@ ActiveRecord::Schema.define(version: 20180727221937) do
t.datetime_with_timezone "resync_wiki_was_scheduled_at" t.datetime_with_timezone "resync_wiki_was_scheduled_at"
t.boolean "repository_missing_on_primary" t.boolean "repository_missing_on_primary"
t.boolean "wiki_missing_on_primary" t.boolean "wiki_missing_on_primary"
t.integer "repository_verification_retry_count"
t.integer "wiki_verification_retry_count"
end end
add_index "project_registry", ["last_repository_successful_sync_at"], name: "index_project_registry_on_last_repository_successful_sync_at", using: :btree add_index "project_registry", ["last_repository_successful_sync_at"], name: "index_project_registry_on_last_repository_successful_sync_at", using: :btree
......
...@@ -3,4 +3,13 @@ module Delay ...@@ -3,4 +3,13 @@ module Delay
def delay(retry_count = 0) def delay(retry_count = 0)
(retry_count**4) + 15 + (rand(30) * (retry_count + 1)) (retry_count**4) + 15 + (rand(30) * (retry_count + 1))
end end
# To prevent the retry time from storing invalid dates in the database,
# cap the max time to a week plus some random jitter value.
def next_retry_time(retry_count)
proposed_time = Time.now + delay(retry_count).seconds
max_future_time = Time.now + 7.days + delay(1).seconds
[proposed_time, max_future_time].min
end
end end
...@@ -677,6 +677,20 @@ describe Geo::ProjectRegistryFinder, :geo do ...@@ -677,6 +677,20 @@ describe Geo::ProjectRegistryFinder, :geo do
expect(subject.find_registries_to_verify(batch_size: 100)).to be_empty expect(subject.find_registries_to_verify(batch_size: 100)).to be_empty
end end
it 'does not return registries when the repository is missing on primary' do
project_verified = create(:repository_state, :repository_verified).project
create(:geo_project_registry, :synced, project: project_verified, repository_missing_on_primary: true)
expect(subject.find_registries_to_verify(batch_size: 100)).to be_empty
end
it 'does not return registries when the wiki is missing on primary' do
project_verified = create(:repository_state, :wiki_verified).project
create(:geo_project_registry, :synced, project: project_verified, wiki_missing_on_primary: true)
expect(subject.find_registries_to_verify(batch_size: 100)).to be_empty
end
end end
end end
......
This diff is collapsed.
...@@ -53,7 +53,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -53,7 +53,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_repository: true, resync_repository: true,
repository_verification_checksum_sha: nil, repository_verification_checksum_sha: nil,
repository_checksum_mismatch: false, repository_checksum_mismatch: false,
last_repository_verification_failure: nil last_repository_verification_failure: nil,
repository_verification_retry_count: nil,
repository_missing_on_primary: nil
) )
end end
...@@ -67,7 +69,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -67,7 +69,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_repository: true, resync_repository: true,
repository_verification_checksum_sha: nil, repository_verification_checksum_sha: nil,
repository_checksum_mismatch: false, repository_checksum_mismatch: false,
last_repository_verification_failure: nil last_repository_verification_failure: nil,
repository_verification_retry_count: nil,
repository_missing_on_primary: nil
) )
end end
...@@ -81,7 +85,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -81,7 +85,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_repository: false, resync_repository: false,
repository_verification_checksum_sha: be_present, repository_verification_checksum_sha: be_present,
repository_checksum_mismatch: false, repository_checksum_mismatch: false,
last_repository_verification_failure: nil last_repository_verification_failure: nil,
repository_verification_retry_count: nil,
repository_missing_on_primary: nil
) )
end end
...@@ -95,7 +101,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -95,7 +101,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_repository: false, resync_repository: false,
repository_verification_checksum_sha: nil, repository_verification_checksum_sha: nil,
repository_checksum_mismatch: false, repository_checksum_mismatch: false,
last_repository_verification_failure: nil last_repository_verification_failure: nil,
repository_verification_retry_count: nil,
repository_missing_on_primary: nil
) )
end end
end end
...@@ -121,7 +129,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -121,7 +129,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_wiki: true, resync_wiki: true,
wiki_verification_checksum_sha: nil, wiki_verification_checksum_sha: nil,
wiki_checksum_mismatch: false, wiki_checksum_mismatch: false,
last_wiki_verification_failure: nil last_wiki_verification_failure: nil,
wiki_verification_retry_count: nil,
wiki_missing_on_primary: nil
) )
end end
...@@ -135,7 +145,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -135,7 +145,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_wiki: true, resync_wiki: true,
wiki_verification_checksum_sha: nil, wiki_verification_checksum_sha: nil,
wiki_checksum_mismatch: false, wiki_checksum_mismatch: false,
last_wiki_verification_failure: nil last_wiki_verification_failure: nil,
wiki_verification_retry_count: nil,
wiki_missing_on_primary: nil
) )
end end
...@@ -149,7 +161,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -149,7 +161,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_wiki: false, resync_wiki: false,
wiki_verification_checksum_sha: be_present, wiki_verification_checksum_sha: be_present,
wiki_checksum_mismatch: false, wiki_checksum_mismatch: false,
last_wiki_verification_failure: nil last_wiki_verification_failure: nil,
wiki_verification_retry_count: nil,
wiki_missing_on_primary: nil
) )
end end
...@@ -163,7 +177,9 @@ describe Geo::RepositoryVerificationReset, :geo do ...@@ -163,7 +177,9 @@ describe Geo::RepositoryVerificationReset, :geo do
resync_wiki: false, resync_wiki: false,
wiki_verification_checksum_sha: nil, wiki_verification_checksum_sha: nil,
wiki_checksum_mismatch: false, wiki_checksum_mismatch: false,
last_wiki_verification_failure: nil last_wiki_verification_failure: nil,
wiki_verification_retry_count: nil,
wiki_missing_on_primary: nil
) )
end end
end end
......
...@@ -3,19 +3,13 @@ require 'spec_helper' ...@@ -3,19 +3,13 @@ require 'spec_helper'
describe Geo::RepositoryVerificationSecondaryService, :geo do describe Geo::RepositoryVerificationSecondaryService, :geo do
include ::EE::GeoHelpers include ::EE::GeoHelpers
let(:secondary) { create(:geo_node) }
before do
stub_current_geo_node(secondary)
end
shared_examples 'verify checksums for repositories/wikis' do |type| shared_examples 'verify checksums for repositories/wikis' do |type|
let(:repository) { find_repository(type) } let(:repository) { find_repository(type) }
subject(:service) { described_class.new(registry, type) } subject(:service) { described_class.new(registry, type) }
it 'does not calculate the checksum when not running on a secondary' do it 'does not calculate the checksum when not running on a secondary' do
allow(Gitlab::Geo).to receive(:secondary?) { false } allow(Gitlab::Geo).to receive(:secondary?).and_return(false)
expect(repository).not_to receive(:checksum) expect(repository).not_to receive(:checksum)
...@@ -38,7 +32,7 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do ...@@ -38,7 +32,7 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do
service.execute service.execute
end end
it 'does not verify the checksum if the checksums already match' do it 'does not verify the checksum if the current checksum matches' do
repository_state.assign_attributes("#{type}_verification_checksum" => 'my_checksum') repository_state.assign_attributes("#{type}_verification_checksum" => 'my_checksum')
registry.assign_attributes("#{type}_verification_checksum_sha" => 'my_checksum') registry.assign_attributes("#{type}_verification_checksum_sha" => 'my_checksum')
...@@ -48,14 +42,18 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do ...@@ -48,14 +42,18 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do
end end
it 'sets checksum when the checksum matches' do it 'sets checksum when the checksum matches' do
expect(repository).to receive(:checksum).and_return('my_checksum') allow(repository).to receive(:checksum).and_return('my_checksum')
service.execute service.execute
expect(registry).to have_attributes( expect(registry).to have_attributes(
"#{type}_verification_checksum_sha" => 'my_checksum', "#{type}_verification_checksum_sha" => 'my_checksum',
"#{type}_checksum_mismatch" => false, "#{type}_checksum_mismatch" => false,
"last_#{type}_verification_failure" => nil "last_#{type}_verification_failure" => nil,
"#{type}_verification_retry_count" => nil,
"resync_#{type}" => false,
"#{type}_retry_at" => nil,
"#{type}_retry_count" => nil
) )
end end
...@@ -69,22 +67,96 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do ...@@ -69,22 +67,96 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do
expect(registry).to have_attributes( expect(registry).to have_attributes(
"#{type}_verification_checksum_sha" => '0000000000000000000000000000000000000000', "#{type}_verification_checksum_sha" => '0000000000000000000000000000000000000000',
"#{type}_checksum_mismatch" => false, "#{type}_checksum_mismatch" => false,
"last_#{type}_verification_failure" => nil "last_#{type}_verification_failure" => nil,
"#{type}_verification_retry_count" => nil,
"resync_#{type}" => false,
"#{type}_retry_at" => nil,
"#{type}_retry_count" => nil
)
end
context 'when the checksum mismatch' do
before do
allow(repository).to receive(:checksum).and_return('other_checksum')
end
it 'keeps track of failures' do
service.execute
expect(registry).to have_attributes(
"#{type}_verification_checksum_sha" => nil,
"#{type}_checksum_mismatch" => true,
"last_#{type}_verification_failure" => "#{type.to_s.capitalize} checksum mismatch",
"#{type}_verification_retry_count" => 1,
"resync_#{type}" => true,
"#{type}_retry_at" => be_present,
"#{type}_retry_count" => 1
) )
end end
it 'keeps track of failure when the checksum mismatch' do it "does not mark #{type} to be resynced when max retry attempts reached" do
expect(repository).to receive(:checksum).and_return('other_checksum') stub_const('Geo::ProjectRegistry::MAX_VERIFICATION_RETRIES', 1)
registry.update("#{type}_verification_retry_count" => 2)
service.execute service.execute
expect(registry).to have_attributes( expect(registry).to have_attributes(
"#{type}_verification_checksum_sha" => nil, "#{type}_verification_checksum_sha" => nil,
"#{type}_checksum_mismatch" => true, "#{type}_checksum_mismatch" => true,
"last_#{type}_verification_failure" => /#{Regexp.quote(type.to_s.capitalize)} checksum mismatch/ "last_#{type}_verification_failure" => "#{type.to_s.capitalize} checksum mismatch",
"#{type}_verification_retry_count" => 3,
"resync_#{type}" => false,
"#{type}_retry_at" => nil,
"#{type}_retry_count" => nil
) )
end end
it 'ensures the next retry time is capped properly' do
registry.update("#{type}_retry_count" => 30)
service.execute
expect(registry).to have_attributes(
"resync_#{type}" => true,
"#{type}_retry_at" => be_within(100.seconds).of(Time.now + 7.days),
"#{type}_retry_count" => 31
)
end
end
context 'when checksum calculation fails' do
before do
allow(repository).to receive(:checksum).and_raise("Something went wrong with #{type}")
end
it 'keeps track of failures' do
service.execute
expect(registry).to have_attributes(
"#{type}_verification_checksum_sha" => nil,
"#{type}_checksum_mismatch" => false,
"last_#{type}_verification_failure" => "Error calculating #{type} checksum",
"#{type}_verification_retry_count" => 1,
"resync_#{type}" => true,
"#{type}_retry_at" => be_present,
"#{type}_retry_count" => 1
)
end
it 'ensures the next retry time is capped properly' do
registry.update("#{type}_retry_count" => 30)
service.execute
expect(registry).to have_attributes(
"resync_#{type}" => true,
"#{type}_retry_at" => be_within(100.seconds).of(Time.now + 7.days),
"#{type}_retry_count" => 31
)
end
end
def find_repository(type) def find_repository(type)
case type case type
when :repository then project.repository when :repository then project.repository
...@@ -93,16 +165,22 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do ...@@ -93,16 +165,22 @@ describe Geo::RepositoryVerificationSecondaryService, :geo do
end end
end end
let(:secondary) { create(:geo_node) }
before do
stub_current_geo_node(secondary)
end
describe '#execute' do describe '#execute' do
let(:project) { create(:project, :repository, :wiki_repo) } let(:project) { create(:project, :repository, :wiki_repo) }
let!(:repository_state) { create(:repository_state, project: project, repository_verification_checksum: 'my_checksum', wiki_verification_checksum: 'my_checksum') } let!(:repository_state) { create(:repository_state, project: project, repository_verification_checksum: 'my_checksum', wiki_verification_checksum: 'my_checksum') }
let(:registry) { create(:geo_project_registry, :synced, project: project) } let(:registry) { create(:geo_project_registry, :synced, project: project) }
context 'repository' do context 'for a repository' do
include_examples 'verify checksums for repositories/wikis', :repository include_examples 'verify checksums for repositories/wikis', :repository
end end
context 'wiki' do context 'for a wiki' do
include_examples 'verify checksums for repositories/wikis', :wiki include_examples 'verify checksums for repositories/wikis', :wiki
end end
end end
......
...@@ -52,6 +52,18 @@ describe Geo::RepositoryVerification::Secondary::ShardWorker, :postgresql, :clea ...@@ -52,6 +52,18 @@ describe Geo::RepositoryVerification::Secondary::ShardWorker, :postgresql, :clea
subject.perform(shard_name) subject.perform(shard_name)
end end
it 'does not schedule jobs for projects missing repositories on primary' do
other_project = create(:project)
create(:repository_state, :repository_verified, project: project)
create(:repository_state, :wiki_verified, project: other_project)
create(:geo_project_registry, :synced, project: project, repository_missing_on_primary: true)
create(:geo_project_registry, :synced, project: other_project, wiki_missing_on_primary: true)
expect(secondary_singleworker).not_to receive(:perform_async)
subject.perform(shard_name)
end
# test that when jobs are always moving forward and we're not querying the same things # test that when jobs are always moving forward and we're not querying the same things
# over and over # over and over
describe 'resource loading' do describe 'resource loading' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment