Add rake task to resync projects where verification has failed

parent da555873
......@@ -15,15 +15,11 @@ If verification fails on the **primary**, this indicates that Geo is
successfully replicating a corrupted object; restore it from backup or remove
it from the primary to resolve the issue.
If verification succeeds on the **primary** but fails on the **secondary**, this
indicates that the object was corrupted during the replication process. Until
[issue #5195][ee-5195] is implemented, Geo won't automatically resolve
verification failures of this kind, so you should remove the registry entries to
force Geo to re-replicate the files:
```
sudo gitlab-rails runner 'Geo::ProjectRegistry.verification_failed.delete_all'
```
If verification succeeds on the **primary** but fails on the **secondary**,
this indicates that the object was corrupted during the replication process.
Until [issue #5195][ee-5195] is implemented, Geo won't automatically resolve
verification failures of this kind, so you should follow
[these instructions][reset-verification] to mark the affected projects for resync.
If verification is lagging significantly behind replication, consider giving
the node more time before scheduling a planned failover.
......@@ -90,6 +86,40 @@ GitLab-specific references to ensure true consistency. If two nodes have the
same checksum, then they definitely hold the same data. We compute the checksum
for every node after every update to make sure that they are all in sync.
# Reset verification for projects where verification has failed
Until [issue #5195][ee-5195] is implemented, Geo won't automatically resolve
verification failures, so you should reset them manually. This rake task marks
projects where verification has failed, or where the checksum does not match,
to be resynced:
#### For repositories:
**Omnibus Installation**
```
sudo gitlab-rake geo:verification:repository:reset
```
**Source Installation**
```bash
sudo -u git -H bundle exec rake geo:verification:repository:reset RAILS_ENV=production
```
#### For wikis:
**Omnibus Installation**
```
sudo gitlab-rake geo:verification:wiki:reset
```
**Source Installation**
```bash
sudo -u git -H bundle exec rake geo:verification:wiki:reset RAILS_ENV=production
```
# Current limitations
Until [issue #5064][ee-5064] is completed, background verification doesn't cover
......@@ -102,6 +132,7 @@ for ensuring the integrity of the data.
[disaster-recovery]: index.md
[feature-flag]: background_verification.md#enabling-or-disabling-the-automatic-background-verification
[reset-verification]: background_verification.md#reset-verification-for-projects-where-verification-has-failed
[foreground-verification]: ../../raketasks/check.md
[ee-5064]: https://gitlab.com/gitlab-org/gitlab-ee/issues/5064
[ee-5699]: https://gitlab.com/gitlab-org/gitlab-ee/issues/5699
......
......@@ -4,6 +4,7 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
include ::IgnorableColumn
include ::ShaAttribute
REGISTRY_TYPES = %i{repository wiki}.freeze
RETRIES_BEFORE_REDOWNLOAD = 5
ignore_column :last_repository_verification_at
......
# frozen_string_literal: true
module Geo
  # Flags project registry rows for resync when verification of the given
  # registry type (repository or wiki) has failed or its checksum mismatched.
  #
  # For every matching row the service sets the `resync_<type>` flag and clears
  # the stored verification checksum, the mismatch flag and the last
  # verification failure.
  class RepositoryVerificationReset
    # @param type [Symbol, String] registry type; expected to be one of
    #   Geo::ProjectRegistry::REGISTRY_TYPES (validated in #execute)
    def initialize(type)
      @type = type
    end

    # Marks the matching registry rows to be resynced.
    #
    # @return [Integer, nil] the sum of the values returned by `update_all`
    #   across all batches, or nil when the Geo database is not configured or
    #   the current node is not a secondary
    # @raise [ArgumentError] when `type` is not a known registry type
    def execute
      return unless Gitlab::Geo.geo_database_configured?
      return unless Gitlab::Geo.secondary?
      raise ArgumentError, "Invalid type: '#{type.inspect}'" unless valid_type?

      num_updated = 0

      # Only ids are selected; each yielded batch relation is updated in bulk
      # and the per-batch result is accumulated.
      Geo::ProjectRegistry
        .where(verification_failed.or(checksum_mismatch))
        .select(:id)
        .each_batch { |relation| num_updated += relation.update_all(updates) }

      num_updated
    end

    private

    attr_reader :type

    # Values that cannot be converted to a Symbol (nil, numbers, ...) are
    # treated as invalid instead of raising NoMethodError, so that #execute
    # consistently reports them through ArgumentError.
    def valid_type?
      return false unless type.respond_to?(:to_sym)

      Geo::ProjectRegistry::REGISTRY_TYPES.include?(type.to_sym)
    end

    def project_registry
      Geo::ProjectRegistry.arel_table
    end

    # Arel predicate: `<type>_checksum_mismatch` is true.
    def checksum_mismatch
      project_registry["#{type}_checksum_mismatch"].eq(true)
    end

    # Arel predicate: `last_<type>_verification_failure` is not NULL.
    def verification_failed
      project_registry["last_#{type}_verification_failure"].not_eq(nil)
    end

    # Column values applied to every matching row.
    def updates
      {
        "resync_#{type}" => true,
        "#{type}_verification_checksum_sha" => nil,
        "#{type}_checksum_mismatch" => false,
        "last_#{type}_verification_failure" => nil
      }
    end
  end
end
---
title: 'Geo: Add rake task to resync projects where verification has failed'
merge_request: 6727
author:
type: other
# frozen_string_literal: true
namespace :geo do
  namespace :verification do
    # Shared body of the reset tasks below.
    #
    # Aborts with GEO_LICENSE_ERROR_TEXT (defined outside this file) when the
    # license check fails, exits after printing a notice when the node is not
    # a secondary, and otherwise runs Geo::RepositoryVerificationReset for the
    # given type and prints the value it returns.
    #
    # type - registry type passed through to the service (:repository or :wiki
    #        from the tasks in this file).
    def flag_for_resync(type)
      abort GEO_LICENSE_ERROR_TEXT unless Gitlab::Geo.license_allows?

      unless Gitlab::Geo.secondary?
        puts "This command is only available on a secondary node".color(:red)
        exit
      end

      plural_name = type.to_s.pluralize
      puts "Marking #{plural_name} where verification has failed to be resynced..."

      resynced_count = Geo::RepositoryVerificationReset.new(type).execute
      puts "Number of #{plural_name} marked to be resynced: #{resynced_count}"
    end

    namespace :repository do
      desc "GitLab | Verification | Repository | Reset | Resync repositories where verification has failed"
      task(reset: :gitlab_environment) { flag_for_resync(:repository) }
    end

    namespace :wiki do
      desc "GitLab | Verification | Wiki | Reset | Resync wiki repositories where verification has failed"
      task(reset: :gitlab_environment) { flag_for_resync(:wiki) }
    end
  end
end
# frozen_string_literal: true
require 'spec_helper'
describe Geo::RepositoryVerificationReset, :geo do
  include ::EE::GeoHelpers

  let(:secondary) { create(:geo_node) }

  before do
    stub_current_geo_node(secondary)
  end

  # Examples common to both registry types. `type` is :repository or :wiki and
  # is used to derive the factory traits and the registry attribute names.
  shared_examples 'resetting failed verifications' do |type|
    subject(:service) { described_class.new(type) }

    # Builds the attribute expectations for a registry row of the given type.
    define_method(:registry_state) do |resync:, checksum_sha:|
      {
        :"resync_#{type}" => resync,
        :"#{type}_verification_checksum_sha" => checksum_sha,
        :"#{type}_checksum_mismatch" => false,
        :"last_#{type}_verification_failure" => nil
      }
    end

    # Creates a synced registry row with the type-specific trait suffix.
    define_method(:create_registry) do |trait_suffix|
      create(:geo_project_registry, :synced, :"#{type}_#{trait_suffix}")
    end

    it 'returns the total number of projects marked for resync' do
      create_registry(:verified)
      create_registry(:verification_failed)
      create_registry(:verification_failed)

      expect(service.execute).to eq 2
    end

    it 'marks projects where verification has failed to be resynced' do
      failed_registry = create_registry(:verification_failed)

      service.execute

      expect(failed_registry.reload)
        .to have_attributes(registry_state(resync: true, checksum_sha: nil))
    end

    it 'marks projects where checksum mismatch to be resynced' do
      mismatched_registry = create_registry(:checksum_mismatch)

      service.execute

      expect(mismatched_registry.reload)
        .to have_attributes(registry_state(resync: true, checksum_sha: nil))
    end

    it 'does not mark projects where verification succeeded to be resynced' do
      verified_registry = create_registry(:verified)

      service.execute

      expect(verified_registry.reload)
        .to have_attributes(registry_state(resync: false, checksum_sha: be_present))
    end

    it 'does not mark projects pending verification to be resynced' do
      pending_registry = create_registry(:verification_outdated)

      service.execute

      expect(pending_registry.reload)
        .to have_attributes(registry_state(resync: false, checksum_sha: nil))
    end
  end

  describe '#execute' do
    context 'validations' do
      subject(:service) { described_class.new(:foo) }

      it 'returns nil when Geo database is not configured' do
        allow(Gitlab::Geo).to receive_messages(geo_database_configured?: false)

        expect(service.execute).to be_nil
      end

      it 'returns nil when not running on a secondary' do
        allow(Gitlab::Geo).to receive_messages(secondary?: false)

        expect(service.execute).to be_nil
      end

      it 'raises an error for an invalid registry type' do
        expect { service.execute }.to raise_error(ArgumentError, "Invalid type: ':foo'")
      end
    end

    context 'for repositories' do
      include_examples 'resetting failed verifications', :repository
    end

    context 'for wikis' do
      include_examples 'resetting failed verifications', :wiki
    end
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment