Commit 859dc126 authored by Gabriel Mazetto's avatar Gabriel Mazetto

Merge branch 'mk/secondary-side-verification' into 'master'

Geo: Automatically verify Packages on secondaries

See merge request gitlab-org/gitlab!49737
parents 062788e5 7c79ae53
......@@ -26,14 +26,6 @@ module Geo::ReplicableRegistry
def registry_consistency_worker_enabled?
replicator_class.enabled?
end
def verification_pending_batch(batch_size:)
[] # TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/13981
end
def verification_failed_batch(batch_size:)
[] # TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/13981
end
end
def replicator_class
......@@ -105,5 +97,9 @@ module Geo::ReplicableRegistry
super()
end
def replicator
self.class.replicator_class.new(model_record_id: model_record_id)
end
end
end
# frozen_string_literal: true
module Geo::VerifiableRegistry
extend ActiveSupport::Concern
class_methods do
extend ::Gitlab::Utils::Override
# Overrides a method in `Gitlab::Geo::VerificationState`. This method is
# used by `Gitlab::Geo::VerificationState.start_verification_batch` to
# produce a query which must return values of the primary key of the
# *model*, not of the *registry*. We need this so we can instantiate
# Replicators.
override :verification_state_model_key
def verification_state_model_key
self::MODEL_FOREIGN_KEY
end
override :verification_pending_batch_relation
def verification_pending_batch_relation(batch_size:)
super.synced
end
override :verification_failed_batch_relation
def verification_failed_batch_relation(batch_size:)
super.synced
end
override :needs_verification_relation
def needs_verification_relation
super.synced
end
end
included do
extend ::Gitlab::Utils::Override
sha_attribute :verification_checksum_mismatched
override :clear_verification_failure_fields!
def clear_verification_failure_fields!
super
# Note: If the return value of a `before_transition` block is `false`,
# then the transition is halted. Anything else, including `nil`, does not
# halt the transition.
self.checksum_mismatch = false
self.verification_checksum_mismatched = nil
end
# Records a checksum mismatch
#
# @param [String] checksum value which does not match the primary checksum
def verification_failed_due_to_mismatch!(checksum, primary_checksum)
message = 'Checksum does not match the primary checksum'
details = { checksum: checksum, primary_checksum: primary_checksum }
log_info(message, details)
self.verification_failure = "#{message} #{details}".truncate(255)
self.verification_checksum = checksum
self.verification_checksum_mismatched = checksum
self.checksum_mismatch = true
self.verification_failed!
end
private
override :track_checksum_result!
def track_checksum_result!(checksum, calculation_started_at)
unless replicator.matches_checksum?(checksum)
return verification_failed_due_to_mismatch!(checksum, replicator.primary_checksum)
end
verification_succeeded_with_checksum!(checksum, calculation_started_at)
end
end
end
......@@ -120,7 +120,7 @@ module Geo
# Also, if another verification job is running, this will make that job
# set state to pending after it finishes, since the calculated checksum
# is already invalidated.
model_record.verification_started!
verification_state_tracker.verification_started!
Geo::VerificationWorker.perform_async(replicable_name, model_record.id)
end
......@@ -128,7 +128,7 @@ module Geo
# Calculates checksum and asks the model/registry to manage verification
# state.
def verify
model_record.track_checksum_attempt! do
verification_state_tracker.track_checksum_attempt! do
model_record.calculate_checksum
end
end
......@@ -158,5 +158,9 @@ module Geo
def secondary_checksum
registry.verification_checksum
end
def verification_state_tracker
Gitlab::Geo.secondary? ? registry : model_record
end
end
end
......@@ -2,13 +2,11 @@
class Geo::PackageFileRegistry < Geo::BaseRegistry
include ::Geo::ReplicableRegistry
include ShaAttribute
include ::Gitlab::Geo::VerificationState
include ::Geo::VerifiableRegistry
MODEL_CLASS = ::Packages::PackageFile
MODEL_FOREIGN_KEY = :package_file_id
belongs_to :package_file, class_name: 'Packages::PackageFile'
sha_attribute :verification_checksum
sha_attribute :verification_checksum_mismatched
end
---
title: 'Geo: Add verification state fields to package file registry'
merge_request: 49737
author:
type: other
# frozen_string_literal: true
class AddVerificationFieldsToPackageFileRegistry < ActiveRecord::Migration[6.0]
DOWNTIME = false
def change
add_column :package_file_registry, :verification_state, :integer, default: 0, limit: 2, null: false
add_column :package_file_registry, :verification_started_at, :datetime_with_timezone
end
end
# frozen_string_literal: true
class AddVerificationIndexesToPackageFileRegistry < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
PENDING_VERIFICATION_INDEX_NAME = "package_file_registry_pending_verification"
FAILED_VERIFICATION_INDEX_NAME = "package_file_registry_failed_verification"
NEEDS_VERIFICATION_INDEX_NAME = "package_file_registry_needs_verification"
disable_ddl_transaction!
def up
add_concurrent_index :package_file_registry, :verified_at, where: "(state = 2 AND verification_state = 0)", order: { verified_at: 'ASC NULLS FIRST' }, name: PENDING_VERIFICATION_INDEX_NAME
add_concurrent_index :package_file_registry, :verification_retry_at, where: "(state = 2 AND verification_state = 3)", order: { verification_retry_at: 'ASC NULLS FIRST' }, name: FAILED_VERIFICATION_INDEX_NAME
add_concurrent_index :package_file_registry, :verification_state, where: "(state = 2 AND (verification_state IN (0, 3)))", name: NEEDS_VERIFICATION_INDEX_NAME
end
def down
remove_concurrent_index_by_name :package_file_registry, PENDING_VERIFICATION_INDEX_NAME
remove_concurrent_index_by_name :package_file_registry, FAILED_VERIFICATION_INDEX_NAME
remove_concurrent_index_by_name :package_file_registry, NEEDS_VERIFICATION_INDEX_NAME
end
end
......@@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 2020_09_24_184638) do
ActiveRecord::Schema.define(version: 2020_12_08_031224) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -119,9 +119,14 @@ ActiveRecord::Schema.define(version: 2020_09_24_184638) do
t.integer "verification_retry_count"
t.datetime_with_timezone "verified_at"
t.datetime_with_timezone "verification_retry_at"
t.integer "verification_state", limit: 2, default: 0, null: false
t.datetime_with_timezone "verification_started_at"
t.index ["package_file_id"], name: "index_package_file_registry_on_repository_id"
t.index ["retry_at"], name: "index_package_file_registry_on_retry_at"
t.index ["state"], name: "index_package_file_registry_on_state"
t.index ["verification_retry_at"], name: "package_file_registry_failed_verification", order: "NULLS FIRST", where: "((state = 2) AND (verification_state = 3))"
t.index ["verification_state"], name: "package_file_registry_needs_verification", where: "((state = 2) AND (verification_state = ANY (ARRAY[0, 3])))"
t.index ["verified_at"], name: "package_file_registry_pending_verification", order: "NULLS FIRST", where: "((state = 2) AND (verification_state = 0))"
end
create_table "project_registry", id: :serial, force: :cascade do |t|
......
......@@ -5,6 +5,7 @@ module Gitlab
class CronManager
COMMON_JOBS = %w[
geo_metrics_update_worker
geo_verification_cron_worker
repository_check_worker
].freeze
......
......@@ -35,7 +35,7 @@ module Gitlab
scope :checksummed, -> { where.not(verification_checksum: nil) }
scope :not_checksummed, -> { where(verification_checksum: nil) }
scope :verification_timed_out, -> { verification_started.where("verification_started_at < ?", VERIFICATION_TIMEOUT.ago) }
scope :needs_verification, -> { verification_pending.or(verification_failed) }
scope :needs_verification, -> { with_verification_state(:verification_pending, :verification_failed) }
# rubocop:enable CodeReuse/ActiveRecord
state_machine :verification_state, initial: :verification_pending do
......@@ -59,7 +59,6 @@ module Gitlab
end
before_transition any => :verification_failed do |instance, _|
instance.verification_checksum = nil
instance.verification_retry_count ||= 0
instance.verification_retry_count += 1
instance.verification_retry_at = instance.next_retry_time(instance.verification_retry_count)
......@@ -67,10 +66,8 @@ module Gitlab
end
before_transition any => :verification_succeeded do |instance, _|
instance.verification_retry_count = 0
instance.verification_retry_at = nil
instance.verification_failure = nil
instance.verified_at = Time.current
instance.clear_verification_failure_fields!
end
event :verification_started do
......@@ -104,25 +101,40 @@ module Gitlab
# query.
#
def verification_pending_batch(batch_size:)
relation = verification_pending.order(Gitlab::Database.nulls_first_order(:verified_at)).limit(batch_size) # rubocop:disable CodeReuse/ActiveRecord
relation = verification_pending_batch_relation(batch_size: batch_size)
start_verification_batch(relation)
end
# Overridden by Geo::VerifiableRegistry
def verification_pending_batch_relation(batch_size:)
verification_pending.order(Gitlab::Database.nulls_first_order(:verified_at)).limit(batch_size) # rubocop:disable CodeReuse/ActiveRecord
end
# Returns IDs of records that failed to verify (calculate and save checksum).
#
# Atomically marks those records "verification_started" in the same DB
# query.
#
def verification_failed_batch(batch_size:)
relation = verification_failed.order(Gitlab::Database.nulls_first_order(:verification_retry_at)).limit(batch_size) # rubocop:disable CodeReuse/ActiveRecord
relation = verification_failed_batch_relation(batch_size: batch_size)
start_verification_batch(relation)
end
# Overridden by Geo::VerifiableRegistry
def verification_failed_batch_relation(batch_size:)
verification_failed.order(Gitlab::Database.nulls_first_order(:verification_retry_at)).limit(batch_size) # rubocop:disable CodeReuse/ActiveRecord
end
# @return [Integer] number of records that need verification
def needs_verification_count(limit:)
needs_verification.limit(limit).count # rubocop:disable CodeReuse/ActiveRecord
needs_verification_relation.limit(limit).count # rubocop:disable CodeReuse/ActiveRecord
end
# Overridden by Geo::VerifiableRegistry
def needs_verification_relation
needs_verification
end
# Atomically marks the records as verification_started, with a
......@@ -138,7 +150,9 @@ module Gitlab
# This query performs a write, so we need to wrap it in a transaction
# to stick to the primary database.
self.transaction do
self.connection.execute(query).to_a.map { |row| row[self.primary_key] }
self.connection.execute(query).to_a.map do |row|
row[self.verification_state_model_key.to_s]
end
end
end
......@@ -155,11 +169,16 @@ module Gitlab
UPDATE #{table_name}
SET "verification_state" = #{started_enum_value},
"verification_started_at" = NOW()
WHERE #{self.primary_key} IN (#{relation.select(self.primary_key).to_sql})
RETURNING #{self.primary_key}
WHERE #{self.verification_state_model_key} IN (#{relation.select(self.verification_state_model_key).to_sql})
RETURNING #{self.verification_state_model_key}
SQL
end
# Overridden in ReplicableRegistry
def verification_state_model_key
self.primary_key
end
# Fail verification for records which started verification a long time ago
def fail_verification_timeouts
attrs = {
......@@ -177,6 +196,13 @@ module Gitlab
end
end
# Overridden by ReplicableRegistry
def clear_verification_failure_fields!
self.verification_retry_count = 0
self.verification_retry_at = nil
self.verification_failure = nil
end
# Provides a safe and easy way to manage the verification state for a
# synchronous checksum calculation.
#
......@@ -222,6 +248,8 @@ module Gitlab
self.verification_failure = message
self.verification_failure += ": #{error.message}" if error.respond_to?(:message)
self.verification_failure.truncate(255)
self.verification_checksum = nil
self.verification_failed!
end
......
......@@ -16,6 +16,7 @@ RSpec.describe Gitlab::Geo::CronManager, :geo do
geo_repository_verification_secondary_scheduler_worker
geo_metrics_update_worker
geo_prune_event_log_worker
geo_verification_cron_worker
].freeze
def job(name)
......@@ -28,7 +29,7 @@ RSpec.describe Gitlab::Geo::CronManager, :geo do
let_it_be(:primary) { create(:geo_node, :primary) }
let_it_be(:secondary) { create(:geo_node) }
let(:common_jobs) { [job('geo_metrics_update_worker'), job('repository_check_worker')] }
let(:common_jobs) { [job('geo_metrics_update_worker'), job('repository_check_worker'), job('geo_verification_cron_worker')] }
let(:ldap_test_job) { job('ldap_test') }
let(:primary_jobs) { [job('geo_repository_verification_primary_batch_worker')] }
......
......@@ -244,6 +244,7 @@ RSpec.describe Gitlab::Geo::VerificationState do
expect(subject.reload.verification_failed?).to be_truthy
expect(subject.reload.verification_failure).to eq 'Failure to calculate checksum: An error message'
expect(subject.verification_retry_count).to be 1
expect(subject.verification_checksum).to be_nil
end
end
end
......@@ -11,6 +11,7 @@ RSpec.describe Geo::PackageFileRegistry, :geo, type: :model do
end
include_examples 'a Geo framework registry'
include_examples 'a Geo verifiable registry'
describe '.find_registry_differences' do
let(:synced_group) { create(:group) }
......
......@@ -327,15 +327,41 @@ RSpec.shared_examples 'a verifiable replicator' do
describe '#verify' do
it 'wraps the checksum calculation in track_checksum_attempt!' do
tracker = double('tracker', calculate_checksum: 'abc123')
allow(replicator).to receive(:model_record).and_return(tracker)
tracker = double('tracker')
allow(replicator).to receive(:verification_state_tracker).and_return(tracker)
expect(model_record).to receive(:calculate_checksum)
expect(tracker).to receive(:track_checksum_attempt!).and_yield
replicator.verify
end
end
describe '#verification_state_tracker' do
context 'on a Geo primary' do
before do
stub_primary_node
end
it 'returns model_record' do
expect(replicator.verification_state_tracker).to eq(model_record)
end
end
context 'on a Geo secondary' do
before do
stub_secondary_node
end
it 'returns registry' do
registry = double('registry')
allow(replicator).to receive(:registry).and_return(registry)
expect(replicator.verification_state_tracker).to eq(registry)
end
end
end
context 'integration tests' do
before do
model_record.save!
......@@ -362,5 +388,34 @@ RSpec.shared_examples 'a verifiable replicator' do
end
end
end
context 'on a secondary' do
before do
stub_secondary_node
end
describe 'background backfill' do
it 'verifies registries' do
registry = replicator.registry
registry.start
registry.synced!
expect do
Geo::VerificationBatchWorker.new.perform(replicator.replicable_name)
end.to change { registry.reload.verification_succeeded? }.from(false).to(true)
end
end
describe 'triggered by events' do
it 'verifies registries' do
registry = replicator.registry
registry.save!
expect do
Geo::VerificationWorker.new.perform(replicator.replicable_name, replicator.model_record_id)
end.to change { registry.reload.verification_succeeded? }.from(false).to(true)
end
end
end
end
end
# frozen_string_literal: true
# TODO: Include these examples in 'a Geo framework registry' when *all*
# registries are verifiable https://gitlab.com/gitlab-org/gitlab/-/issues/280768
RSpec.shared_examples 'a Geo verifiable registry' do
let(:registry_class_factory) { described_class.underscore.tr('/', '_').to_sym }
subject(:registry_record) { create(registry_class_factory, :synced) }
describe '.verification_pending_batch' do
before do
subject.save!
end
it 'returns IDs of rows which are synced and pending verification' do
expect(described_class.verification_pending_batch(batch_size: 4)).to match_array([subject.model_record_id])
end
it 'excludes rows which are not synced or are not pending verification' do
# rubocop:disable Rails/SaveBang
create(registry_class_factory, verification_state: verification_state_value(:verification_pending))
create(registry_class_factory, :started, verification_state: verification_state_value(:verification_pending))
create(registry_class_factory, :failed, verification_state: verification_state_value(:verification_pending))
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo')
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_started))
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_succeeded), verification_checksum: 'abc123')
# rubocop:enable Rails/SaveBang
expect(described_class.verification_pending_batch(batch_size: 4)).to match_array([subject.model_record_id])
end
it 'marks verification as started' do
described_class.verification_pending_batch(batch_size: 4)
expect(subject.reload.verification_started?).to be_truthy
expect(subject.verification_started_at).to be_present
end
end
describe '.verification_failed_batch' do
before do
subject.verification_failed_with_message!('foo')
end
it 'returns IDs of rows which are synced and failed verification' do
expect(described_class.verification_failed_batch(batch_size: 4)).to match_array([subject.model_record_id])
end
it 'excludes rows which are not synced or have not failed verification' do
# rubocop:disable Rails/SaveBang
create(registry_class_factory, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo')
create(registry_class_factory, :started, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo')
create(registry_class_factory, :failed, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo')
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_pending))
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_started))
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_succeeded), verification_checksum: 'abc123')
# rubocop:enable Rails/SaveBang
expect(described_class.verification_failed_batch(batch_size: 4)).to match_array([subject.model_record_id])
end
it 'marks verification as started' do
described_class.verification_failed_batch(batch_size: 4)
expect(subject.reload.verification_started?).to be_truthy
expect(subject.verification_started_at).to be_present
end
end
describe '.needs_verification_count' do
before do
subject.save!
end
it 'returns the number of rows which are synced and (pending or failed) verification' do
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_failed), verification_failure: 'foo') # rubocop:disable Rails/SaveBang
expect(described_class.needs_verification_count(limit: 3)).to eq(2)
end
it 'excludes rows which are not synced or are not (pending or failed) verification' do
# rubocop:disable Rails/SaveBang
create(registry_class_factory, verification_state: verification_state_value(:verification_pending))
create(registry_class_factory, :started, verification_state: verification_state_value(:verification_pending))
create(registry_class_factory, :failed, verification_state: verification_state_value(:verification_pending))
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_started))
create(registry_class_factory, :synced, verification_state: verification_state_value(:verification_succeeded), verification_checksum: 'abc123')
# rubocop:enable Rails/SaveBang
expect(described_class.needs_verification_count(limit: 3)).to eq(1)
end
end
describe '#verification_succeeded!', :aggregate_failures do
before do
subject.verification_started!
end
it 'clears checksum mismatch fields' do
subject.update!(checksum_mismatch: true, verification_checksum_mismatched: 'abc123')
subject.verification_checksum = 'abc123'
expect do
subject.verification_succeeded!
end.to change { subject.verification_succeeded? }.from(false).to(true)
expect(subject.checksum_mismatch).to eq(false)
expect(subject.verification_checksum_mismatched).to eq(nil)
end
end
describe '#track_checksum_attempt!', :aggregate_failures do
context 'when verification was not yet started' do
it 'starts verification' do
expect do
subject.track_checksum_attempt! do
'a_checksum_value'
end
end.to change { subject.verification_started_at }.from(nil)
end
context 'comparison with primary checksum' do
let(:replicator) { double('replicator') }
let(:calculated_checksum) { 'abc123' }
before do
allow(subject).to receive(:replicator).and_return(replicator)
allow(replicator).to receive(:matches_checksum?).with(calculated_checksum).and_return(matches_checksum)
end
context 'when the calculated checksum matches the primary checksum' do
let(:matches_checksum) { true }
it 'transitions to verification_succeeded and updates the checksum' do
expect do
subject.track_checksum_attempt! do
calculated_checksum
end
end.to change { subject.verification_succeeded? }.from(false).to(true)
expect(subject.verification_checksum).to eq(calculated_checksum)
end
end
context 'when the calculated checksum does not match the primary checksum' do
let(:matches_checksum) { false }
it 'transitions to verification_failed and updates mismatch fields' do
allow(replicator).to receive(:primary_checksum).and_return(calculated_checksum)
expect do
subject.track_checksum_attempt! do
calculated_checksum
end
end.to change { subject.verification_failed? }.from(false).to(true)
expect(subject.verification_checksum).to eq(calculated_checksum)
expect(subject.verification_checksum_mismatched).to eq(calculated_checksum)
expect(subject.checksum_mismatch).to eq(true)
expect(subject.verification_failure).to match('Checksum does not match the primary checksum')
end
end
end
end
context 'when verification was started' do
it 'does not update verification_started_at' do
subject.verification_started!
expected = subject.verification_started_at
subject.track_checksum_attempt! do
'a_checksum_value'
end
expect(subject.verification_started_at).to be_within(1.second).of(expected)
end
end
it 'yields to the checksum calculation' do
expect do |probe|
subject.track_checksum_attempt!(&probe)
end.to yield_with_no_args
end
context 'when an error occurs while yielding' do
it 'sets verification_failed' do
subject.track_checksum_attempt! do
raise 'an error'
end
expect(subject.reload.verification_failed?).to be_truthy
end
end
end
def verification_state_value(key)
described_class.verification_state_value(key)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment