From 1f004e12b6d3c7a496af4aacb29a13137a74cfe3 Mon Sep 17 00:00:00 2001 From: Douglas Barbosa Alexandre <dbalexandre@gmail.com> Date: Wed, 21 Mar 2018 14:37:00 +0000 Subject: [PATCH] Geo - Switch from time-based checking of outdated checksums to the nil-checksum-based approach --- db/schema.rb | 9 +- ee/app/finders/geo/project_registry_finder.rb | 112 ++++---- .../geo/repository_verification_finder.rb | 29 +-- ee/app/models/geo/project_registry.rb | 31 +-- ee/app/models/project_repository_state.rb | 38 +-- ee/app/services/ee/wiki_pages/base_service.rb | 3 +- ee/app/services/geo/base_sync_service.rb | 9 +- .../geo/repository_updated_service.rb | 47 ++++ .../repository_verify_secondary_service.rb | 62 ++--- ee/app/workers/ee/post_receive.rb | 10 +- .../primary/shard_worker.rb | 2 +- .../primary/single_worker.rb | 12 +- .../secondary/scheduler_worker.rb | 3 +- .../unreleased/da-repository-verification.yml | 6 + ...ct_registy_verification_failure_columns.rb | 29 +++ ...dex_to_project_registy_checksum_columns.rb | 29 +++ ...ailed_columns_from_geo_project_registry.rb | 13 + ...on_at_columns_from_geo_project_registry.rb | 13 + ee/db/geo/schema.rb | 10 +- ...ject_repository_states_checksum_columns.rb | 25 ++ ..._repository_states_verification_columns.rb | 29 +++ ..._columns_from_project_repository_states.rb | 13 + ..._columns_from_project_repository_states.rb | 13 + ee/lib/gitlab/geo/database_tasks.rb | 16 +- ee/lib/gitlab/geo/health_check.rb | 7 +- ee/lib/gitlab/geo/log_cursor/daemon.rb | 3 +- ee/spec/factories/geo/project_registry.rb | 36 +-- .../factories/project_repository_states.rb | 30 ++- .../geo/project_registry_finder_spec.rb | 246 +++++++----------- .../repository_verification_finder_spec.rb | 10 - .../lib/gitlab/geo/log_cursor/daemon_spec.rb | 42 ++- .../models/project_repository_state_spec.rb | 56 ++-- .../geo/repository_sync_service_spec.rb | 12 + .../geo/repository_updated_service_spec.rb | 70 +++++ ...epository_verify_secondary_service_spec.rb | 153 +++-------- .../services/geo/wiki_sync_service_spec.rb | 12 + .../wiki_pages/create_service_spec.rb | 23 +- .../wiki_pages/destroy_service_spec.rb | 23 +- .../wiki_pages/update_service_spec.rb | 23 +- .../primary/shard_worker_spec.rb | 16 +- .../primary/single_worker_spec.rb | 88 ++++--- ee/spec/workers/post_receive_spec.rb | 29 ++- 42 files changed, 808 insertions(+), 634 deletions(-) create mode 100644 ee/app/services/geo/repository_updated_service.rb create mode 100644 ee/changelogs/unreleased/da-repository-verification.yml create mode 100644 ee/db/geo/migrate/20180314175612_add_partial_index_to_project_registy_verification_failure_columns.rb create mode 100644 ee/db/geo/migrate/20180315222132_add_partial_index_to_project_registy_checksum_columns.rb create mode 100644 ee/db/geo/post_migrate/20180320011914_remove_last_verification_failed_columns_from_geo_project_registry.rb create mode 100644 ee/db/geo/post_migrate/20180320013929_remove_last_verification_at_columns_from_geo_project_registry.rb create mode 100644 ee/db/migrate/20180308234102_add_partial_index_to_project_repository_states_checksum_columns.rb create mode 100644 ee/db/migrate/20180314174825_add_partial_index_to_project_repository_states_verification_columns.rb create mode 100644 ee/db/post_migrate/20180309215236_remove_last_verication_at_columns_from_project_repository_states.rb create mode 100644 ee/db/post_migrate/20180314172513_remove_last_verication_failed_columns_from_project_repository_states.rb create mode 100644 ee/spec/services/geo/repository_updated_service_spec.rb diff --git a/db/schema.rb b/db/schema.rb index e6d281cd0e8..384ebc5e6c3 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20180314100728) do +ActiveRecord::Schema.define(version: 20180314174825) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -1900,15 +1900,14 @@ ActiveRecord::Schema.define(version: 20180314100728) do t.integer "project_id", null: false t.binary "repository_verification_checksum" t.binary "wiki_verification_checksum" - t.boolean "last_repository_verification_failed", default: false, null: false - t.boolean "last_wiki_verification_failed", default: false, null: false - t.datetime_with_timezone "last_repository_verification_at" - t.datetime_with_timezone "last_wiki_verification_at" t.string "last_repository_verification_failure" t.string "last_wiki_verification_failure" end + add_index "project_repository_states", ["last_repository_verification_failure"], name: "idx_repository_states_on_repository_failure_partial", where: "(last_repository_verification_failure IS NOT NULL)", using: :btree + add_index "project_repository_states", ["last_wiki_verification_failure"], name: "idx_repository_states_on_wiki_failure_partial", where: "(last_wiki_verification_failure IS NOT NULL)", using: :btree add_index "project_repository_states", ["project_id"], name: "index_project_repository_states_on_project_id", unique: true, using: :btree + add_index "project_repository_states", ["repository_verification_checksum", "wiki_verification_checksum"], name: "idx_repository_states_on_checksums_partial", where: "((repository_verification_checksum IS NULL) OR (wiki_verification_checksum IS NULL))", using: :btree create_table "project_statistics", force: :cascade do |t| t.integer "project_id", null: false diff --git a/ee/app/finders/geo/project_registry_finder.rb b/ee/app/finders/geo/project_registry_finder.rb index 9b718d72c10..e8ae758f766 100644 --- a/ee/app/finders/geo/project_registry_finder.rb +++ b/ee/app/finders/geo/project_registry_finder.rb @@ -84,12 +84,12 @@ module Geo end end - # find all registries that need a repository or wiki verified - def find_registries_to_verify + # Find all registries that need a repository or wiki verification + def find_registries_to_verify(batch_size:) if use_legacy_queries? - legacy_find_registries_to_verify + legacy_find_registries_to_verify(batch_size: batch_size) else - fdw_find_registries_to_verify + fdw_find_registries_to_verify(batch_size: batch_size) end end @@ -151,34 +151,6 @@ module Geo end end - def conditions_for_verification(type, use_fdw = true) - last_verification_failed = "last_#{type}_verification_failed".to_sym - verification_checksum = "#{type}_verification_checksum".to_sym - last_verification_at = "last_#{type}_verification_at".to_sym - - state_arel = use_fdw ? fdw_repository_state_arel : legacy_repository_state_arel - - # primary verification did not fail - primary_verification_not_failed = state_arel[last_verification_failed].eq(false) - - # primary checksum is not NULL - primary_has_checksum = state_arel[verification_checksum].not_eq(nil) - - # primary was verified later than the secondary verification - primary_recently_verified = state_arel[last_verification_at].gt(registry_arel[last_verification_at]) - .or(registry_arel[last_verification_at].eq(nil)) - - # secondary verification failed and the last verification was over 24.hours.ago - # this allows us to retry any verification failures if they haven't already corrected themselves - secondary_failure_period = registry_arel[last_verification_at].lt(24.hours.ago) - .and(registry_arel[last_verification_failed].eq(true)) - - primary_verification_not_failed - .and(primary_has_checksum) - .and(primary_recently_verified) - .or(secondary_failure_period) - end - # # FDW accessors # @@ -213,12 +185,28 @@ module Geo .merge(Geo::ProjectRegistry.retry_due) end - # find all registries that need a repository or wiki verified + # Find all registries that repository or wiki need verification # @return [ActiveRecord::Relation<Geo::ProjectRegistry>] list of registries that need verification - def fdw_find_registries_to_verify + def fdw_find_registries_to_verify(batch_size:) Geo::ProjectRegistry - .joins("LEFT OUTER JOIN #{fdw_repository_state_table} ON #{fdw_repository_state_table}.project_id = project_registry.project_id") - .where(conditions_for_verification(:repository, true).or(conditions_for_verification(:wiki, true))) + .joins(fdw_inner_join_repository_state) + .where( + local_registry_table[:repository_verification_checksum].eq(nil).or( + local_registry_table[:wiki_verification_checksum].eq(nil) + ) + ) + .where( + fdw_repository_state_table[:repository_verification_checksum].not_eq(nil).or( + fdw_repository_state_table[:wiki_verification_checksum].not_eq(nil) + ) + ).limit(batch_size) + end + + def fdw_inner_join_repository_state + local_registry_table + .join(fdw_repository_state_table, Arel::Nodes::InnerJoin) + .on(local_registry_table[:project_id].eq(fdw_repository_state_table[:project_id])) + .join_sources end # @@ -311,44 +299,42 @@ module Geo end # @return [ActiveRecord::Relation<Geo::ProjectRegistry>] list of registries that need verification - def legacy_find_registries_to_verify - registries = Geo::ProjectRegistry - .pluck(:project_id, :last_repository_verification_at, :last_wiki_verification_at, - :last_repository_verification_failed, :last_wiki_verification_failed) + def legacy_find_registries_to_verify(batch_size:) + registries = Geo::ProjectRegistry.where( + local_registry_table[:repository_verification_checksum].eq(nil).or( + local_registry_table[:wiki_verification_checksum].eq(nil) + ) + ).pluck(:project_id) return Geo::ProjectRegistry.none if registries.empty? - id_and_values = registries.map do |project_id, repo_at, wiki_at, repo_failed, wiki_failed| - "(#{project_id}, to_timestamp(#{repo_at.to_i}), to_timestamp(#{wiki_at.to_i}), - #{quote_value(repo_failed)}, #{quote_value(wiki_failed)})" - end + id_and_want_to_sync = registries.map { |project_id| "(#{project_id}, #{quote_value(true)})" } - joined_relation = ProjectRepositoryState.joins(<<~SQL) - INNER JOIN - (VALUES #{id_and_values.join(',')}) - project_registry(project_id, last_repository_verification_at, last_wiki_verification_at, - last_repository_verification_failed, last_wiki_verification_failed) - ON #{ProjectRepositoryState.table_name}.project_id = project_registry.project_id - SQL + joined_relation = + ProjectRepositoryState.joins(<<~SQL) + INNER JOIN + (VALUES #{id_and_want_to_sync.join(',')}) + project_registry(project_id, want_to_sync) + ON #{legacy_repository_state_table.name}.project_id = project_registry.project_id + SQL project_ids = joined_relation - .where(conditions_for_verification(:repository, false).or(conditions_for_verification(:wiki, false))) - .pluck(:project_id) + .where( + legacy_repository_state_table[:repository_verification_checksum].not_eq(nil).or( + legacy_repository_state_table[:wiki_verification_checksum].not_eq(nil) + ) + ).where( + project_registry: { want_to_sync: true } + ).limit(batch_size).pluck(:project_id) - ::Geo::ProjectRegistry.where(project_id: project_ids) + Geo::ProjectRegistry.where(project_id: project_ids) end - private - - def registry_arel + def local_registry_table Geo::ProjectRegistry.arel_table end - def fdw_repository_state_arel - Geo::Fdw::ProjectRepositoryState.arel_table - end - - def legacy_repository_state_arel + def legacy_repository_state_table ::ProjectRepositoryState.arel_table end @@ -357,7 +343,7 @@ module Geo end def fdw_repository_state_table - Geo::Fdw::ProjectRepositoryState.table_name + Geo::Fdw::ProjectRepositoryState.arel_table end end end diff --git a/ee/app/finders/geo/repository_verification_finder.rb b/ee/app/finders/geo/repository_verification_finder.rb index 70049d4f9ce..59f63b49d9c 100644 --- a/ee/app/finders/geo/repository_verification_finder.rb +++ b/ee/app/finders/geo/repository_verification_finder.rb @@ -2,20 +2,19 @@ module Geo class RepositoryVerificationFinder def find_outdated_projects(batch_size:) Project.select(:id) + .with_route .joins(:repository_state) - .where(repository_unverified - .or(repository_verification_outdated) - .or(wiki_unverified) - .or(wiki_verification_outdated)) - .order(repository_state_table[:last_repository_verification_at].asc) + .where(repository_outdated.or(wiki_outdated)) + .order(last_repository_updated_at_asc) .limit(batch_size) end def find_unverified_projects(batch_size:) Project.select(:id) + .with_route .joins(left_join_repository_state) .where(repository_never_verified) - .order(projects_table[:last_repository_updated_at].asc) + .order(last_repository_updated_at_asc) .limit(batch_size) end @@ -52,26 +51,20 @@ module Geo .join_sources end - def repository_unverified + def repository_outdated repository_state_table[:repository_verification_checksum].eq(nil) end - def repository_verification_outdated - repository_state_table[:last_repository_verification_at] - .lt(projects_table[:last_repository_updated_at]) - end - - def wiki_unverified + def wiki_outdated repository_state_table[:wiki_verification_checksum].eq(nil) end - def wiki_verification_outdated - repository_state_table[:last_wiki_verification_at] - .lt(projects_table[:last_repository_updated_at]) - end - def repository_never_verified repository_state_table[:id].eq(nil) end + + def last_repository_updated_at_asc + Gitlab::Database.nulls_last_order('projects.last_repository_updated_at', 'ASC') + end end end diff --git a/ee/app/models/geo/project_registry.rb b/ee/app/models/geo/project_registry.rb index 77797bc62d2..3fd8c68ffc7 100644 --- a/ee/app/models/geo/project_registry.rb +++ b/ee/app/models/geo/project_registry.rb @@ -1,5 +1,11 @@ class Geo::ProjectRegistry < Geo::BaseRegistry include ::EachBatch + include ::IgnorableColumn + + ignore_column :last_repository_verification_at + ignore_column :last_repository_verification_failed + ignore_column :last_wiki_verification_at + ignore_column :last_wiki_verification_failed belongs_to :project @@ -8,8 +14,10 @@ class Geo::ProjectRegistry < Geo::BaseRegistry scope :dirty, -> { where(arel_table[:resync_repository].eq(true).or(arel_table[:resync_wiki].eq(true))) } scope :failed_repos, -> { where(arel_table[:repository_retry_count].gt(0)) } scope :failed_wikis, -> { where(arel_table[:wiki_retry_count].gt(0)) } - scope :verification_failed_repos, -> { where(arel_table[:last_repository_verification_failed].eq(true)) } - scope :verification_failed_wikis, -> { where(arel_table[:last_wiki_verification_failed].eq(true)) } + scope :verified_repos, -> { where.not(repository_verification_checksum: nil) } + scope :verified_wikis, -> { where.not(wiki_verification_checksum: nil) } + scope :verification_failed_repos, -> { where.not(last_repository_verification_failure: nil) } + scope :verification_failed_wikis, -> { where.not(last_wiki_verification_failure: nil) } def self.failed repository_sync_failed = arel_table[:repository_retry_count].gt(0) @@ -19,8 +27,8 @@ class Geo::ProjectRegistry < Geo::BaseRegistry end def self.verification_failed - repository_verification_failed = arel_table[:last_repository_verification_failed].eq(true) - wiki_verification_failed = arel_table[:last_wiki_verification_failed].eq(true) + repository_verification_failed = arel_table[:last_repository_verification_failure].not_eq(nil) + wiki_verification_failed = arel_table[:last_wiki_verification_failure].not_eq(nil) where(repository_verification_failed.or(wiki_verification_failed)) end @@ -44,16 +52,6 @@ class Geo::ProjectRegistry < Geo::BaseRegistry .where(resync_wiki: false) end - def self.verified_repos - where.not(last_repository_verification_at: nil, repository_verification_checksum: nil) - .where(last_repository_verification_failed: false) - end - - def self.verified_wikis - where.not(last_wiki_verification_at: nil, wiki_verification_checksum: nil) - .where(last_wiki_verification_failed: false) - end - def repository_sync_due?(scheduled_time) never_synced_repository? || repository_sync_needed?(scheduled_time) end @@ -62,11 +60,6 @@ class Geo::ProjectRegistry < Geo::BaseRegistry project.wiki_enabled? && (never_synced_wiki? || wiki_sync_needed?(scheduled_time)) end - delegate :repository_state, to: :project - delegate :repository_verification_checksum, :last_repository_verification_at, - :wiki_verification_checksum, :last_wiki_verification_at, - to: :repository_state, allow_nil: true, prefix: :project - def repository_path(type) repo_path = project.disk_path diff --git a/ee/app/models/project_repository_state.rb b/ee/app/models/project_repository_state.rb index 4f7648ee22a..b7fea271eee 100644 --- a/ee/app/models/project_repository_state.rb +++ b/ee/app/models/project_repository_state.rb @@ -1,6 +1,12 @@ class ProjectRepositoryState < ActiveRecord::Base + include IgnorableColumn include ShaAttribute + ignore_column :last_repository_verification_at + ignore_column :last_repository_verification_failed + ignore_column :last_wiki_verification_at + ignore_column :last_wiki_verification_failed + sha_attribute :repository_verification_checksum sha_attribute :wiki_verification_checksum @@ -8,34 +14,18 @@ class ProjectRepositoryState < ActiveRecord::Base validates :project, presence: true, uniqueness: true - scope :verification_failed_repos, -> { where(arel_table[:last_repository_verification_failed].eq(true)) } - scope :verification_failed_wikis, -> { where(arel_table[:last_wiki_verification_failed].eq(true)) } + scope :verification_failed_repos, -> { where.not(last_repository_verification_failure: nil) } + scope :verification_failed_wikis, -> { where.not(last_wiki_verification_failure: nil) } + scope :verified_repos, -> { where.not(repository_verification_checksum: nil).where(last_repository_verification_failure: nil) } + scope :verified_wikis, -> { where.not(wiki_verification_checksum: nil).where(last_wiki_verification_failure: nil) } - def repository_checksum_outdated?(timestamp) - repository_verification_checksum.nil? || recalculate_repository_checksum?(timestamp) + def repository_checksum_outdated? + repository_verification_checksum.nil? end - def wiki_checksum_outdated?(timestamp) + def wiki_checksum_outdated? return false unless project.wiki_enabled? - wiki_verification_checksum.nil? || recalculate_wiki_checksum?(timestamp) - end - - def self.verified_repos - where.not(repository_verification_checksum: nil) - .where(last_repository_verification_failed: false) - end - - def self.verified_wikis - where.not(wiki_verification_checksum: nil) - .where(last_wiki_verification_failed: false) - end - - def recalculate_repository_checksum?(timestamp) - last_repository_verification_at.nil? || timestamp > last_repository_verification_at - end - - def recalculate_wiki_checksum?(timestamp) - last_wiki_verification_at.nil? || timestamp > last_wiki_verification_at + wiki_verification_checksum.nil? end end diff --git a/ee/app/services/ee/wiki_pages/base_service.rb b/ee/app/services/ee/wiki_pages/base_service.rb index a221fc4a11f..32d6e05146a 100644 --- a/ee/app/services/ee/wiki_pages/base_service.rb +++ b/ee/app/services/ee/wiki_pages/base_service.rb @@ -16,8 +16,7 @@ module EE def process_wiki_repository_update if ::Gitlab::Geo.primary? - # Create wiki repository updated event on Geo event log - ::Geo::RepositoryUpdatedEventStore.new(project, source: ::Geo::RepositoryUpdatedEvent::WIKI).create + ::Geo::RepositoryUpdatedService.new(project, source: ::Geo::RepositoryUpdatedEvent::WIKI).execute end end end diff --git a/ee/app/services/geo/base_sync_service.rb b/ee/app/services/geo/base_sync_service.rb index e4965720ba0..ea93238aada 100644 --- a/ee/app/services/geo/base_sync_service.rb +++ b/ee/app/services/geo/base_sync_service.rb @@ -112,11 +112,6 @@ module Geo attrs["last_#{type}_synced_at"] = started_at attrs["#{type}_retry_count"] = retry_count + 1 attrs["#{type}_retry_at"] = next_retry_time(attrs["#{type}_retry_count"]) - - # indicate that repository verification needs to be done again - attrs["#{type}_verification_checksum"] = nil - attrs["last_#{type}_verification_at"] = nil - attrs["last_#{type}_verification_failure"] = nil end if finished_at @@ -125,6 +120,10 @@ module Geo attrs["#{type}_retry_count"] = nil attrs["#{type}_retry_at"] = nil attrs["force_to_redownload_#{type}"] = false + + # Indicate that repository verification needs to be done again + attrs["#{type}_verification_checksum"] = nil + attrs["last_#{type}_verification_failure"] = nil end registry.update!(attrs) diff --git a/ee/app/services/geo/repository_updated_service.rb b/ee/app/services/geo/repository_updated_service.rb new file mode 100644 index 00000000000..db1b61aa69a --- /dev/null +++ b/ee/app/services/geo/repository_updated_service.rb @@ -0,0 +1,47 @@ +module Geo + class RepositoryUpdatedService + include ::Gitlab::Geo::ProjectLogHelpers + + def initialize(project, params = {}) + @project = project + @params = params + @refs = params.fetch(:refs, []) + @changes = params.fetch(:changes, []) + @source = params.fetch(:source, Geo::RepositoryUpdatedEvent::REPOSITORY) + end + + def execute + return false unless Gitlab::Geo.primary? + + reset_repository_checksum! + create_repository_updated_event! + + true + end + + private + + attr_reader :project, :refs, :changes, :source + + delegate :repository_state, to: :project + + def create_repository_updated_event! + Geo::RepositoryUpdatedEventStore.new( + project, refs: refs, changes: changes, source: source + ).create + end + + def reset_repository_checksum! + return if repository_state.nil? + + repository_state.update!("#{repository_checksum_column}" => nil) + rescue => e + log_error('Cannot reset repository checksum', e) + raise Gitlab::Git::Checksum::Failure, "Cannot reset repository checksum: #{e}" + end + + def repository_checksum_column + "#{Geo::RepositoryUpdatedEvent.sources.key(source)}_verification_checksum" + end + end +end diff --git a/ee/app/services/geo/repository_verify_secondary_service.rb b/ee/app/services/geo/repository_verify_secondary_service.rb index 317f989bcdb..b9bf66bbe6d 100644 --- a/ee/app/services/geo/repository_verify_secondary_service.rb +++ b/ee/app/services/geo/repository_verify_secondary_service.rb @@ -1,5 +1,3 @@ -# rubocop:disable GitlabSecurity/PublicSend - module Geo class RepositoryVerifySecondaryService include Gitlab::Geo::RepositoryVerificationLogHelpers @@ -19,72 +17,56 @@ module Geo verify_checksum end - # This is primarily a guard method, to reduce the chance of false failures (which could happen - # for repositories that change very rapidly) - def should_verify_checksum? - primary_checksum = registry.repository_state.public_send("#{type}_verification_checksum") - secondary_checksum = registry.public_send("#{type}_verification_checksum") - primary_last_verification_at = registry.repository_state.public_send("last_#{type}_verification_at") - secondary_last_verification_at = registry.public_send("last_#{type}_verification_at") || Time.at(0) - secondary_last_successful_sync_at = registry.public_send("last_#{type}_successful_sync_at") - - # primary repository was verified (even if checksum is nil). - # note: we allow a nil primary checksum so that we will run through the checksum - # and set the verification date on the secondary. Otherwise, we'll keep revisiting - # this record over and over. - return false if primary_last_verification_at.nil? + private - # secondary repository checksum does not equal the primary repository checksum - return false if secondary_checksum == primary_checksum && !primary_checksum.nil? + attr_reader :registry, :type - # primary was verified later than the secondary verification - return false if primary_last_verification_at < secondary_last_verification_at + def should_verify_checksum? + return false if resync? - # secondary repository was successfully synced after the last secondary verification - return false if secondary_last_successful_sync_at.nil? || secondary_last_successful_sync_at < secondary_last_verification_at + primary_checksum.present? && primary_checksum != secondary_checksum + end - true + def resync? + registry.public_send("resync_#{type}") # rubocop:disable GitlabSecurity/PublicSend end - private + def primary_checksum + project.repository_state.public_send("#{type}_verification_checksum") # rubocop:disable GitlabSecurity/PublicSend + end - attr_reader :registry, :type + def secondary_checksum + registry.public_send("#{type}_verification_checksum") # rubocop:disable GitlabSecurity/PublicSend + end def verify_checksum checksum = calculate_checksum(project.repository_storage, repository_path) if mismatch?(checksum) - record_status(error_msg: "#{type.to_s.capitalize} checksum mismatch: #{repository_path}") + update_registry!(failure: "#{type.to_s.capitalize} checksum mismatch: #{repository_path}") else - record_status(checksum: checksum) + update_registry!(checksum: checksum) end rescue ::Gitlab::Git::Repository::NoRepository, ::Gitlab::Git::Checksum::Failure, Timeout::Error => e - record_status(error_msg: "Error verifying #{type.to_s.capitalize} checksum: #{repository_path}", exception: e) + update_registry!(failure: "Error verifying #{type.to_s.capitalize} checksum: #{repository_path}", exception: e) end def mismatch?(checksum) - checksum != registry.public_send("project_#{type}_verification_checksum") + primary_checksum != checksum end def calculate_checksum(storage, relative_path) Gitlab::Git::Checksum.new(storage, relative_path).calculate end - # note: the `last_#{type}_verification_at` is always set, indicating that was the - # time that we _did_ a verification, success or failure - def record_status(checksum: nil, error_msg: nil, exception: nil, details: {}) + def update_registry!(checksum: nil, failure: nil, exception: nil, details: {}) attrs = { "#{type}_verification_checksum" => checksum, - "last_#{type}_verification_at" => DateTime.now, - "last_#{type}_verification_failure" => nil, - "last_#{type}_verification_failed" => false + "last_#{type}_verification_failure" => failure } - if error_msg - attrs["last_#{type}_verification_failed"] = true - attrs["last_#{type}_verification_failure"] = error_msg - - log_error(error_msg, exception, type: type, repository_path: repository_path, full_path: path_to_repo) + if failure + log_error(failure, exception, type: type, repository_path: repository_path, full_path: path_to_repo) end registry.update!(attrs) diff --git a/ee/app/workers/ee/post_receive.rb b/ee/app/workers/ee/post_receive.rb index 2f0886a9ad3..97539d62800 100644 --- a/ee/app/workers/ee/post_receive.rb +++ b/ee/app/workers/ee/post_receive.rb @@ -11,8 +11,9 @@ module EE def after_project_changes_hooks(post_received, user, refs, changes) super - # Generate repository updated event on Geo event log when Geo is enabled - ::Geo::RepositoryUpdatedEventStore.new(post_received.project, refs: refs, changes: changes).create + if ::Gitlab::Geo.primary? + ::Geo::RepositoryUpdatedService.new(post_received.project, refs: refs, changes: changes).execute + end end def process_wiki_changes(post_received) @@ -20,9 +21,8 @@ module EE update_wiki_es_indexes(post_received) - if ::Gitlab::Geo.enabled? - # Create wiki repository updated event on Geo event log - ::Geo::RepositoryUpdatedEventStore.new(post_received.project, source: ::Geo::RepositoryUpdatedEvent::WIKI).create + if ::Gitlab::Geo.primary? + ::Geo::RepositoryUpdatedService.new(post_received.project, source: ::Geo::RepositoryUpdatedEvent::WIKI).execute end end diff --git a/ee/app/workers/geo/repository_verification/primary/shard_worker.rb b/ee/app/workers/geo/repository_verification/primary/shard_worker.rb index b267bd1fdbf..3d5c47893b7 100644 --- a/ee/app/workers/geo/repository_verification/primary/shard_worker.rb +++ b/ee/app/workers/geo/repository_verification/primary/shard_worker.rb @@ -32,7 +32,7 @@ module Geo end def schedule_job(project_id) - job_id = Geo::RepositoryVerification::Primary::SingleWorker.perform_async(project_id, Time.now) + job_id = Geo::RepositoryVerification::Primary::SingleWorker.perform_async(project_id) { id: project_id, job_id: job_id } if job_id end diff --git a/ee/app/workers/geo/repository_verification/primary/single_worker.rb b/ee/app/workers/geo/repository_verification/primary/single_worker.rb index 1bc6a62142e..8d4d49e0209 100644 --- a/ee/app/workers/geo/repository_verification/primary/single_worker.rb +++ b/ee/app/workers/geo/repository_verification/primary/single_worker.rb @@ -11,15 +11,15 @@ module Geo attr_reader :project - def perform(project_id, scheduled_time) + def perform(project_id) return unless Gitlab::Geo.primary? @project = Project.find_by(id: project_id) return if project.nil? || project.pending_delete? try_obtain_lease do - calculate_repository_checksum if repository_state.repository_checksum_outdated?(scheduled_time) - calculate_wiki_checksum if repository_state.wiki_checksum_outdated?(scheduled_time) + calculate_repository_checksum if repository_state.repository_checksum_outdated? + calculate_wiki_checksum if repository_state.wiki_checksum_outdated? end end @@ -38,14 +38,12 @@ module Geo update_repository_state!(type, checksum: checksum.calculate) rescue => e log_error('Error calculating the repository checksum', e, type: type) - update_repository_state!(type, failed: true, failure: e.message) + update_repository_state!(type, failure: e.message) end - def update_repository_state!(type, checksum: nil, failed: false, failure: nil) + def update_repository_state!(type, checksum: nil, failure: nil) repository_state.update!( "#{type}_verification_checksum" => checksum, - "last_#{type}_verification_at" => DateTime.now, - "last_#{type}_verification_failed" => failed, "last_#{type}_verification_failure" => failure ) end diff --git a/ee/app/workers/geo/repository_verification/secondary/scheduler_worker.rb b/ee/app/workers/geo/repository_verification/secondary/scheduler_worker.rb index cbb79e29740..62f1e93368f 100644 --- a/ee/app/workers/geo/repository_verification/secondary/scheduler_worker.rb +++ b/ee/app/workers/geo/repository_verification/secondary/scheduler_worker.rb @@ -19,7 +19,8 @@ module Geo end def load_pending_resources - finder.find_registries_to_verify.limit(db_retrieve_batch_size).pluck(:id) + finder.find_registries_to_verify(batch_size: db_retrieve_batch_size) + .pluck(:id) end def schedule_job(registry_id) diff --git a/ee/changelogs/unreleased/da-repository-verification.yml b/ee/changelogs/unreleased/da-repository-verification.yml new file mode 100644 index 00000000000..165a8d9e7d0 --- /dev/null +++ b/ee/changelogs/unreleased/da-repository-verification.yml @@ -0,0 +1,6 @@ +--- +title: Geo - Switch from time-based checking of outdated checksums to the nil-checksum-based + approach +merge_request: +author: +type: changed diff --git a/ee/db/geo/migrate/20180314175612_add_partial_index_to_project_registy_verification_failure_columns.rb b/ee/db/geo/migrate/20180314175612_add_partial_index_to_project_registy_verification_failure_columns.rb new file mode 100644 index 00000000000..12dfcfcbe3e --- /dev/null +++ b/ee/db/geo/migrate/20180314175612_add_partial_index_to_project_registy_verification_failure_columns.rb @@ -0,0 +1,29 @@ +class AddPartialIndexToProjectRegistyVerificationFailureColumns < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + REPOSITORY_INDEX_NAME = 'idx_project_registry_on_repository_failure_partial' + WIKI_INDEX_NAME = 'idx_project_registry_on_wiki_failure_partial' + + disable_ddl_transaction! + + def up + unless index_exists?(:project_registry, :project_id, name: REPOSITORY_INDEX_NAME) + add_concurrent_index(:project_registry, :project_id, name: REPOSITORY_INDEX_NAME, where: 'last_repository_verification_failure IS NOT NULL') + end + + unless index_exists?(:project_registry, :project_id, name: WIKI_INDEX_NAME) + add_concurrent_index(:project_registry, :project_id, name: WIKI_INDEX_NAME, where: 'last_wiki_verification_failure IS NOT NULL') + end + end + + def down + if index_exists?(:project_registry, :project_id, name: REPOSITORY_INDEX_NAME) + remove_concurrent_index_by_name(:project_registry, REPOSITORY_INDEX_NAME) + end + + if index_exists?(:project_registry, :project_id, name: WIKI_INDEX_NAME) + remove_concurrent_index_by_name(:project_registry, WIKI_INDEX_NAME) + end + end +end diff --git a/ee/db/geo/migrate/20180315222132_add_partial_index_to_project_registy_checksum_columns.rb b/ee/db/geo/migrate/20180315222132_add_partial_index_to_project_registy_checksum_columns.rb new file mode 100644 index 00000000000..1cc303dd6c6 --- /dev/null +++ b/ee/db/geo/migrate/20180315222132_add_partial_index_to_project_registy_checksum_columns.rb @@ -0,0 +1,29 @@ +class AddPartialIndexToProjectRegistyChecksumColumns < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + REPOSITORY_INDEX_NAME = 'idx_project_registry_on_repository_checksum_partial' + WIKI_INDEX_NAME = 'idx_project_registry_on_wiki_checksum_partial' + + disable_ddl_transaction! + + def up + unless index_exists?(:project_registry, :repository_verification_checksum, name: REPOSITORY_INDEX_NAME) + add_concurrent_index(:project_registry, :repository_verification_checksum, name: REPOSITORY_INDEX_NAME, where: 'repository_verification_checksum IS NULL') + end + + unless index_exists?(:project_registry, :wiki_verification_checksum, name: WIKI_INDEX_NAME) + add_concurrent_index(:project_registry, :wiki_verification_checksum, name: WIKI_INDEX_NAME, where: 'wiki_verification_checksum IS NULL') + end + end + + def down + if index_exists?(:project_registry, :repository_verification_checksum, name: REPOSITORY_INDEX_NAME) + remove_concurrent_index_by_name(:project_registry, REPOSITORY_INDEX_NAME) + end + + if index_exists?(:project_registry, :wiki_verification_checksum, name: WIKI_INDEX_NAME) + remove_concurrent_index_by_name(:project_registry, WIKI_INDEX_NAME) + end + end +end diff --git a/ee/db/geo/post_migrate/20180320011914_remove_last_verification_failed_columns_from_geo_project_registry.rb b/ee/db/geo/post_migrate/20180320011914_remove_last_verification_failed_columns_from_geo_project_registry.rb new file mode 100644 index 00000000000..b4fbf00c311 --- /dev/null +++ b/ee/db/geo/post_migrate/20180320011914_remove_last_verification_failed_columns_from_geo_project_registry.rb @@ -0,0 +1,13 @@ +class RemoveLastVerificationFailedColumnsFromGeoProjectRegistry < ActiveRecord::Migration + DOWNTIME = false + + def up + remove_column :project_registry, :last_repository_verification_failed + remove_column :project_registry, :last_wiki_verification_failed + end + + def down + add_column :project_registry, :last_repository_verification_failed, :boolean, default: false, null: false + add_column :project_registry, :last_wiki_verification_failed, :boolean, default: false, null: false + end +end diff --git a/ee/db/geo/post_migrate/20180320013929_remove_last_verification_at_columns_from_geo_project_registry.rb b/ee/db/geo/post_migrate/20180320013929_remove_last_verification_at_columns_from_geo_project_registry.rb new file mode 100644 index 00000000000..d28864a7db5 --- /dev/null +++ b/ee/db/geo/post_migrate/20180320013929_remove_last_verification_at_columns_from_geo_project_registry.rb @@ -0,0 +1,13 @@ +class RemoveLastVerificationAtColumnsFromGeoProjectRegistry < ActiveRecord::Migration + DOWNTIME = false + + def up + remove_column :project_registry, :last_repository_verification_at + remove_column :project_registry, :last_wiki_verification_at + end + + def down + add_column :project_registry, :last_repository_verification_at, :datetime_with_timezone + add_column :project_registry, :last_wiki_verification_at, :datetime_with_timezone + end +end diff --git a/ee/db/geo/schema.rb b/ee/db/geo/schema.rb index c2e275e669c..7fd8dee2f4b 100644 --- a/ee/db/geo/schema.rb +++ b/ee/db/geo/schema.rb @@ -11,7 +11,7 @@ # # It's strongly recommended that you check this file into your version control system. -ActiveRecord::Schema.define(version: 20180201154345) do +ActiveRecord::Schema.define(version: 20180320013929) do # These are extensions that must be enabled in order to support this database enable_extension "plpgsql" @@ -53,21 +53,21 @@ ActiveRecord::Schema.define(version: 20180201154345) do t.string "last_repository_sync_failure" t.string "last_wiki_sync_failure" t.string "repository_verification_checksum" - t.datetime_with_timezone "last_repository_verification_at" - t.boolean "last_repository_verification_failed", default: false, null: false t.string "last_repository_verification_failure" t.string "wiki_verification_checksum" - t.datetime_with_timezone "last_wiki_verification_at" - t.boolean "last_wiki_verification_failed", default: false, null: false t.string "last_wiki_verification_failure" end add_index "project_registry", ["last_repository_successful_sync_at"], name: "index_project_registry_on_last_repository_successful_sync_at", using: :btree add_index "project_registry", ["last_repository_synced_at"], name: "index_project_registry_on_last_repository_synced_at", using: :btree + add_index "project_registry", ["project_id"], name: "idx_project_registry_on_repository_failure_partial", where: "(last_repository_verification_failure IS NOT NULL)", using: :btree + add_index "project_registry", ["project_id"], name: "idx_project_registry_on_wiki_failure_partial", where: "(last_wiki_verification_failure IS NOT NULL)", using: :btree add_index "project_registry", ["project_id"], name: "index_project_registry_on_project_id", unique: true, using: :btree add_index "project_registry", ["repository_retry_at"], name: "index_project_registry_on_repository_retry_at", using: :btree + add_index "project_registry", ["repository_verification_checksum"], name: "idx_project_registry_on_repository_checksum_partial", where: "(repository_verification_checksum IS NULL)", using: :btree add_index "project_registry", ["resync_repository"], name: "index_project_registry_on_resync_repository", using: :btree add_index "project_registry", ["resync_wiki"], name: "index_project_registry_on_resync_wiki", using: :btree add_index "project_registry", ["wiki_retry_at"], name: "index_project_registry_on_wiki_retry_at", using: :btree + add_index "project_registry", ["wiki_verification_checksum"], name: "idx_project_registry_on_wiki_checksum_partial", where: "(wiki_verification_checksum IS NULL)", using: :btree end diff --git a/ee/db/migrate/20180308234102_add_partial_index_to_project_repository_states_checksum_columns.rb b/ee/db/migrate/20180308234102_add_partial_index_to_project_repository_states_checksum_columns.rb new file mode 100644 index 00000000000..74ce0b5edae --- /dev/null +++ b/ee/db/migrate/20180308234102_add_partial_index_to_project_repository_states_checksum_columns.rb @@ -0,0 +1,25 @@ +class AddPartialIndexToProjectRepositoryStatesChecksumColumns < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + INDEX_NAME = 'idx_repository_states_on_checksums_partial' + + disable_ddl_transaction! + + def up + unless index_exists?(:project_repository_states, [:repository_verification_checksum, :wiki_verification_checksum], name: INDEX_NAME) + add_concurrent_index(:project_repository_states, + [:repository_verification_checksum, :wiki_verification_checksum], + name: INDEX_NAME, + length: Gitlab::Database.mysql? ? 20 : nil, + where: 'repository_verification_checksum IS NULL OR wiki_verification_checksum IS NULL' + ) + end + end + + def down + if index_exists?(:project_repository_states, [:repository_verification_checksum, :wiki_verification_checksum], name: INDEX_NAME) + remove_concurrent_index_by_name(:project_repository_states, INDEX_NAME) + end + end +end diff --git a/ee/db/migrate/20180314174825_add_partial_index_to_project_repository_states_verification_columns.rb b/ee/db/migrate/20180314174825_add_partial_index_to_project_repository_states_verification_columns.rb new file mode 100644 index 00000000000..357f2d27ef4 --- /dev/null +++ b/ee/db/migrate/20180314174825_add_partial_index_to_project_repository_states_verification_columns.rb @@ -0,0 +1,29 @@ +class AddPartialIndexToProjectRepositoryStatesVerificationColumns < ActiveRecord::Migration + include Gitlab::Database::MigrationHelpers + + DOWNTIME = false + REPOSITORY_INDEX_NAME = 'idx_repository_states_on_repository_failure_partial' + WIKI_INDEX_NAME = 'idx_repository_states_on_wiki_failure_partial' + + disable_ddl_transaction! + + def up + unless index_exists?(:project_repository_states, :last_repository_verification_failure, name: REPOSITORY_INDEX_NAME) + add_concurrent_index(:project_repository_states, :last_repository_verification_failure, name: REPOSITORY_INDEX_NAME, length: Gitlab::Database.mysql? ? 20 : nil, where: 'last_repository_verification_failure IS NOT NULL') + end + + unless index_exists?(:project_repository_states, :last_wiki_verification_failure, name: WIKI_INDEX_NAME) + add_concurrent_index(:project_repository_states, :last_wiki_verification_failure, name: WIKI_INDEX_NAME, length: Gitlab::Database.mysql? ? 20 : nil, where: 'last_wiki_verification_failure IS NOT NULL') + end + end + + def down + if index_exists?(:project_repository_states, :last_repository_verification_failure, name: REPOSITORY_INDEX_NAME) + remove_concurrent_index_by_name(:project_repository_states, REPOSITORY_INDEX_NAME) + end + + if index_exists?(:project_repository_states, :last_wiki_verification_failure, name: WIKI_INDEX_NAME) + remove_concurrent_index_by_name(:project_repository_states, WIKI_INDEX_NAME) + end + end +end diff --git a/ee/db/post_migrate/20180309215236_remove_last_verication_at_columns_from_project_repository_states.rb b/ee/db/post_migrate/20180309215236_remove_last_verication_at_columns_from_project_repository_states.rb new file mode 100644 index 00000000000..6da9cc82052 --- /dev/null +++ b/ee/db/post_migrate/20180309215236_remove_last_verication_at_columns_from_project_repository_states.rb @@ -0,0 +1,13 @@ +class RemoveLastVericationAtColumnsFromProjectRepositoryStates < ActiveRecord::Migration + DOWNTIME = false + + def up + remove_column :project_repository_states, :last_repository_verification_at + remove_column :project_repository_states, :last_wiki_verification_at + end + + def down + add_column :project_repository_states, :last_repository_verification_at, :datetime_with_timezone + add_column :project_repository_states, :last_wiki_verification_at, :datetime_with_timezone + end +end diff --git a/ee/db/post_migrate/20180314172513_remove_last_verication_failed_columns_from_project_repository_states.rb b/ee/db/post_migrate/20180314172513_remove_last_verication_failed_columns_from_project_repository_states.rb new file mode 100644 index 00000000000..ee36cc32785 --- /dev/null +++ b/ee/db/post_migrate/20180314172513_remove_last_verication_failed_columns_from_project_repository_states.rb @@ -0,0 +1,13 @@ +class RemoveLastVericationFailedColumnsFromProjectRepositoryStates < ActiveRecord::Migration + DOWNTIME = false + + def up + remove_column :project_repository_states, :last_repository_verification_failed + remove_column :project_repository_states, :last_wiki_verification_failed + end + + def down + add_column :project_repository_states, :last_repository_verification_failed, :boolean + add_column :project_repository_states, :last_wiki_verification_failed, :boolean + end +end diff --git a/ee/lib/gitlab/geo/database_tasks.rb b/ee/lib/gitlab/geo/database_tasks.rb index 75d28681755..00c57d77ba2 100644 --- a/ee/lib/gitlab/geo/database_tasks.rb +++ b/ee/lib/gitlab/geo/database_tasks.rb @@ -149,11 +149,25 @@ module Gitlab { database_config: YAML.load_file(GEO_DATABASE_CONFIG), db_dir: GEO_DB_DIR, - migrations_paths: [Rails.root.join(GEO_DB_DIR, 'migrate')], + migrations_paths: geo_migrations_paths, seed_loader: SeedLoader.new } end + def geo_migrations_paths + migrations_paths = [geo_migrate_path] + migrations_paths << geo_post_migration_path unless ENV['SKIP_POST_DEPLOYMENT_MIGRATIONS'] + migrations_paths + end + + def geo_migrate_path + Rails.root.join(GEO_DB_DIR, 'migrate') + end + + def geo_post_migration_path + Rails.root.join(GEO_DB_DIR, 'post_migrate') + end + def with_geo_db abort_if_no_geo_config! diff --git a/ee/lib/gitlab/geo/health_check.rb b/ee/lib/gitlab/geo/health_check.rb index 9a9be0af44f..f022807c322 100644 --- a/ee/lib/gitlab/geo/health_check.rb +++ b/ee/lib/gitlab/geo/health_check.rb @@ -35,6 +35,11 @@ module Gitlab @db_migrate_path ||= File.join(Rails.root, 'ee', 'db', 'geo', 'migrate') end + def self.db_post_migrate_path + # Lazy initialisation so Rails.root will be defined + @db_post_migrate_path ||= File.join(Rails.root, 'ee', 'db', 'geo', 'post_migrate') + end + def self.get_database_version if defined?(ActiveRecord) connection = ::Geo::BaseRegistry.connection @@ -51,7 +56,7 @@ module Gitlab def self.get_migration_version latest_migration = nil - Dir[File.join(self.db_migrate_path, "[0-9]*_*.rb")].each do |f| + Dir[File.join(self.db_migrate_path, "[0-9]*_*.rb"), File.join(self.db_post_migrate_path, "[0-9]*_*.rb")].each do |f| timestamp = f.scan(/0*([0-9]+)_[_.a-zA-Z0-9]*.rb/).first.first rescue -1 if latest_migration.nil? || timestamp.to_i > latest_migration.to_i diff --git a/ee/lib/gitlab/geo/log_cursor/daemon.rb b/ee/lib/gitlab/geo/log_cursor/daemon.rb index 6d55c4fb9a0..8ac8aebd404 100644 --- a/ee/lib/gitlab/geo/log_cursor/daemon.rb +++ b/ee/lib/gitlab/geo/log_cursor/daemon.rb @@ -103,7 +103,8 @@ module Gitlab end def handle_repository_updated_event(event, created_at) - registry = find_or_initialize_registry(event.project_id, "resync_#{event.source}" => true) + registry = find_or_initialize_registry(event.project_id, + "resync_#{event.source}" => true, "#{event.source}_verification_checksum" => nil) logger.event_info( created_at, diff --git a/ee/spec/factories/geo/project_registry.rb b/ee/spec/factories/geo/project_registry.rb index c6f399eb30a..07abf46dffc 100644 --- a/ee/spec/factories/geo/project_registry.rb +++ b/ee/spec/factories/geo/project_registry.rb @@ -74,29 +74,33 @@ FactoryBot.define do end trait :repository_verified do - repository_verification_checksum 'f079a831cab27bcda7d81cd9b48296d0c3dd92ee' - last_repository_verification_failed false - last_repository_verification_at { 5.days.ago } + repository_verification_checksum 'f079a831cab27bcda7d81cd9b48296d0c3dd92ee' + last_repository_verification_failure nil end - trait :wiki_verified do - wiki_verification_checksum 'e079a831cab27bcda7d81cd9b48296d0c3dd92ef' - last_wiki_verification_failed false - last_wiki_verification_at { 5.days.ago } + trait :repository_verification_failed do + repository_verification_checksum nil + last_repository_verification_failure 'Repository checksum did not match' end - trait :repository_verification_failed do - repository_verification_checksum nil - last_repository_verification_at { 5.days.ago } - last_repository_verification_failed true - last_repository_verification_failure 'Repository checksum did not match' + trait :repository_verification_outdated do + repository_verification_checksum nil + last_repository_verification_failure nil + end + + trait :wiki_verified do + wiki_verification_checksum 'e079a831cab27bcda7d81cd9b48296d0c3dd92ef' + last_wiki_verification_failure nil end trait :wiki_verification_failed do - wiki_verification_checksum nil - last_wiki_verification_at { 5.days.ago } - last_wiki_verification_failed true - last_wiki_verification_failure 'Wiki checksum did not match' + wiki_verification_checksum nil + last_wiki_verification_failure 'Wiki checksum did not match' + end + + trait :wiki_verification_outdated do + wiki_verification_checksum nil + last_wiki_verification_failure nil end end end diff --git a/ee/spec/factories/project_repository_states.rb b/ee/spec/factories/project_repository_states.rb index e24f34cfea1..9bd5c9d7ba3 100644 --- a/ee/spec/factories/project_repository_states.rb +++ b/ee/spec/factories/project_repository_states.rb @@ -2,28 +2,34 @@ FactoryBot.define do factory :repository_state, class: 'ProjectRepositoryState' do project + trait :repository_failed do + repository_verification_checksum nil + last_repository_verification_failure 'Could not calculate the checksum' + end + trait :repository_outdated do - repository_verification_checksum 'f079a831cab27bcda7d81cd9b48296d0c3dd92ee' - last_repository_verification_at { 5.days.ago } - last_repository_verification_failed false + repository_verification_checksum nil + last_repository_verification_failure false end trait :repository_verified do - repository_verification_checksum 'f079a831cab27bcda7d81cd9b48296d0c3dd92ee' - last_repository_verification_failed false - last_repository_verification_at { Time.now } + repository_verification_checksum 'f079a831cab27bcda7d81cd9b48296d0c3dd92ee' + last_repository_verification_failure false + end + + trait :wiki_failed do + wiki_verification_checksum nil + last_wiki_verification_failure 'Could not calculate the checksum' end trait :wiki_outdated do - repository_verification_checksum 'f079a831cab27bcda7d81cd9b48296d0c3dd92ee' - last_repository_verification_at { 5.days.ago } - last_repository_verification_failed false + wiki_verification_checksum nil + last_wiki_verification_failure nil end trait :wiki_verified do - wiki_verification_checksum 'e079a831cab27bcda7d81cd9b48296d0c3dd92ef' - last_wiki_verification_failed false - last_wiki_verification_at { Time.now } + wiki_verification_checksum 'e079a831cab27bcda7d81cd9b48296d0c3dd92ef' + last_wiki_verification_failure nil end end end diff --git a/ee/spec/finders/geo/project_registry_finder_spec.rb b/ee/spec/finders/geo/project_registry_finder_spec.rb index 4bf4c4a809c..c6d32853b54 100644 --- a/ee/spec/finders/geo/project_registry_finder_spec.rb +++ b/ee/spec/finders/geo/project_registry_finder_spec.rb @@ -319,6 +319,93 @@ describe Geo::ProjectRegistryFinder, :geo do end end + shared_examples 'find outdated registries for repositories/wikis' do + it 'returns registries that verified on primary but not on secondary' do + project_verified = create(:repository_state, :repository_verified, :wiki_verified).project + repository_verified = create(:repository_state, :repository_verified).project + wiki_verified = create(:repository_state, :wiki_verified).project + + create(:geo_project_registry, :repository_verified, :wiki_verified, project: project_verified) + registry_repository_verified = create(:geo_project_registry, :repository_verified, project: repository_verified) + registry_wiki_verified = create(:geo_project_registry, :wiki_verified, project: wiki_verified) + + expect(subject.find_registries_to_verify(batch_size: 100)) + .to match_array([ + registry_repository_verified, + registry_wiki_verified + ]) + end + + it 'does not return registries were unverified/outdated on primary' do + project_unverified_primary = create(:project) + project_outdated_primary = create(:repository_state, :repository_outdated, :wiki_outdated).project + repository_outdated_primary = create(:repository_state, :repository_outdated, :wiki_verified).project + wiki_outdated_primary = create(:repository_state, :repository_verified, :wiki_outdated).project + + create(:geo_project_registry, project: project_unverified_primary) + create(:geo_project_registry, :repository_verification_outdated, :wiki_verification_outdated, project: project_outdated_primary) + create(:geo_project_registry, :repository_verified, :wiki_verified, project: repository_outdated_primary) + create(:geo_project_registry, :repository_verified, :wiki_verified, project: wiki_outdated_primary) + + expect(subject.find_registries_to_verify(batch_size: 100)).to be_empty + end + + it 'returns registries that were unverified/outdated on secondary' do + # Secondary unverified/outdated + project_unverified_secondary = create(:repository_state, :repository_verified, :wiki_verified).project + project_outdated_secondary = create(:repository_state, :repository_verified, :wiki_verified).project + repository_outdated_secondary = create(:repository_state, :repository_verified, :wiki_verified).project + wiki_outdated_secondary = create(:repository_state, :repository_verified, :wiki_verified).project + + registry_unverified_secondary = create(:geo_project_registry, project: project_unverified_secondary) + registry_outdated_secondary = create(:geo_project_registry, :repository_verification_outdated, :wiki_verification_outdated, project: project_outdated_secondary) + registry_repository_outdated_secondary = create(:geo_project_registry, :repository_verification_outdated, :wiki_verified, project: repository_outdated_secondary) + registry_wiki_outdated_secondary = create(:geo_project_registry, :repository_verified, :wiki_verification_outdated, project: wiki_outdated_secondary) + + expect(subject.find_registries_to_verify(batch_size: 100)) + .to match_array([ + registry_unverified_secondary, + registry_outdated_secondary, + registry_repository_outdated_secondary, + registry_wiki_outdated_secondary + ]) + end + + it 'does not return registries that both verification failed on primary' do + verification_failed_primary = create(:repository_state, :repository_failed, :wiki_failed).project + repository_failed_primary = create(:repository_state, :repository_failed, :wiki_verified).project + wiki_failed_primary = create(:repository_state, :repository_verified, :wiki_failed).project + + create(:geo_project_registry, project: verification_failed_primary) + registry_repository_failed_primary = create(:geo_project_registry, project: repository_failed_primary) + registry_wiki_failed_primary = create(:geo_project_registry, project: wiki_failed_primary) + + expect(subject.find_registries_to_verify(batch_size: 100)) + .to match_array([ + registry_repository_failed_primary, + registry_wiki_failed_primary + ]) + end + + it 'returns registries that verification failed on secondary' do + # Verification failed on secondary + verification_failed_secondary = create(:repository_state, :repository_verified, :wiki_verified).project + repository_failed_secondary = create(:repository_state, :repository_verified).project + wiki_failed_secondary = create(:repository_state, :wiki_verified).project + + registry_verification_failed_secondary = create(:geo_project_registry, :repository_verification_failed, :wiki_verification_failed, project: verification_failed_secondary) + registry_repository_failed_secondary = create(:geo_project_registry, :repository_verification_failed, project: repository_failed_secondary) + registry_wiki_failed_secondary = create(:geo_project_registry, :wiki_verification_failed, project: wiki_failed_secondary) + + expect(subject.find_registries_to_verify(batch_size: 100)) + .to match_array([ + registry_verification_failed_secondary, + registry_repository_failed_secondary, + registry_wiki_failed_secondary + ]) + end + end + # Disable transactions via :delete method because a foreign table # can't see changes inside a transaction of a different connection. context 'FDW', :delete do @@ -389,160 +476,9 @@ describe Geo::ProjectRegistryFinder, :geo do expect(projects.pluck(:id)).to match_array([project_repository_dirty.id, project_wiki_dirty.id]) end end - end - - shared_examples 'find registries for repositories/wikis' do |use_fdw| - before do - allow(Gitlab::Geo::Fdw).to receive(:enabled?).and_return(use_fdw) - end - - let(:verified_repository_state) { create(:repository_state, :repository_verified, :wiki_verified, last_repository_verification_at: 10.minutes.from_now) } - let(:verified_project) { create(:project, repository_state: verified_repository_state) } - let(:verified_registry) do - create(:geo_project_registry, - project: verified_project, - last_repository_verification_at: Time.now, - last_repository_successful_sync_at: 5.minutes.from_now) - end - - context 'with the primary verification failure' do - it 'finds when not failed' do - verified_repository_state.last_repository_verification_failed = false - verified_repository_state.last_wiki_verification_failed = false - verified_registry - - expect(subject.find_registries_to_verify.count).to eq 1 - end - - it 'does not find when failed' do - verified_repository_state.last_repository_verification_failed = true - verified_repository_state.last_wiki_verification_failed = true - verified_registry - - expect(subject.find_registries_to_verify.count).to eq 0 - end - - it 'finds when either repo/wiki fails' do - verified_repository_state.last_repository_verification_failed = true - verified_repository_state.last_wiki_verification_failed = false - verified_registry - - expect(subject.find_registries_to_verify.count).to eq 1 - end - end - - context 'with the primary verification checksum' do - it 'finds with a checksum' do - verified_repository_state.repository_verification_checksum = 'my-checksum' - verified_repository_state.wiki_verification_checksum = 'my-checksum' - verified_registry - - expect(subject.find_registries_to_verify.count).to eq 1 - end - - it 'does not find a checksum' do - verified_repository_state.repository_verification_checksum = nil - verified_repository_state.wiki_verification_checksum = nil - verified_registry - - expect(subject.find_registries_to_verify.count).to eq 0 - end - - it 'finds when either repo/wiki has a checksum' do - verified_repository_state.repository_verification_checksum = 'my-checksum' - verified_repository_state.wiki_verification_checksum = nil - verified_registry - - expect(subject.find_registries_to_verify.count).to eq 1 - end - end - - context 'with the primary repository verification date ' do - let(:verified_project) do - project = create(:project, repository_state: verified_repository_state) - project.update_attribute(:last_repository_updated_at, 30.minutes.ago) - project - end - let(:verified_registry) do - create(:geo_project_registry, - project: verified_project, - last_repository_verification_at: 25.minutes.ago, - last_wiki_verification_at: 25.minutes.ago, - last_repository_successful_sync_at: 5.minutes.from_now, - last_wiki_successful_sync_at: 5.minutes.from_now) - end - - it 'finds if primary verified after the primary repository was updated' do - verified_repository_state.last_repository_verification_at = 20.minutes.ago - verified_registry - - expect(subject.find_registries_to_verify.count).to eq 1 - end - - it 'does not find if primary repository updated after primary verification' do - verified_repository_state.last_repository_verification_at = 35.minutes.ago - verified_repository_state.last_wiki_verification_at = 35.minutes.ago - verified_registry - - expect(subject.find_registries_to_verify.count).to eq 0 - end - - it 'finds if primary wiki verified after the primary repository was updated' do - verified_repository_state.last_repository_verification_at = 35.minutes.ago - verified_repository_state.last_wiki_verification_at = 20.minutes.ago - verified_registry - - expect(subject.find_registries_to_verify.count).to eq 1 - end - - it 'does not find if primary verification did not happen' do - verified_repository_state.last_repository_verification_at = nil - verified_repository_state.last_wiki_verification_at = nil - verified_registry - - expect(subject.find_registries_to_verify.count).to eq 0 - end - - it 'finds if primary was verified after the secondary was verified' do - verified_registry.update_attribute(:last_repository_verification_at, verified_repository_state.last_repository_verification_at - 5.minutes) - - expect(subject.find_registries_to_verify.count).to eq 1 - end - - it 'does not find if primary was verified before the secondary was verified' do - verified_registry.update_attributes( - last_repository_verification_at: verified_repository_state.last_repository_verification_at + 5.minutes, - last_wiki_verification_at: verified_repository_state.last_wiki_verification_at + 5.minutes - ) - - expect(subject.find_registries_to_verify.count).to eq 0 - end - end - - it 'returns repositories failed more than 24 hours ago' do - create(:geo_project_registry, - project: verified_project, - repository_verification_checksum: nil, - last_repository_verification_failed: true, - last_repository_verification_at: 2.days.ago) - - expect(subject.find_registries_to_verify.count).to eq 1 - end - - it 'does not return repositories failed less than 24 hours ago' do - create(:geo_project_registry, :repository_verification_failed, last_repository_verification_at: 5.hours.ago) - expect(subject.find_registries_to_verify.count).to eq 0 - end - end - - describe '#find_registries_to_verify', :delete do - context 'using FDW' do - include_examples 'find registries for repositories/wikis', true - end - - context 'using Legacy' do - include_examples 'find registries for repositories/wikis', false + describe '#find_registries_to_verify' do + include_examples 'find outdated registries for repositories/wikis' end end @@ -583,5 +519,9 @@ describe Geo::ProjectRegistryFinder, :geo do expect(projects.pluck(:id)).to match_array([project_repository_dirty.id, project_wiki_dirty.id]) end end + + describe '#find_registries_to_verify' do + include_examples 'find outdated registries for repositories/wikis' + end end end diff --git a/ee/spec/finders/geo/repository_verification_finder_spec.rb b/ee/spec/finders/geo/repository_verification_finder_spec.rb index 2d5366f9f9e..b43ff05ac2e 100644 --- a/ee/spec/finders/geo/repository_verification_finder_spec.rb +++ b/ee/spec/finders/geo/repository_verification_finder_spec.rb @@ -31,16 +31,6 @@ describe Geo::RepositoryVerificationFinder, :postgresql do expect(subject.find_outdated_projects(batch_size: 10)) .to match_array(project) end - - it 'returns verified projects that repositories have changed' do - repository_outdated = create(:repository_state, :repository_outdated).project - repository_outdated.update_column(:last_repository_updated_at, 6.hours.ago) - wiki_outdated = create(:repository_state, :wiki_outdated).project - wiki_outdated.update_column(:last_repository_updated_at, 48.hours.ago) - - expect(subject.find_outdated_projects(batch_size: 10)) - .to match_array([repository_outdated, wiki_outdated]) - end end describe '#find_unverified_projects' do diff --git a/ee/spec/lib/gitlab/geo/log_cursor/daemon_spec.rb b/ee/spec/lib/gitlab/geo/log_cursor/daemon_spec.rb index 533104fd066..c075573a97f 100644 --- a/ee/spec/lib/gitlab/geo/log_cursor/daemon_spec.rb +++ b/ee/spec/lib/gitlab/geo/log_cursor/daemon_spec.rb @@ -111,22 +111,44 @@ describe Gitlab::Geo::LogCursor::Daemon, :postgresql, :clean_gitlab_redis_shared expect { daemon.run_once! }.to change(Geo::ProjectRegistry, :count).by(1) end - it 'sets resync_repository to true if event source is repository' do - repository_updated_event.update!(source: Geo::RepositoryUpdatedEvent::REPOSITORY) - registry = create(:geo_project_registry, :synced, project: repository_updated_event.project) + context 'when event source is repository' do + let!(:registry) { create(:geo_project_registry, :synced, :repository_verified, project: repository_updated_event.project) } - daemon.run_once! + before do + repository_updated_event.update!(source: Geo::RepositoryUpdatedEvent::REPOSITORY) + end + + it 'sets resync_repository to true' do + daemon.run_once! - expect(registry.reload.resync_repository).to be true + expect(registry.reload.resync_repository).to be true + end + + it 'resets the repository verification checksum' do + daemon.run_once! + + expect(registry.reload).to have_attributes(resync_repository: true, repository_verification_checksum: nil) + end end - it 'sets resync_wiki to true if event source is wiki' do - repository_updated_event.update!(source: Geo::RepositoryUpdatedEvent::WIKI) - registry = create(:geo_project_registry, :synced, project: repository_updated_event.project) + context 'when event source is wiki' do + let!(:registry) { create(:geo_project_registry, :synced, :wiki_verified, project: repository_updated_event.project) } - daemon.run_once! + before do + repository_updated_event.update!(source: Geo::RepositoryUpdatedEvent::WIKI) + end + + it 'sets resync_wiki to true' do + daemon.run_once! - expect(registry.reload.resync_wiki).to be true + expect(registry.reload.resync_wiki).to be true + end + + it 'resets the wiki verification checksum' do + daemon.run_once! + + expect(registry.reload).to have_attributes(resync_wiki: true, wiki_verification_checksum: nil) + end end it 'performs Geo::ProjectSyncWorker' do diff --git a/ee/spec/models/project_repository_state_spec.rb b/ee/spec/models/project_repository_state_spec.rb index 916f680bd2f..bb49324afdb 100644 --- a/ee/spec/models/project_repository_state_spec.rb +++ b/ee/spec/models/project_repository_state_spec.rb @@ -18,59 +18,39 @@ describe ProjectRepositoryState do end describe '#repository_checksum_outdated?' do - where(:repository_verification_checksum, :last_repository_verification_at, :expected) do - now = Time.now - past = now - 1.year - future = now + 1.year + it 'returns true when repository_verification_checksum is nil' do + repository_state.repository_verification_checksum = nil - nil | nil | true - '123' | nil | true - '123' | past | true - '123' | now | true - '123' | future | false + expect(repository_state.repository_checksum_outdated?).to eq true end - with_them do - before do - repository_state.update!(repository_verification_checksum: repository_verification_checksum, last_repository_verification_at: last_repository_verification_at) - end - - subject { repository_state.repository_checksum_outdated?(Time.now) } + it 'returns false when repository_verification_checksum is not nil' do + repository_state.repository_verification_checksum = '123' - it { is_expected.to eq(expected) } + expect(repository_state.repository_checksum_outdated?).to eq false end end describe '#wiki_checksum_outdated?' do - where(:wiki_verification_checksum, :last_wiki_verification_at, :expected) do - now = Time.now - past = now - 1.year - future = now + 1.year + context 'wiki enabled' do + it 'returns true when wiki_verification_checksum is nil' do + repository_state.wiki_verification_checksum = nil - nil | nil | true - '123' | nil | true - '123' | past | true - '123' | now | true - '123' | future | false - end - - with_them do - before do - repository_state.update!(wiki_verification_checksum: wiki_verification_checksum, last_wiki_verification_at: last_wiki_verification_at) + expect(repository_state.wiki_checksum_outdated?).to eq true end - subject { repository_state.wiki_checksum_outdated?(Time.now) } + it 'returns false when wiki_verification_checksum is not nil' do + repository_state.wiki_verification_checksum = '123' - context 'wiki enabled' do - it { is_expected.to eq(expected) } + expect(repository_state.wiki_checksum_outdated?).to eq false end + end - context 'wiki disabled' do - before do - project.update!(wiki_enabled: false) - end + context 'wiki disabled' do + it 'returns false' do + project.update!(wiki_enabled: false) - it { is_expected.to be_falsy } + expect(repository_state.wiki_checksum_outdated?).to eq false end end end diff --git a/ee/spec/services/geo/repository_sync_service_spec.rb b/ee/spec/services/geo/repository_sync_service_spec.rb index d5893ea8018..002fb282f19 100644 --- a/ee/spec/services/geo/repository_sync_service_spec.rb +++ b/ee/spec/services/geo/repository_sync_service_spec.rb @@ -160,6 +160,18 @@ describe Geo::RepositorySyncService do expect(registry.last_repository_successful_sync_at).not_to be_nil end + it 'resets the repository_verification_checksum' do + subject.execute + + expect(registry.repository_verification_checksum).to be_nil + end + + it 'resets the last_repository_verification_failure' do + subject.execute + + expect(registry.last_repository_verification_failure).to be_nil + end + it 'logs success with timings' do allow(Gitlab::Geo::Logger).to receive(:info).and_call_original expect(Gitlab::Geo::Logger).to receive(:info).with(hash_including(:message, :update_delay_s, :download_time_s)).and_call_original diff --git a/ee/spec/services/geo/repository_updated_service_spec.rb b/ee/spec/services/geo/repository_updated_service_spec.rb new file mode 100644 index 00000000000..331ae5e2a6f --- /dev/null +++ b/ee/spec/services/geo/repository_updated_service_spec.rb @@ -0,0 +1,70 @@ +require 'spec_helper' + +describe Geo::RepositoryUpdatedService do + include ::EE::GeoHelpers + + set(:project) { create(:project) } + set(:primary) { create(:geo_node, :primary) } + set(:secondary) { create(:geo_node) } + set(:repository_state) { create(:repository_state, :repository_verified, :wiki_verified, project: project) } + + before do + stub_current_geo_node(primary) + end + + describe '#execute' do + subject { described_class.new(project, source: source) } + + shared_examples 'repository being updated' do + context 'when not running on a primary node' do + before do + allow(Gitlab::Geo).to receive(:primary?) { false } + end + + it 'does not create a repository updated event' do + expect { subject.execute }.not_to change(Geo::RepositoryUpdatedEvent, :count) + end + + it 'does not reset the repository verification checksum' do + expect { subject.execute }.not_to change(repository_state.reload, "#{method_prefix}_verification_checksum") + end + end + + context 'when running on a primary node' do + it 'creates a repository updated event' do + expect { subject.execute }.to change(Geo::RepositoryUpdatedEvent, :count).by(1) + end + + it 'resets the repository verification checksum' do + expect { subject.execute }.to change { repository_state.reload.public_send("#{method_prefix}_verification_checksum") }.to(nil) + end + + it 'does not raise an error when project have never been verified' do + expect { described_class.new(create(:project)) }.not_to raise_error + end + + it 'raises a Gitlab::Git::Checksum error when an error occurs' do + allow(subject.repository_state).to receive(:update!) + .with("#{method_prefix}_verification_checksum" => nil) + .and_raise(ActiveRecord::RecordInvalid.new(repository_state)) + + expect { subject.execute }.to raise_error Gitlab::Git::Checksum::Failure, /Cannot reset repository checksum/ + end + end + end + + context 'when repository is being updated' do + include_examples 'repository being updated' do + let(:source) { Geo::RepositoryUpdatedEvent::REPOSITORY } + let(:method_prefix) { 'repository' } + end + end + + context 'when wiki is being updated' do + include_examples 'repository being updated' do + let(:source) { Geo::RepositoryUpdatedEvent::WIKI } + let(:method_prefix) { 'wiki' } + end + end + end +end diff --git a/ee/spec/services/geo/repository_verify_secondary_service_spec.rb b/ee/spec/services/geo/repository_verify_secondary_service_spec.rb index 3923840c625..939ae2059a7 100644 --- a/ee/spec/services/geo/repository_verify_secondary_service_spec.rb +++ b/ee/spec/services/geo/repository_verify_secondary_service_spec.rb @@ -3,159 +3,80 @@ require 'spec_helper' describe Geo::RepositoryVerifySecondaryService, :geo do include ::EE::GeoHelpers - let(:primary) { create(:geo_node, :primary) } let(:secondary) { create(:geo_node) } before do stub_current_geo_node(secondary) end - describe '#execute' do - let(:repository_state) { create(:repository_state, project: create(:project, :repository))} - let(:registry) do - registry = create(:geo_project_registry, project: repository_state.project) - registry.project.last_repository_updated_at = 7.hours.ago - registry.project.repository_state.last_repository_verification_at = 5.hours.ago - registry.last_repository_successful_sync_at = 5.hours.ago - registry.project.repository_state.repository_verification_checksum = 'my_checksum' - - registry - end - let(:service) { described_class.new(registry, :repository) } - - it 'only works on the secondary' do - stub_current_geo_node(primary) + shared_examples 'verify checksums for repositories/wikis' do |type| + let(:checksum) { instance_double('Gitlab::Git::Checksum') } + let(:storage) { project.repository_storage } + let(:relative_path) { registry.repository_path(type) } - expect(service).not_to receive(:log_info) - - service.execute - end + subject(:service) { described_class.new(registry, type) } - it 'sets checksum when the checksum matches' do - allow(service).to receive(:calculate_checksum).and_return('my_checksum') + it 'does not calculate the checksum when not running on a secondary' do + allow(Gitlab::Geo).to receive(:secondary?) { false } - expect(service).to receive(:record_status).once.with(checksum: 'my_checksum') + expect(Gitlab::Git::Checksum).not_to receive(:new).with(storage, relative_path) service.execute end - it 'sets failure message when the checksum does not match' do - allow(service).to receive(:calculate_checksum).and_return('not_my_checksum') + it 'does not verify the checksum if resync is needed' do + registry.assign_attributes("resync_#{type}" => true) - expect(service).to receive(:record_status).once.with(error_msg: start_with('Repository checksum mismatch')) + expect(Gitlab::Git::Checksum).not_to receive(:new).with(storage, relative_path) service.execute end - end - - shared_examples 'should_verify_checksum? for repositories/wikis' do |type| - let(:repository_state) { create(:repository_state, project: create(:project, :repository))} - let(:registry) do - registry = create(:geo_project_registry, project: repository_state.project) - registry.project.last_repository_updated_at = 7.hours.ago - registry.project.repository_state.public_send("last_#{type}_verification_at=", 5.hours.ago) - registry.public_send("last_#{type}_successful_sync_at=", 5.hours.ago) - registry.project.repository_state.public_send("#{type}_verification_checksum=", 'my_checksum') - - registry - end - let(:service) { described_class.new(registry, type) } - - it 'verifies the repository' do - expect(service.should_verify_checksum?).to be_truthy - end - - it 'does not verify if primary was never verified' do - registry.project.repository_state.public_send("last_#{type}_verification_at=", nil) - - expect(service.should_verify_checksum?).to be_falsy - end - - it 'does not verify if the checksums already match' do - registry.project.repository_state.public_send("#{type}_verification_checksum=", 'my_checksum') - registry.public_send("#{type}_verification_checksum=", 'my_checksum') - expect(service.should_verify_checksum?).to be_falsy - end + it 'does not verify the checksum if primary was never verified' do + repository_state.assign_attributes("#{type}_verification_checksum" => nil) - it 'does not verify if the primary was verified before the secondary' do - registry.project.repository_state.public_send("last_#{type}_verification_at=", 50.minutes.ago) - registry.public_send("last_#{type}_verification_at=", 30.minutes.ago) + expect(Gitlab::Git::Checksum).not_to receive(:new).with(storage, relative_path) - expect(service.should_verify_checksum?).to be_falsy + service.execute end - it 'does verify if the secondary was never verified' do - registry.public_send("last_#{type}_verification_at=", nil) - - expect(service.should_verify_checksum?).to be_truthy - end + it 'does not verify the checksum if the checksums already match' do + repository_state.assign_attributes("#{type}_verification_checksum" => 'my_checksum') + registry.assign_attributes("#{type}_verification_checksum" => 'my_checksum') - it 'does not verify if never synced' do - registry.public_send("last_#{type}_successful_sync_at=", nil) + expect(Gitlab::Git::Checksum).not_to receive(:new).with(storage, relative_path) - expect(service.should_verify_checksum?).to be_falsy + service.execute end - it 'does not verify if the secondary synced before the last secondary verification' do - registry.public_send("last_#{type}_verification_at=", 50.minutes.ago) - registry.public_send("last_#{type}_successful_sync_at=", 30.minutes.ago) + it 'sets checksum when the checksum matches' do + expect(Gitlab::Git::Checksum).to receive(:new).with(storage, relative_path) { checksum } + expect(checksum).to receive(:calculate).and_return('my_checksum') - expect(service.should_verify_checksum?).to be_falsy + expect { service.execute }.to change(registry, "#{type}_verification_checksum") + .from(nil).to('my_checksum') end - it 'has been at least 6 hours since the primary repository was updated' do - registry.project.last_repository_updated_at = 7.hours.ago + it 'keeps track of failure when the checksum mismatch' do + expect(Gitlab::Git::Checksum).to receive(:new).with(storage, relative_path) { checksum } + expect(checksum).to receive(:calculate).and_return('other_checksum') - expect(service.should_verify_checksum?).to be_truthy + expect { service.execute }.to change(registry, "last_#{type}_verification_failure") + .from(nil).to(/#{Regexp.quote(type.to_s.capitalize)} checksum mismatch/) end end - describe '#should_verify_checksum?' do + describe '#execute' do + let(:project) { create(:project, :repository, :wiki_repo) } + let!(:repository_state) { create(:repository_state, project: project, repository_verification_checksum: 'my_checksum', wiki_verification_checksum: 'my_checksum') } + let(:registry) { create(:geo_project_registry, :synced, project: project) } + context 'repository' do - include_examples 'should_verify_checksum? for repositories/wikis', :repository + include_examples 'verify checksums for repositories/wikis', :repository end context 'wiki' do - include_examples 'should_verify_checksum? for repositories/wikis', :wiki - end - end - - shared_examples 'record_status for repositories/wikis' do |type| - it 'records a successful verification' do - service.send(:record_status, checksum: 'my_checksum') - registry.reload - - expect(registry.public_send("#{type}_verification_checksum")).to eq 'my_checksum' - expect(registry.public_send("last_#{type}_verification_at")).not_to be_nil - expect(registry.public_send("last_#{type}_verification_failure")).to be_nil - expect(registry.public_send("last_#{type}_verification_failed")).to be_falsey - end - - it 'records a failure' do - service.send(:record_status, error_msg: 'Repository checksum did not match') - registry.reload - - expect(registry.public_send("#{type}_verification_checksum")).to be_nil - expect(registry.public_send("last_#{type}_verification_at")).not_to be_nil - expect(registry.public_send("last_#{type}_verification_failure")).to eq 'Repository checksum did not match' - expect(registry.public_send("last_#{type}_verification_failed")).to be_truthy - end - end - - describe '#record_status' do - let(:registry) { create(:geo_project_registry) } - - context 'for a repository' do - let(:service) { described_class.new(registry, :repository) } - - include_examples 'record_status for repositories/wikis', :repository - end - - context 'for a wiki' do - let(:service) { described_class.new(registry, :wiki) } - - include_examples 'record_status for repositories/wikis', :wiki + include_examples 'verify checksums for repositories/wikis', :wiki end end end diff --git a/ee/spec/services/geo/wiki_sync_service_spec.rb b/ee/spec/services/geo/wiki_sync_service_spec.rb index cad05fea12d..07a157a14b0 100644 --- a/ee/spec/services/geo/wiki_sync_service_spec.rb +++ b/ee/spec/services/geo/wiki_sync_service_spec.rb @@ -139,6 +139,18 @@ RSpec.describe Geo::WikiSyncService do expect(registry.last_wiki_successful_sync_at).not_to be_nil end + it 'resets the wiki_verification_checksum' do + subject.execute + + expect(registry.wiki_verification_checksum).to be_nil + end + + it 'resets the last_wiki_verification_failure' do + subject.execute + + expect(registry.last_wiki_verification_failure).to be_nil + end + it 'logs success with timings' do allow(Gitlab::Geo::Logger).to receive(:info).and_call_original expect(Gitlab::Geo::Logger).to receive(:info).with(hash_including(:message, :update_delay_s, :download_time_s)).and_call_original diff --git a/ee/spec/services/wiki_pages/create_service_spec.rb b/ee/spec/services/wiki_pages/create_service_spec.rb index 2dff7483b64..d4aec02be83 100644 --- a/ee/spec/services/wiki_pages/create_service_spec.rb +++ b/ee/spec/services/wiki_pages/create_service_spec.rb @@ -19,17 +19,22 @@ describe WikiPages::CreateService do end describe '#execute' do - context 'when running on a Geo primary node' do - before do - allow(Gitlab::Geo).to receive(:primary?) { true } - end + it 'calls Geo::RepositoryUpdatedService when running on a Geo primary node' do + allow(Gitlab::Geo).to receive(:primary?) { true } - it 'triggers Geo::RepositoryUpdatedEventStore when Geo is enabled' do - expect(::Geo::RepositoryUpdatedEventStore).to receive(:new).with(instance_of(Project), source: Geo::RepositoryUpdatedEvent::WIKI).and_call_original - expect_any_instance_of(::Geo::RepositoryUpdatedEventStore).to receive(:create) + repository_updated_service = instance_double('::Geo::RepositoryUpdatedService') + expect(::Geo::RepositoryUpdatedService).to receive(:new).with(project, source: Geo::RepositoryUpdatedEvent::WIKI) { repository_updated_service } + expect(repository_updated_service).to receive(:execute) - service.execute - end + service.execute + end + + it 'does not call Geo::RepositoryUpdatedService when not running on a Geo primary node' do + allow(Gitlab::Geo).to receive(:primary?) { false } + + expect(::Geo::RepositoryUpdatedService).not_to receive(:new).with(project, source: Geo::RepositoryUpdatedEvent::WIKI) + + service.execute end end end diff --git a/ee/spec/services/wiki_pages/destroy_service_spec.rb b/ee/spec/services/wiki_pages/destroy_service_spec.rb index 7bf8c829fd9..ad0e5b8926a 100644 --- a/ee/spec/services/wiki_pages/destroy_service_spec.rb +++ b/ee/spec/services/wiki_pages/destroy_service_spec.rb @@ -12,17 +12,22 @@ describe WikiPages::DestroyService do end describe '#execute' do - context 'when running on a Geo primary node' do - before do - allow(Gitlab::Geo).to receive(:primary?) { true } - end + it 'calls Geo::RepositoryUpdatedService when running on a Geo primary node' do + allow(Gitlab::Geo).to receive(:primary?) { true } - it 'triggers Geo::RepositoryUpdatedEventStore when Geo is enabled' do - expect(::Geo::RepositoryUpdatedEventStore).to receive(:new).with(instance_of(Project), source: ::Geo::RepositoryUpdatedEvent::WIKI).and_call_original - expect_any_instance_of(::Geo::RepositoryUpdatedEventStore).to receive(:create) + repository_updated_service = instance_double('::Geo::RepositoryUpdatedService') + expect(::Geo::RepositoryUpdatedService).to receive(:new).with(project, source: Geo::RepositoryUpdatedEvent::WIKI) { repository_updated_service } + expect(repository_updated_service).to receive(:execute) - service.execute(page) - end + service.execute(page) + end + + it 'does not call Geo::RepositoryUpdatedService when not running on a Geo primary node' do + allow(Gitlab::Geo).to receive(:primary?) { false } + + expect(::Geo::RepositoryUpdatedService).not_to receive(:new).with(project, source: Geo::RepositoryUpdatedEvent::WIKI) + + service.execute(page) end end end diff --git a/ee/spec/services/wiki_pages/update_service_spec.rb b/ee/spec/services/wiki_pages/update_service_spec.rb index 0be7c90606f..6b87c484258 100644 --- a/ee/spec/services/wiki_pages/update_service_spec.rb +++ b/ee/spec/services/wiki_pages/update_service_spec.rb @@ -20,17 +20,22 @@ describe WikiPages::UpdateService do end describe '#execute' do - context 'when running on a Geo primary node' do - before do - allow(Gitlab::Geo).to receive(:primary?) { true } - end + it 'calls Geo::RepositoryUpdatedService when running on a Geo primary node' do + allow(Gitlab::Geo).to receive(:primary?) { true } - it 'triggers Geo::RepositoryUpdatedEventStore when Geo is enabled' do - expect(::Geo::RepositoryUpdatedEventStore).to receive(:new).with(instance_of(Project), source: Geo::RepositoryUpdatedEvent::WIKI).and_call_original - expect_any_instance_of(::Geo::RepositoryUpdatedEventStore).to receive(:create) + repository_updated_service = instance_double('::Geo::RepositoryUpdatedService') + expect(::Geo::RepositoryUpdatedService).to receive(:new).with(project, source: Geo::RepositoryUpdatedEvent::WIKI) { repository_updated_service } + expect(repository_updated_service).to receive(:execute) - service.execute(page) - end + service.execute(page) + end + + it 'does not call Geo::RepositoryUpdatedService when not running on a Geo primary node' do + allow(Gitlab::Geo).to receive(:primary?) { false } + + expect(::Geo::RepositoryUpdatedService).not_to receive(:new).with(project, source: Geo::RepositoryUpdatedEvent::WIKI) + + service.execute(page) end end end diff --git a/ee/spec/workers/geo/repository_verification/primary/shard_worker_spec.rb b/ee/spec/workers/geo/repository_verification/primary/shard_worker_spec.rb index 68d5596e267..712b9b2d595 100644 --- a/ee/spec/workers/geo/repository_verification/primary/shard_worker_spec.rb +++ b/ee/spec/workers/geo/repository_verification/primary/shard_worker_spec.rb @@ -37,11 +37,11 @@ describe Geo::RepositoryVerification::Primary::ShardWorker, :postgresql, :clean_ create(:repository_state, :wiki_outdated, project: wiki_outdated) expect(Geo::RepositoryVerification::Primary::SingleWorker) - .not_to receive(:perform_async).with(verified_project.id, instance_of(Time)) + .not_to receive(:perform_async).with(verified_project.id) expect(Geo::RepositoryVerification::Primary::SingleWorker) - .to receive(:perform_async).with(repository_outdated.id, instance_of(Time)) + .to receive(:perform_async).with(repository_outdated.id) expect(Geo::RepositoryVerification::Primary::SingleWorker) - .to receive(:perform_async).with(wiki_outdated.id, instance_of(Time)) + .to receive(:perform_async).with(wiki_outdated.id) subject.perform(shard_name) end @@ -52,7 +52,7 @@ describe Geo::RepositoryVerification::Primary::ShardWorker, :postgresql, :clean_ create(:repository_state, :wiki_verified, project: missing_repository_verification) expect(Geo::RepositoryVerification::Primary::SingleWorker) - .to receive(:perform_async).with(missing_repository_verification.id, instance_of(Time)) + .to receive(:perform_async).with(missing_repository_verification.id) subject.perform(shard_name) end @@ -63,7 +63,7 @@ describe Geo::RepositoryVerification::Primary::ShardWorker, :postgresql, :clean_ create(:repository_state, :repository_verified, project: missing_wiki_verification) expect(Geo::RepositoryVerification::Primary::SingleWorker) - .to receive(:perform_async).with(missing_wiki_verification.id, instance_of(Time)) + .to receive(:perform_async).with(missing_wiki_verification.id) subject.perform(shard_name) end @@ -107,11 +107,11 @@ describe Geo::RepositoryVerification::Primary::ShardWorker, :postgresql, :clean_ create(:repository_state, :repository_outdated, project: missing_outdated) expect(Geo::RepositoryVerification::Primary::SingleWorker) - .to receive(:perform_async).with(healthy_unverified.id, instance_of(Time)) + .to receive(:perform_async).with(healthy_unverified.id) expect(Geo::RepositoryVerification::Primary::SingleWorker) - .not_to receive(:perform_async).with(missing_not_verified.id, instance_of(Time)) + .not_to receive(:perform_async).with(missing_not_verified.id) expect(Geo::RepositoryVerification::Primary::SingleWorker) - .not_to receive(:perform_async).with(missing_outdated.id, instance_of(Time)) + .not_to receive(:perform_async).with(missing_outdated.id) Sidekiq::Testing.inline! { subject.perform(shard_name) } end diff --git a/ee/spec/workers/geo/repository_verification/primary/single_worker_spec.rb b/ee/spec/workers/geo/repository_verification/primary/single_worker_spec.rb index 7591769b048..3aa91891d9a 100644 --- a/ee/spec/workers/geo/repository_verification/primary/single_worker_spec.rb +++ b/ee/spec/workers/geo/repository_verification/primary/single_worker_spec.rb @@ -22,7 +22,7 @@ describe Geo::RepositoryVerification::Primary::SingleWorker, :postgresql, :clean expect_any_instance_of(Gitlab::Git::Checksum).not_to receive(:calculate) - subject.perform(project_without_repositories.id, Time.now) + subject.perform(project_without_repositories.id) end it 'does not calculate the checksum when project is pending deletion' do @@ -30,97 +30,105 @@ describe Geo::RepositoryVerification::Primary::SingleWorker, :postgresql, :clean expect_any_instance_of(Gitlab::Git::Checksum).not_to receive(:calculate) - subject.perform(project_with_repositories.id, Time.now) + subject.perform(project_with_repositories.id) end it 'does not raise an error when project could not be found' do - expect { subject.perform(-1, Time.now) }.not_to raise_error + expect { subject.perform(-1) }.not_to raise_error end it 'calculates the checksum for unverified projects' do - subject.perform(project_with_repositories.id, Time.now) + subject.perform(project_with_repositories.id) expect(project_with_repositories.repository_state).to have_attributes( repository_verification_checksum: instance_of(String), last_repository_verification_failure: nil, - last_repository_verification_at: instance_of(Time), - last_repository_verification_failed: false, wiki_verification_checksum: instance_of(String), - last_wiki_verification_failure: nil, - last_wiki_verification_at: instance_of(Time), - last_wiki_verification_failed: false + last_wiki_verification_failure: nil ) end it 'calculates the checksum for outdated projects' do repository_state = - create(:repository_state, :repository_verified, :wiki_verified, + create(:repository_state, + project: project_with_repositories, + repository_verification_checksum: nil, + wiki_verification_checksum: nil) + + subject.perform(project_with_repositories.id) + + repository_state.reload + + expect(repository_state.repository_verification_checksum).not_to be_nil + expect(repository_state.wiki_verification_checksum).not_to be_nil + end + + it 'calculates the checksum for outdated repositories' do + repository_state = + create(:repository_state, + project: project_with_repositories, + repository_verification_checksum: nil, + wiki_verification_checksum: 'e123') + + subject.perform(project_with_repositories.id) + + repository_state.reload + + expect(repository_state.repository_verification_checksum).not_to be_nil + expect(repository_state.wiki_verification_checksum).to eq 'e123' + end + + it 'calculates the checksum for outdated wikis' do + repository_state = + create(:repository_state, project: project_with_repositories, repository_verification_checksum: 'f123', - last_repository_verification_at: Time.now - 1.hour, - wiki_verification_checksum: 'e123', - last_wiki_verification_at: Time.now - 1.hour) + wiki_verification_checksum: nil) - subject.perform(project_with_repositories.id, Time.now) + subject.perform(project_with_repositories.id) repository_state.reload - expect(repository_state.repository_verification_checksum).not_to eq 'f123' - expect(repository_state.last_repository_verification_at).to be_within(10.seconds).of(Time.now) - expect(repository_state.wiki_verification_checksum).not_to eq 'e123' - expect(repository_state.last_wiki_verification_at).to be_within(10.seconds).of(Time.now) + expect(repository_state.repository_verification_checksum).to eq 'f123' + expect(repository_state.wiki_verification_checksum).not_to be_nil end it 'does not recalculate the checksum for projects up to date' do - last_verification_at = Time.now - repository_state = - create(:repository_state, :repository_verified, :wiki_verified, + create(:repository_state, project: project_with_repositories, repository_verification_checksum: 'f123', - last_repository_verification_at: last_verification_at, - wiki_verification_checksum: 'e123', - last_wiki_verification_at: last_verification_at) + wiki_verification_checksum: 'e123') - subject.perform(project_with_repositories.id, Time.now - 1.hour) + subject.perform(project_with_repositories.id - 1.hour) expect(repository_state.reload).to have_attributes( repository_verification_checksum: 'f123', - last_repository_verification_at: be_within(1.second).of(last_verification_at), - wiki_verification_checksum: 'e123', - last_wiki_verification_at: be_within(1.second).of(last_verification_at) + wiki_verification_checksum: 'e123' ) end it 'does not calculate the wiki checksum when wiki is not enabled for project' do project_with_repositories.update!(wiki_enabled: false) - subject.perform(project_with_repositories.id, Time.now) + subject.perform(project_with_repositories.id) expect(project_with_repositories.repository_state).to have_attributes( repository_verification_checksum: instance_of(String), last_repository_verification_failure: nil, - last_repository_verification_at: instance_of(Time), - last_repository_verification_failed: false, wiki_verification_checksum: nil, - last_wiki_verification_failure: nil, - last_wiki_verification_at: nil, - last_wiki_verification_failed: false + last_wiki_verification_failure: nil ) end it 'keeps track of failures when calculating the repository checksum' do - subject.perform(project_without_repositories.id, Time.now) + subject.perform(project_without_repositories.id) expect(project_without_repositories.repository_state).to have_attributes( repository_verification_checksum: nil, last_repository_verification_failure: /No repository for such path/, - last_repository_verification_at: instance_of(Time), - last_repository_verification_failed: true, wiki_verification_checksum: nil, - last_wiki_verification_failure: /No repository for such path/, - last_wiki_verification_at: instance_of(Time), - last_wiki_verification_failed: true + last_wiki_verification_failure: /No repository for such path/ ) end end diff --git a/ee/spec/workers/post_receive_spec.rb b/ee/spec/workers/post_receive_spec.rb index 3f8730f6aff..441e98136e6 100644 --- a/ee/spec/workers/post_receive_spec.rb +++ b/ee/spec/workers/post_receive_spec.rb @@ -26,8 +26,18 @@ describe PostReceive do allow_any_instance_of(GitPushService).to receive(:execute).and_return(true) end - it 'calls Geo::RepositoryUpdatedEventStore' do - expect_any_instance_of(::Geo::RepositoryUpdatedEventStore).to receive(:create) + it 'calls Geo::RepositoryUpdatedService when running on a Geo primary node' do + allow(Gitlab::Geo).to receive(:primary?) { true } + + expect_any_instance_of(::Geo::RepositoryUpdatedService).to receive(:execute) + + described_class.new.perform(gl_repository, key_id, base64_changes) + end + + it 'does not call Geo::RepositoryUpdatedEventStore when not running on a Geo primary node' do + allow(Gitlab::Geo).to receive(:primary?) { false } + + expect_any_instance_of(::Geo::RepositoryUpdatedService).not_to receive(:execute) described_class.new.perform(gl_repository, key_id, base64_changes) end @@ -37,11 +47,18 @@ describe PostReceive do describe '#process_wiki_changes' do let(:gl_repository) { "wiki-#{project.id}" } - it 'triggers Geo::RepositoryUpdatedEventStore when Geo is enabled' do - allow(Gitlab::Geo).to receive(:enabled?) { true } + it 'calls Geo::RepositoryUpdatedEventStore when running on a Geo primary node' do + allow(Gitlab::Geo).to receive(:primary?) { true } + + expect_any_instance_of(::Geo::RepositoryUpdatedService).to receive(:execute) + + described_class.new.perform(gl_repository, key_id, base64_changes) + end + + it 'does not call Geo::RepositoryUpdatedEventStore when not running on a Geo primary node' do + allow(Gitlab::Geo).to receive(:primary?) { false } - expect(::Geo::RepositoryUpdatedEventStore).to receive(:new).with(instance_of(Project), source: ::Geo::RepositoryUpdatedEvent::WIKI).and_call_original - expect_any_instance_of(::Geo::RepositoryUpdatedEventStore).to receive(:create) + expect_any_instance_of(::Geo::RepositoryUpdatedService).not_to receive(:execute) described_class.new.perform(gl_repository, key_id, base64_changes) end -- 2.30.9