Commit 957212b5 authored by Michael Kozono's avatar Michael Kozono

Merge branch '212720-geo-make-registry-tables-as-ssot-for-designs' into 'master'

Geo - Make Registry tables as SSOT for Designs

Closes #212720

See merge request gitlab-org/gitlab!36587
parents 97f71074 757137a3
...@@ -3,30 +3,77 @@ ...@@ -3,30 +3,77 @@
module Geo module Geo
class DesignRegistryFinder < RegistryFinder class DesignRegistryFinder < RegistryFinder
def count_syncable def count_syncable
GeoNode.find(current_node_id).projects.with_designs.count current_node(fdw: false).designs.count
end end
def count_synced def count_synced
registries registries.merge(Geo::DesignRegistry.synced).count
.merge(Geo::DesignRegistry.synced).count
end end
def count_failed def count_failed
registries registries.merge(Geo::DesignRegistry.failed).count
.merge(Geo::DesignRegistry.failed).count
end end
def count_registry def count_registry
registries.count registries.count
end end
def find_registry_differences(range)
source_ids = Gitlab::Geo.current_node.designs.id_in(range).pluck_primary_key
tracked_ids = Geo::DesignRegistry.pluck_model_ids_in_range(range)
untracked_ids = source_ids - tracked_ids
unused_tracked_ids = tracked_ids - source_ids
[untracked_ids, unused_tracked_ids]
end
# Returns Geo::DesignRegistry records that have never been synced.
#
# Does not care about selective sync, because it considers the Registry
# table to be the single source of truth. The contract is that other
# processes need to ensure that the table only contains records that should
# be synced.
#
# Any registries that have ever been synced that currently need to be
# resynced will be handled by other find methods (like
# #find_retryable_dirty_registries)
#
# You can pass a list with `except_ids:` so you can exclude items you
# already scheduled but haven't finished and aren't persisted to the database yet
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_ids ids that will be ignored from the query
# rubocop:disable CodeReuse/ActiveRecord
def find_never_synced_registries(batch_size:, except_ids: [])
Geo::DesignRegistry
.never_synced
.model_id_not_in(except_ids)
.limit(batch_size)
end
# rubocop:enable CodeReuse/ActiveRecord
# rubocop:disable CodeReuse/ActiveRecord
def find_retryable_dirty_registries(batch_size:, except_ids: [])
Geo::DesignRegistry
.updated_recently
.model_id_not_in(except_ids)
.order(Gitlab::Database.nulls_first_order(:last_synced_at))
.limit(batch_size)
end
# rubocop:enable CodeReuse/ActiveRecord
private private
def registries def registries
current_node if Geo::DesignRegistry.registry_consistency_worker_enabled?
Geo::DesignRegistry.all
else
current_node(fdw: true)
.projects .projects
.with_designs .with_designs
.inner_join_design_registry .inner_join_design_registry
end end
end end
end
end end
...@@ -140,7 +140,7 @@ module EE ...@@ -140,7 +140,7 @@ module EE
scope :aimed_for_deletion, -> (date) { where('marked_for_deletion_at <= ?', date).without_deleted } scope :aimed_for_deletion, -> (date) { where('marked_for_deletion_at <= ?', date).without_deleted }
scope :with_repos_templates, -> { where(namespace_id: ::Gitlab::CurrentSettings.current_application_settings.custom_project_templates_group_id) } scope :with_repos_templates, -> { where(namespace_id: ::Gitlab::CurrentSettings.current_application_settings.custom_project_templates_group_id) }
scope :with_groups_level_repos_templates, -> { joins("INNER JOIN namespaces ON projects.namespace_id = namespaces.custom_project_templates_group_id") } scope :with_groups_level_repos_templates, -> { joins("INNER JOIN namespaces ON projects.namespace_id = namespaces.custom_project_templates_group_id") }
scope :with_designs, -> { where(id: ::DesignManagement::Design.select(:project_id)) } scope :with_designs, -> { where(id: ::DesignManagement::Design.select(:project_id).distinct) }
scope :with_deleting_user, -> { includes(:deleting_user) } scope :with_deleting_user, -> { includes(:deleting_user) }
scope :with_compliance_framework_settings, -> { preload(:compliance_framework_setting) } scope :with_compliance_framework_settings, -> { preload(:compliance_framework_setting) }
scope :has_vulnerabilities, -> { joins(:vulnerabilities).group(:id) } scope :has_vulnerabilities, -> { joins(:vulnerabilities).group(:id) }
......
...@@ -10,6 +10,7 @@ class Geo::DesignRegistry < Geo::BaseRegistry ...@@ -10,6 +10,7 @@ class Geo::DesignRegistry < Geo::BaseRegistry
belongs_to :project belongs_to :project
scope :never_synced, -> { with_state(:pending).where(last_synced_at: nil) }
scope :pending, -> { with_state(:pending) } scope :pending, -> { with_state(:pending) }
scope :failed, -> { with_state(:failed) } scope :failed, -> { with_state(:failed) }
scope :synced, -> { with_state(:synced) } scope :synced, -> { with_state(:synced) }
...@@ -39,6 +40,28 @@ class Geo::DesignRegistry < Geo::BaseRegistry ...@@ -39,6 +40,28 @@ class Geo::DesignRegistry < Geo::BaseRegistry
end end
end end
def self.registry_consistency_worker_enabled?
Feature.enabled?(:geo_design_registry_ssot_sync)
end
def self.delete_for_model_ids(project_ids)
# We only need to delete the registry entries here. The design
# repository deletion should happen when a project is destroyed.
#
# See: https://gitlab.com/gitlab-org/gitlab/-/issues/13429
where(project_id: project_ids).delete_all
project_ids
end
def self.finder_class
::Geo::DesignRegistryFinder
end
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end
# Search for a list of projects associated with registries, # Search for a list of projects associated with registries,
# based on the query given in `query`. # based on the query given in `query`.
# #
......
...@@ -262,6 +262,10 @@ class GeoNode < ApplicationRecord ...@@ -262,6 +262,10 @@ class GeoNode < ApplicationRecord
ContainerRepository.project_id_in(projects) ContainerRepository.project_id_in(projects)
end end
def designs
projects.with_designs
end
def lfs_objects def lfs_objects
return LfsObject.all unless selective_sync? return LfsObject.all unless selective_sync?
......
...@@ -10,16 +10,50 @@ module Geo ...@@ -10,16 +10,50 @@ module Geo
{ project_id: project_id, job_id: job_id } if job_id { project_id: project_id, job_id: job_id } if job_id
end end
# rubocop: disable CodeReuse/ActiveRecord
def find_project_ids_not_synced(except_ids:, batch_size:) def find_project_ids_not_synced(except_ids:, batch_size:)
if Geo::DesignRegistry.registry_consistency_worker_enabled?
project_ids =
find_never_synced_project_ids(batch_size: batch_size, except_ids: except_ids)
find_project_ids_within_shard(project_ids, direction: :desc)
else
Geo::DesignUnsyncedFinder Geo::DesignUnsyncedFinder
.new(scheduled_project_ids: except_ids, shard_name: shard_name, batch_size: batch_size) .new(scheduled_project_ids: except_ids, shard_name: shard_name, batch_size: batch_size)
.execute .execute
end end
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def find_project_ids_updated_recently(except_ids:, batch_size:) def find_project_ids_updated_recently(except_ids:, batch_size:)
if Geo::DesignRegistry.registry_consistency_worker_enabled?
project_ids =
find_retryable_dirty_project_ids(batch_size: batch_size, except_ids: except_ids)
find_project_ids_within_shard(project_ids, direction: :asc)
else
Geo::DesignUpdatedRecentlyFinder Geo::DesignUpdatedRecentlyFinder
.new(scheduled_project_ids: except_ids, shard_name: shard_name, batch_size: batch_size) .new(scheduled_project_ids: except_ids, shard_name: shard_name, batch_size: batch_size)
.execute .execute
end end
end end
# rubocop: enable CodeReuse/ActiveRecord
def find_never_synced_project_ids(batch_size:, except_ids:)
registry_finder
.find_never_synced_registries(batch_size: batch_size, except_ids: except_ids)
.pluck_model_foreign_key
end
def find_retryable_dirty_project_ids(batch_size:, except_ids:)
registry_finder
.find_retryable_dirty_registries(batch_size: batch_size, except_ids: except_ids)
.pluck_model_foreign_key
end
def registry_finder
@registry_finder ||= Geo::DesignRegistryFinder.new
end
end
end end
...@@ -16,6 +16,7 @@ module Geo ...@@ -16,6 +16,7 @@ module Geo
feature_category :geo_replication feature_category :geo_replication
REGISTRY_CLASSES = [ REGISTRY_CLASSES = [
Geo::DesignRegistry,
Geo::JobArtifactRegistry, Geo::JobArtifactRegistry,
Geo::LfsObjectRegistry, Geo::LfsObjectRegistry,
Geo::PackageFileRegistry, Geo::PackageFileRegistry,
......
...@@ -7,8 +7,9 @@ FactoryBot.define do ...@@ -7,8 +7,9 @@ FactoryBot.define do
last_synced_at { nil } last_synced_at { nil }
state { :pending } state { :pending }
after(:create) do |registry, evaluator| after(:create) do |registry|
create(:design, project: registry.project) issue = create(:issue, project: registry.project)
create(:design, project: registry.project, issue: issue)
end end
trait :synced do trait :synced do
......
# frozen_string_literal: true # frozen_string_literal: true
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::DesignRegistryFinder, :geo, :geo_fdw do RSpec.describe Geo::DesignRegistryFinder, :geo do
include ::EE::GeoHelpers include ::EE::GeoHelpers
let!(:secondary) { create(:geo_node) } let(:secondary) { create(:geo_node) }
let!(:failed_registry) { create(:geo_design_registry, :sync_failed) } let(:project_1) { create(:project) }
let!(:synced_registry) { create(:geo_design_registry, :synced) } let(:project_2) { create(:project) }
let(:project_3) { create(:project) }
let(:synced_group) { create(:group) } let(:project_4) { create(:project) }
let(:unsynced_group) { create(:group) } let(:project_5) { create(:project) }
let(:synced_project) { create(:project, group: synced_group) } let(:project_6) { create(:project) }
let(:unsynced_project) { create(:project, :broken_storage, group: unsynced_group) }
subject { described_class.new(current_node_id: secondary.id) } subject { described_class.new(current_node_id: secondary.id) }
context 'when geo_design_registry_ssot_sync is disabled', :geo_fdw do
let!(:failed_registry) { create(:geo_design_registry, :sync_failed) }
let!(:synced_registry) { create(:geo_design_registry, :synced) }
before do before do
stub_current_geo_node(secondary) stub_feature_flags(geo_design_registry_ssot_sync: false)
end end
context 'count all the things' do
describe '#count_syncable' do describe '#count_syncable' do
it 'returns number of design repositories' do it 'returns number of design repositories' do
# One more design for the same project to assert absence of duplicates # One more design for the same project to assert absence of duplicates
create(:design, project: synced_registry.project) create(:design, project: synced_registry.project)
result = subject.count_syncable expect(subject.count_syncable).to eq(2)
expect(result).to eq(2)
end end
end end
describe '#count_synced' do describe '#count_synced' do
it 'returns only synced registry' do it 'returns only synced registry' do
result = subject.count_synced expect(subject.count_synced).to eq(1)
expect(result).to eq(1)
end end
end end
describe '#count_failed' do describe '#count_failed' do
it 'returns only failed registry' do it 'returns only failed registry' do
result = subject.count_failed expect(subject.count_failed).to eq(1)
expect(result).to eq(1)
end end
end end
describe '#count_registry' do describe '#count_registry' do
it 'returns number of all registries' do it 'returns number of all registries' do
result = subject.count_registry expect(subject.count_registry).to eq(2)
expect(result).to eq(2)
end end
end end
context 'selective sync' do context 'selective sync' do
let(:synced_group) { create(:group) }
let(:unsynced_group) { create(:group) }
let(:synced_project) { create(:project, group: synced_group) }
let(:unsynced_project) { create(:project, :broken_storage, group: unsynced_group) }
let(:unsynced_project2) { create(:project, group: unsynced_group) } let(:unsynced_project2) { create(:project, group: unsynced_group) }
let(:synced_project2) { create(:project, group: synced_group) } let(:synced_project2) { create(:project, group: synced_group) }
...@@ -103,4 +101,254 @@ RSpec.describe Geo::DesignRegistryFinder, :geo, :geo_fdw do ...@@ -103,4 +101,254 @@ RSpec.describe Geo::DesignRegistryFinder, :geo, :geo_fdw do
end end
end end
end end
context 'when geo_design_registry_ssot_sync is enabled' do
let!(:registry_project_1) { create(:geo_design_registry, :synced, project_id: project_1.id) }
let!(:registry_project_2) { create(:geo_design_registry, :sync_failed, project_id: project_2.id) }
let!(:registry_project_3) { create(:geo_design_registry, project_id: project_3.id, last_synced_at: nil) }
let!(:registry_project_4) { create(:geo_design_registry, project_id: project_4.id, last_synced_at: 3.days.ago, retry_at: 2.days.ago) }
let!(:registry_project_5) { create(:geo_design_registry, project_id: project_5.id, last_synced_at: 6.days.ago) }
let!(:registry_project_6) { create(:geo_design_registry, project_id: project_6.id, last_synced_at: nil) }
before do
stub_feature_flags(geo_design_registry_ssot_sync: true)
end
describe '#count_syncable' do
it 'returns number of design repositories' do
# One more design for the same project to assert absence of duplicates
create(:design, project: project_1)
result = subject.count_syncable
expect(result).to eq(6)
end
end
describe '#count_synced' do
it 'returns only synced registry' do
expect(subject.count_synced).to eq(1)
end
end
describe '#count_failed' do
it 'returns only failed registry' do
expect(subject.count_failed).to eq(1)
end
end
describe '#count_registry' do
it 'returns number of all registries' do
expect(subject.count_registry).to eq(6)
end
end
end
describe '#find_registry_differences' do
let_it_be(:secondary) { create(:geo_node) }
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group) { create(:group, parent: synced_group) }
let_it_be(:project_1) { create(:project, group: synced_group) }
let_it_be(:project_2) { create(:project, group: nested_group) }
let_it_be(:project_3) { create(:project) }
let_it_be(:project_4) { create(:project) }
let_it_be(:project_5) { create(:project, :broken_storage) }
let_it_be(:project_6) { create(:project, :broken_storage) }
let_it_be(:project_7) { create(:project) }
before_all do
create(:design, project: project_1)
create(:design, project: project_2)
create(:design, project: project_3)
create(:design, project: project_4)
create(:design, project: project_5)
create(:design, project: project_6)
end
before do
stub_current_geo_node(secondary)
end
context 'untracked IDs' do
before do
create(:geo_design_registry, project_id: project_1.id)
create(:geo_design_registry, :sync_failed, project_id: project_3.id)
create(:geo_design_registry, project_id: project_5.id)
end
it 'includes project IDs without an entry on the tracking database' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([project_2.id, project_4.id, project_6.id])
end
it 'excludes projects outside the ID range' do
untracked_ids, _ = subject.find_registry_differences(project_4.id..project_6.id)
expect(untracked_ids).to match_array([project_4.id, project_6.id])
end
it 'excludes projects without designs' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).not_to include([project_7])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes project IDs that are not in selectively synced projects' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([project_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes project IDs that are not in selectively synced projects' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([project_6.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:geo_design_registry, project_id: project_1.id) }
before do
project_1.delete
end
it 'includes tracked IDs that do not exist in the model table' do
range = project_1.id..project_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_1.id])
end
it 'excludes IDs outside the ID range' do
range = (project_1.id + 1)..Project.maximum(:id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked project' do
context 'excluded from selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_3.id) }
it 'includes tracked project IDs that exist but are not in a selectively synced project' do
range = project_3.id..project_3.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_3.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_1.id) }
it 'excludes tracked project IDs that are in selectively synced projects' do
range = project_1.id..project_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked project' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_1.id) }
context 'excluded from selective sync' do
it 'includes tracked project IDs that exist but are not in a selectively synced project' do
range = project_1.id..project_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_1.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_5.id) }
it 'excludes tracked project IDs that are in selectively synced projects' do
range = project_5.id..project_5.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
describe '#find_never_synced_registries' do
let!(:registry_project_1) { create(:geo_design_registry, :synced, project_id: project_1.id) }
let!(:registry_project_2) { create(:geo_design_registry, :sync_failed, project_id: project_2.id) }
let!(:registry_project_3) { create(:geo_design_registry, project_id: project_3.id, last_synced_at: nil) }
let!(:registry_project_4) { create(:geo_design_registry, project_id: project_4.id, last_synced_at: 3.days.ago, retry_at: 2.days.ago) }
let!(:registry_project_5) { create(:geo_design_registry, project_id: project_5.id, last_synced_at: 6.days.ago) }
let!(:registry_project_6) { create(:geo_design_registry, project_id: project_6.id, last_synced_at: nil) }
it 'returns registries for projects that have never been synced' do
registries = subject.find_never_synced_registries(batch_size: 10)
expect(registries).to match_ids(registry_project_3, registry_project_6)
end
it 'excludes except_ids' do
registries = subject.find_never_synced_registries(batch_size: 10, except_ids: [project_3.id])
expect(registries).to match_ids(registry_project_6)
end
end
describe '#find_retryable_dirty_registries' do
let!(:registry_project_1) { create(:geo_design_registry, :synced, project_id: project_1.id) }
let!(:registry_project_2) { create(:geo_design_registry, :sync_failed, project_id: project_2.id) }
let!(:registry_project_3) { create(:geo_design_registry, project_id: project_3.id, last_synced_at: nil) }
let!(:registry_project_4) { create(:geo_design_registry, project_id: project_4.id, last_synced_at: 3.days.ago, retry_at: 2.days.ago) }
let!(:registry_project_5) { create(:geo_design_registry, project_id: project_5.id, last_synced_at: 6.days.ago) }
let!(:registry_project_6) { create(:geo_design_registry, project_id: project_6.id, last_synced_at: nil) }
it 'returns registries for projects that have been recently updated' do
registries = subject.find_retryable_dirty_registries(batch_size: 10)
expect(registries).to match_ids(registry_project_2, registry_project_3, registry_project_4, registry_project_5, registry_project_6)
end
it 'excludes except_ids' do
registries = subject.find_retryable_dirty_registries(batch_size: 10, except_ids: [project_4.id, project_5.id, project_6.id])
expect(registries).to match_ids(registry_project_2, registry_project_3)
end
end
end end
...@@ -8,8 +8,6 @@ RSpec.describe Geo::DesignRegistry, :geo do ...@@ -8,8 +8,6 @@ RSpec.describe Geo::DesignRegistry, :geo do
let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined
end end
let!(:design_registry) { create(:geo_design_registry) }
describe 'relationships' do describe 'relationships' do
it { is_expected.to belong_to(:project) } it { is_expected.to belong_to(:project) }
end end
...@@ -19,6 +17,7 @@ RSpec.describe Geo::DesignRegistry, :geo do ...@@ -19,6 +17,7 @@ RSpec.describe Geo::DesignRegistry, :geo do
end end
describe '#search', :geo_fdw do describe '#search', :geo_fdw do
let!(:design_registry) { create(:geo_design_registry) }
let!(:failed_registry) { create(:geo_design_registry, :sync_failed) } let!(:failed_registry) { create(:geo_design_registry, :sync_failed) }
let!(:synced_registry) { create(:geo_design_registry, :synced) } let!(:synced_registry) { create(:geo_design_registry, :synced) }
...@@ -94,6 +93,8 @@ RSpec.describe Geo::DesignRegistry, :geo do ...@@ -94,6 +93,8 @@ RSpec.describe Geo::DesignRegistry, :geo do
end end
describe '#should_be_redownloaded?' do describe '#should_be_redownloaded?' do
let_it_be(:design_registry) { create(:geo_design_registry) }
context 'when force_to_redownload is false' do context 'when force_to_redownload is false' do
it 'returns false' do it 'returns false' do
expect(design_registry.should_be_redownloaded?).to be false expect(design_registry.should_be_redownloaded?).to be false
......
...@@ -11,19 +11,18 @@ RSpec.describe Geo::RegistryConsistencyService, :geo, :use_clean_rails_memory_st ...@@ -11,19 +11,18 @@ RSpec.describe Geo::RegistryConsistencyService, :geo, :use_clean_rails_memory_st
stub_current_geo_node(secondary) stub_current_geo_node(secondary)
end end
def model_class_factory_name(model_class) def model_class_factory_name(registry_class)
if model_class == ::Packages::PackageFile return :project_with_design if registry_class == ::Geo::DesignRegistry
:package_file_with_file return :package_file_with_file if registry_class == ::Geo::PackageFileRegistry
else
model_class.underscore.tr('/', '_').to_sym registry_class::MODEL_CLASS.underscore.tr('/', '_').to_sym
end
end end
shared_examples 'registry consistency service' do |klass| shared_examples 'registry consistency service' do |klass|
let(:registry_class) { klass } let(:registry_class) { klass }
let(:registry_class_factory) { registry_factory_name(registry_class) } let(:registry_class_factory) { registry_factory_name(registry_class) }
let(:model_class) { registry_class::MODEL_CLASS } let(:model_class) { registry_class::MODEL_CLASS }
let(:model_class_factory) { model_class_factory_name(model_class) } let(:model_class_factory) { model_class_factory_name(registry_class) }
let(:model_foreign_key) { registry_class::MODEL_FOREIGN_KEY } let(:model_foreign_key) { registry_class::MODEL_FOREIGN_KEY }
let(:batch_size) { 2 } let(:batch_size) { 2 }
......
...@@ -17,7 +17,6 @@ RSpec.describe Geo::DesignRepositoryShardSyncWorker, :geo, :geo_fdw, :clean_gitl ...@@ -17,7 +17,6 @@ RSpec.describe Geo::DesignRepositoryShardSyncWorker, :geo, :geo_fdw, :clean_gitl
describe '#perform' do describe '#perform' do
let(:restricted_group) { create(:group) } let(:restricted_group) { create(:group) }
let(:unsynced_project_in_restricted_group) { create(:project, group: restricted_group) } let(:unsynced_project_in_restricted_group) { create(:project, group: restricted_group) }
let(:unsynced_project) { create(:project) } let(:unsynced_project) { create(:project) }
...@@ -30,77 +29,82 @@ RSpec.describe Geo::DesignRepositoryShardSyncWorker, :geo, :geo_fdw, :clean_gitl ...@@ -30,77 +29,82 @@ RSpec.describe Geo::DesignRepositoryShardSyncWorker, :geo, :geo_fdw, :clean_gitl
create(:design, project: unsynced_project) create(:design, project: unsynced_project)
end end
it 'performs Geo::DesignRepositorySyncWorker for each project' do it 'does not perform Geo::DesignRepositorySyncWorker when shard becomes unhealthy' do
expect(Geo::DesignRepositorySyncWorker).to receive(:perform_async).twice.and_return(spy) Gitlab::ShardHealthCache.update([])
expect(Geo::DesignRepositorySyncWorker).not_to receive(:perform_async)
subject.perform(shard_name) subject.perform(shard_name)
end end
it 'performs Geo::DesignRepositorySyncWorker for designs where last attempt to sync failed' do it 'does not perform Geo::DesignRepositorySyncWorker when no geo database is configured' do
create(:geo_design_registry, :sync_failed, project: unsynced_project_in_restricted_group) allow(Gitlab::Geo).to receive(:geo_database_configured?) { false }
create(:geo_design_registry, :synced, project: unsynced_project)
expect(Geo::DesignRepositorySyncWorker).to receive(:perform_async).once.and_return(spy) expect(Geo::DesignRepositorySyncWorker).not_to receive(:perform_async)
subject.perform(shard_name) subject.perform(shard_name)
# We need to unstub here or the DatabaseCleaner will have issues since it
# will appear as though the tracking DB were not available
allow(Gitlab::Geo).to receive(:geo_database_configured?).and_call_original
end end
it 'does not perform Geo::DesignRepositorySyncWorker when shard becomes unhealthy' do it 'does not perform Geo::ProjectSyncWorker when not running on a secondary' do
Gitlab::ShardHealthCache.update([]) allow(Gitlab::Geo).to receive(:secondary?) { false }
expect(Geo::DesignRepositorySyncWorker).not_to receive(:perform_async) expect(Geo::DesignRepositorySyncWorker).not_to receive(:perform_async)
subject.perform(shard_name) subject.perform(shard_name)
end end
it 'performs Geo::DesignRepositorySyncWorker for designs updated recently' do it 'does not perform Geo::DesignRepositorySyncWorker when node is disabled' do
create(:geo_design_registry, project: unsynced_project_in_restricted_group) allow_any_instance_of(GeoNode).to receive(:enabled?) { false }
create(:geo_design_registry, :synced, project: unsynced_project)
create(:geo_design_registry)
expect(Geo::DesignRepositorySyncWorker).to receive(:perform_async).twice.and_return(spy) expect(Geo::DesignRepositorySyncWorker).not_to receive(:perform_async)
subject.perform(shard_name) subject.perform(shard_name)
end end
it 'does not schedule a job twice for the same project' do context 'when geo_design_registry_ssot_sync is disabled' do
scheduled_jobs = [ before do
{ job_id: 1, project_id: unsynced_project.id }, stub_feature_flags(geo_design_registry_ssot_sync: false)
{ job_id: 2, project_id: unsynced_project_in_restricted_group.id } end
]
is_expected.to receive(:scheduled_jobs).and_return(scheduled_jobs).at_least(:once) it 'performs Geo::DesignRepositorySyncWorker for each project' do
is_expected.not_to receive(:schedule_job) expect(Geo::DesignRepositorySyncWorker).to receive(:perform_async).twice.and_return(spy)
Sidekiq::Testing.inline! { subject.perform(shard_name) } subject.perform(shard_name)
end end
it 'does not perform Geo::DesignRepositorySyncWorker when no geo database is configured' do it 'performs Geo::DesignRepositorySyncWorker for designs where last attempt to sync failed' do
allow(Gitlab::Geo).to receive(:geo_database_configured?) { false } create(:geo_design_registry, :sync_failed, project: unsynced_project_in_restricted_group)
create(:geo_design_registry, :synced, project: unsynced_project)
expect(Geo::DesignRepositorySyncWorker).not_to receive(:perform_async) expect(Geo::DesignRepositorySyncWorker).to receive(:perform_async).once.and_return(spy)
subject.perform(shard_name) subject.perform(shard_name)
# We need to unstub here or the DatabaseCleaner will have issues since it
# will appear as though the tracking DB were not available
allow(Gitlab::Geo).to receive(:geo_database_configured?).and_call_original
end end
it 'does not perform Geo::ProjectSyncWorker when not running on a secondary' do it 'performs Geo::DesignRepositorySyncWorker for designs updated recently' do
allow(Gitlab::Geo).to receive(:secondary?) { false } create(:geo_design_registry, project: unsynced_project_in_restricted_group)
create(:geo_design_registry, :synced, project: unsynced_project)
create(:geo_design_registry)
expect(Geo::DesignRepositorySyncWorker).not_to receive(:perform_async) expect(Geo::DesignRepositorySyncWorker).to receive(:perform_async).twice.and_return(spy)
subject.perform(shard_name) subject.perform(shard_name)
end end
it 'does not perform Geo::DesignRepositorySyncWorker when node is disabled' do it 'does not schedule a job twice for the same project' do
allow_any_instance_of(GeoNode).to receive(:enabled?) { false } scheduled_jobs = [
{ job_id: 1, project_id: unsynced_project.id },
{ job_id: 2, project_id: unsynced_project_in_restricted_group.id }
]
expect(Geo::DesignRepositorySyncWorker).not_to receive(:perform_async) is_expected.to receive(:scheduled_jobs).and_return(scheduled_jobs).at_least(:once)
is_expected.not_to receive(:schedule_job)
subject.perform(shard_name) Sidekiq::Testing.inline! { subject.perform(shard_name) }
end end
context 'multiple shards' do context 'multiple shards' do
...@@ -145,4 +149,68 @@ RSpec.describe Geo::DesignRepositoryShardSyncWorker, :geo, :geo_fdw, :clean_gitl ...@@ -145,4 +149,68 @@ RSpec.describe Geo::DesignRepositoryShardSyncWorker, :geo, :geo_fdw, :clean_gitl
end end
end end
end end
context 'when geo_design_registry_ssot_sync is enabled' do
before do
stub_feature_flags(geo_design_registry_ssot_sync: true)
end
it 'performs Geo::DesignRepositorySyncWorker for each registry' do
create(:geo_design_registry, project: unsynced_project_in_restricted_group)
create(:geo_design_registry, project: unsynced_project)
expect(Geo::DesignRepositorySyncWorker).to receive(:perform_async).twice.and_return(spy)
subject.perform(shard_name)
end
it 'performs Geo::DesignRepositorySyncWorker for designs where last attempt to sync failed' do
create(:geo_design_registry, :sync_failed, project: unsynced_project_in_restricted_group)
create(:geo_design_registry, :synced, project: unsynced_project)
expect(Geo::DesignRepositorySyncWorker).to receive(:perform_async).once.and_return(spy)
subject.perform(shard_name)
end
it 'performs Geo::DesignRepositorySyncWorker for designs updated recently' do
create(:geo_design_registry, project: unsynced_project_in_restricted_group)
create(:geo_design_registry, :synced, project: unsynced_project)
create(:geo_design_registry)
expect(Geo::DesignRepositorySyncWorker).to receive(:perform_async).twice.and_return(spy)
subject.perform(shard_name)
end
it 'does not schedule a job twice for the same project' do
create(:geo_design_registry, project: unsynced_project_in_restricted_group)
create(:geo_design_registry, project: unsynced_project)
scheduled_jobs = [
{ job_id: 1, project_id: unsynced_project.id },
{ job_id: 2, project_id: unsynced_project_in_restricted_group.id }
]
is_expected.to receive(:scheduled_jobs).and_return(scheduled_jobs).at_least(:once)
is_expected.not_to receive(:schedule_job)
Sidekiq::Testing.inline! { subject.perform(shard_name) }
end
context 'multiple shards' do
it 'uses two loops to schedule jobs', :sidekiq_might_not_need_inline do
expect(subject).to receive(:schedule_jobs).twice.and_call_original
Gitlab::ShardHealthCache.update([shard_name, 'shard2', 'shard3', 'shard4', 'shard5'])
secondary.update!(repos_max_capacity: 5)
create(:geo_design_registry, project: unsynced_project_in_restricted_group)
create(:geo_design_registry, project: unsynced_project)
subject.perform(shard_name)
end
end
end
end
end end
...@@ -79,12 +79,14 @@ RSpec.describe Geo::Secondary::RegistryConsistencyWorker, :geo, :geo_fdw do ...@@ -79,12 +79,14 @@ RSpec.describe Geo::Secondary::RegistryConsistencyWorker, :geo, :geo_fdw do
job_artifact = create(:ci_job_artifact) job_artifact = create(:ci_job_artifact)
lfs_object = create(:lfs_object) lfs_object = create(:lfs_object)
project = create(:project) project = create(:project)
create(:design, project: project)
upload = create(:upload) upload = create(:upload)
package_file = create(:conan_package_file, :conan_package) package_file = create(:conan_package_file, :conan_package)
expect(Geo::LfsObjectRegistry.where(lfs_object_id: lfs_object.id).count).to eq(0) expect(Geo::LfsObjectRegistry.where(lfs_object_id: lfs_object.id).count).to eq(0)
expect(Geo::JobArtifactRegistry.where(artifact_id: job_artifact.id).count).to eq(0) expect(Geo::JobArtifactRegistry.where(artifact_id: job_artifact.id).count).to eq(0)
expect(Geo::ProjectRegistry.where(project_id: project.id).count).to eq(0) expect(Geo::ProjectRegistry.where(project_id: project.id).count).to eq(0)
expect(Geo::DesignRegistry.where(project_id: project.id).count).to eq(0)
expect(Geo::UploadRegistry.where(file_id: upload.id).count).to eq(0) expect(Geo::UploadRegistry.where(file_id: upload.id).count).to eq(0)
expect(Geo::PackageFileRegistry.where(package_file_id: package_file.id).count).to eq(0) expect(Geo::PackageFileRegistry.where(package_file_id: package_file.id).count).to eq(0)
...@@ -93,6 +95,7 @@ RSpec.describe Geo::Secondary::RegistryConsistencyWorker, :geo, :geo_fdw do ...@@ -93,6 +95,7 @@ RSpec.describe Geo::Secondary::RegistryConsistencyWorker, :geo, :geo_fdw do
expect(Geo::LfsObjectRegistry.where(lfs_object_id: lfs_object.id).count).to eq(1) expect(Geo::LfsObjectRegistry.where(lfs_object_id: lfs_object.id).count).to eq(1)
expect(Geo::JobArtifactRegistry.where(artifact_id: job_artifact.id).count).to eq(1) expect(Geo::JobArtifactRegistry.where(artifact_id: job_artifact.id).count).to eq(1)
expect(Geo::ProjectRegistry.where(project_id: project.id).count).to eq(1) expect(Geo::ProjectRegistry.where(project_id: project.id).count).to eq(1)
expect(Geo::DesignRegistry.where(project_id: project.id).count).to eq(1)
expect(Geo::UploadRegistry.where(file_id: upload.id).count).to eq(1) expect(Geo::UploadRegistry.where(file_id: upload.id).count).to eq(1)
expect(Geo::PackageFileRegistry.where(package_file_id: package_file.id).count).to eq(1) expect(Geo::PackageFileRegistry.where(package_file_id: package_file.id).count).to eq(1)
end end
...@@ -111,6 +114,7 @@ RSpec.describe Geo::Secondary::RegistryConsistencyWorker, :geo, :geo_fdw do ...@@ -111,6 +114,7 @@ RSpec.describe Geo::Secondary::RegistryConsistencyWorker, :geo, :geo_fdw do
allow(Geo::RegistryConsistencyService).to receive(:new).with(Geo::LfsObjectRegistry, batch_size: 1000).and_call_original allow(Geo::RegistryConsistencyService).to receive(:new).with(Geo::LfsObjectRegistry, batch_size: 1000).and_call_original
allow(Geo::RegistryConsistencyService).to receive(:new).with(Geo::PackageFileRegistry, batch_size: 1000).and_call_original allow(Geo::RegistryConsistencyService).to receive(:new).with(Geo::PackageFileRegistry, batch_size: 1000).and_call_original
allow(Geo::RegistryConsistencyService).to receive(:new).with(Geo::UploadRegistry, batch_size: 1000).and_call_original allow(Geo::RegistryConsistencyService).to receive(:new).with(Geo::UploadRegistry, batch_size: 1000).and_call_original
allow(Geo::RegistryConsistencyService).to receive(:new).with(Geo::DesignRegistry, batch_size: 1000).and_call_original
expect(Geo::RegistryConsistencyService).not_to receive(:new).with(Geo::ProjectRegistry, batch_size: 1000) expect(Geo::RegistryConsistencyService).not_to receive(:new).with(Geo::ProjectRegistry, batch_size: 1000)
...@@ -118,6 +122,28 @@ RSpec.describe Geo::Secondary::RegistryConsistencyWorker, :geo, :geo_fdw do ...@@ -118,6 +122,28 @@ RSpec.describe Geo::Secondary::RegistryConsistencyWorker, :geo, :geo_fdw do
end end
end end
context 'when geo_design_registry_ssot_sync is disabled' do
before do
stub_feature_flags(geo_design_registry_ssot_sync: false)
end
it 'returns false' do
expect(subject.perform).to be_falsey
end
it 'does not execute RegistryConsistencyService for designs' do
allow(Geo::RegistryConsistencyService).to receive(:new).with(Geo::JobArtifactRegistry, batch_size: 1000).and_call_original
allow(Geo::RegistryConsistencyService).to receive(:new).with(Geo::LfsObjectRegistry, batch_size: 1000).and_call_original
allow(Geo::RegistryConsistencyService).to receive(:new).with(Geo::PackageFileRegistry, batch_size: 1000).and_call_original
allow(Geo::RegistryConsistencyService).to receive(:new).with(Geo::UploadRegistry, batch_size: 1000).and_call_original
allow(Geo::RegistryConsistencyService).to receive(:new).with(Geo::ProjectRegistry, batch_size: 1000).and_call_original
expect(Geo::RegistryConsistencyService).not_to receive(:new).with(Geo::DesignRegistry, batch_size: 1000)
subject.perform
end
end
context 'when the current Geo node is disabled or primary' do context 'when the current Geo node is disabled or primary' do
before do before do
stub_primary_node stub_primary_node
......
...@@ -382,4 +382,11 @@ FactoryBot.define do ...@@ -382,4 +382,11 @@ FactoryBot.define do
) )
end end
end end
factory :project_with_design, parent: :project do
after(:create) do |project|
issue = create(:issue, project: project)
create(:design, project: project, issue: issue)
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment