Remove find_registry_differences method from Geo finders

Refactor code to make the finder class
closer to a Geo replicator.
parent 40aac725
......@@ -2,50 +2,6 @@
module Geo
class AttachmentRegistryFinder < FileRegistryFinder
# Computes, for the given ID range, the uploads that are untracked and the
# tracked uploads that are unused.
#
# Every item in both result arrays is a tuple of [id, file_type]:
#
# - an untracked upload is supposed to be synced but doesn't yet have a
#   registry entry;
# - an unused upload is not supposed to be synced but already has a registry
#   entry. For example:
#   - orphaned registries
#   - records that became excluded from selective sync
#   - records that are in object storage, and `sync_object_storage` became
#     disabled
#
# Both sets are computed in this method to reduce the number of DB queries
# performed.
#
# @param range the ID range handed to `id_in` and `model_id_in`
# @return [Array] the first element is an Array of untracked uploads, and the
#   second element is an Array of tracked uploads that are unused.
#   For example: [[[1, 'avatar'], [5, 'file']], [[3, 'attachment']]]
def find_registry_differences(range)
  # rubocop:disable CodeReuse/ActiveRecord
  upload_columns = replicables.id_in(range).pluck(::Upload.arel_table[:id], ::Upload.arel_table[:uploader])

  # Turn the uploader class name into the registry's file_type form by
  # stripping the trailing "Uploader" and underscoring the remainder.
  source = upload_columns.map! do |upload_id, uploader_name|
    [upload_id, uploader_name.sub(/Uploader\z/, '').underscore]
  end

  tracked = registry_class.model_id_in(range).pluck(:file_id, :file_type)
  # rubocop:enable CodeReuse/ActiveRecord

  [source - tracked, tracked - source]
end
# Returns the replicable scope for this finder by delegating to
# `::Upload.replicables_for_geo_node` (called without arguments).
def replicables
::Upload.replicables_for_geo_node
end
# Returns the registry class paired with this finder: `Geo::UploadRegistry`.
def registry_class
Geo::UploadRegistry
end
......
......@@ -2,10 +2,6 @@
module Geo
class ContainerRepositoryRegistryFinder < RegistryFinder
# Returns `container_repositories` as exposed by `current_node`.
def replicables
current_node.container_repositories
end
# Returns the registry class paired with this finder:
# `Geo::ContainerRepositoryRegistry`.
def registry_class
Geo::ContainerRepositoryRegistry
end
......
......@@ -2,10 +2,6 @@
module Geo
class DesignRegistryFinder < RegistryFinder
# Returns `designs` as exposed by `current_node`.
def replicables
current_node.designs
end
# Returns the registry class paired with this finder: `Geo::DesignRegistry`.
def registry_class
Geo::DesignRegistry
end
......
......@@ -27,9 +27,5 @@ module Geo
.limit(batch_size)
end
# rubocop:enable CodeReuse/ActiveRecord
# Tells whether only locally stored files are in scope.
#
# @return [Boolean] true when there is no current node, or when the node's
#   `sync_object_storage` is falsy; false otherwise
def local_storage_only?
  node = current_node
  return true if node.nil?

  !node.sync_object_storage
end
end
end
......@@ -2,10 +2,6 @@
module Geo
class JobArtifactRegistryFinder < FileRegistryFinder
# Returns the replicable scope for this finder by delegating to
# `::Ci::JobArtifact.replicables_for_geo_node` (called without arguments).
def replicables
::Ci::JobArtifact.replicables_for_geo_node
end
# Returns the registry class paired with this finder:
# `Geo::JobArtifactRegistry`.
def registry_class
Geo::JobArtifactRegistry
end
......
......@@ -2,18 +2,8 @@
module Geo
class LfsObjectRegistryFinder < FileRegistryFinder
# Returns the LFS objects to consider for replication.
#
# When `local_storage_only?` is true the scope is narrowed with
# `with_files_stored_locally`; otherwise `lfs_objects` is returned as is.
def replicables
  return lfs_objects unless local_storage_only?

  lfs_objects.with_files_stored_locally
end
# Returns the registry class paired with this finder:
# `Geo::LfsObjectRegistry`.
def registry_class
Geo::LfsObjectRegistry
end
private
# Returns `lfs_objects` as exposed by `current_node`.
def lfs_objects
current_node.lfs_objects
end
end
end
......@@ -10,34 +10,6 @@ module Geo
@current_node_id = current_node_id
end
# @!method find_registry_differences
# Returns untracked IDs as well as tracked IDs that are unused.
#
# Untracked IDs are model IDs that are supposed to be synced but don't yet
# have a registry entry.
#
# Unused tracked IDs are model IDs that are not supposed to be synced but
# already have a registry entry. For example:
#
# - orphaned registries
# - records that became excluded from selective sync
# - records that are in object storage, and `sync_object_storage` became
# disabled
#
# We compute both sets in this method to reduce the number of DB queries
# performed.
#
# @return [Array] the first element is an Array of untracked IDs, and the second element is an Array of tracked IDs that are unused
def find_registry_differences(range)
  # IDs of replicable records inside the range.
  replicable_ids = replicables.id_in(range).pluck(replicable_primary_key) # rubocop:disable CodeReuse/ActiveRecord
  # Model IDs that already have a registry row inside the range.
  registry_ids = registry_class.pluck_model_ids_in_range(range)

  # First element: untracked IDs; second element: tracked IDs that are unused.
  [replicable_ids - registry_ids, registry_ids - replicable_ids]
end
# @!method find_unsynced_registries
# Return an ActiveRecord::Relation of the registry records for the
# tracked type that have never been synced.
......@@ -86,14 +58,6 @@ module Geo
"#{self.class} does not implement #{__method__}"
end
# @!method replicables
# Return an ActiveRecord::Relation of the replicable records for the
# tracked file_type(s)
def replicables
  # Abstract hook: the message names the concrete class and this method.
  message = "#{self.class} does not implement #{__method__}"

  raise NotImplementedError, message
end
# @!method registry_count
# Return a count of the registry records for the tracked type(s)
def registry_count
......@@ -121,12 +85,5 @@ module Geo
GeoNode.find(current_node_id) if current_node_id
end
end
# @!method replicable_primary_key
# Return the fully qualified name of the replicable primary key for the
# tracked file_type(s)
def replicable_primary_key
  # The model is looked up through the registry class' MODEL_CLASS constant.
  model_class = registry_class::MODEL_CLASS

  model_class.arel_table[:id]
end
end
end
......@@ -8,6 +8,12 @@ module EE
scope :project_id_in, ->(ids) { joins(:project).merge(::Project.id_in(ids)) }
end
class_methods do
# Returns `container_repositories` as exposed by the given node.
#
# @param node the node to query; defaults to `::Gitlab::Geo.current_node`
def replicables_for_geo_node(node = ::Gitlab::Geo.current_node)
node.container_repositories
end
end
# Forwards to `client.push_blob`, passing `path` followed by the given
# `digest` and `file_path`.
def push_blob(digest, file_path)
client.push_blob(path, digest, file_path)
end
......
......@@ -16,6 +16,13 @@ module EE
scope :project_id_in, ->(ids) { joins(:projects).merge(::Project.id_in(ids)) }
end
class_methods do
# Returns the LFS objects of the given node that are candidates for
# replication.
#
# When the node's `sync_object_storage` is truthy all of `node.lfs_objects`
# is returned; otherwise the scope is narrowed with
# `with_files_stored_locally`.
#
# NOTE(review): a nil node passes the `&.` check but still raises on
# `node.lfs_objects` - confirm whether nil is ever expected here.
#
# @param node the node to query; defaults to `::Gitlab::Geo.current_node`
def replicables_for_geo_node(node = ::Gitlab::Geo.current_node)
  return node.lfs_objects if node&.sync_object_storage

  node.lfs_objects.with_files_stored_locally
end
end
# Builds a `::Geo::LfsObjectDeletedEventStore` for this object and calls
# `create!` on it.
def log_geo_deleted_event
::Geo::LfsObjectDeletedEventStore.new(self).create!
end
......
......@@ -192,6 +192,10 @@ module EE
class_methods do
extend ::Gitlab::Utils::Override
# Returns `projects` as exposed by the given node.
#
# @param node the node to query; defaults to `::Gitlab::Geo.current_node`
def replicables_for_geo_node(node = ::Gitlab::Geo.current_node)
node.projects
end
# Filters by visibility level.
#
# @param level the key looked up in
#   `::Gitlab::VisibilityLevel.string_options`; the looked-up value is used
#   as the `visibility_level` condition
def search_by_visibility(level)
  visibility_value = ::Gitlab::VisibilityLevel.string_options[level]

  where(visibility_level: visibility_value)
end
......
......@@ -41,10 +41,28 @@ class Geo::BaseRegistry < Geo::TrackingBase
end
end
# Returns the worker class associated with registry deletion:
# `::Geo::FileRegistryRemovalWorker`.
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
# Returns the replicator class, obtained by asking this registry's
# `MODEL_CLASS` for its `replicator_class`.
def self.replicator_class
self::MODEL_CLASS.replicator_class
end
# Compares the replicable records of `MODEL_CLASS` with the registry rows
# inside the given ID range.
#
# @param range the ID range handed to `id_in` and `pluck_model_ids_in_range`
# @return [Array] the first element is an Array of IDs that are replicable
#   but have no registry row; the second element is an Array of IDs that
#   have a registry row but are not replicable
def self.find_registry_differences(range)
  model_class = self::MODEL_CLASS
  replicable_ids = model_class.replicables_for_geo_node.id_in(range).pluck(model_class.arel_table[:id])
  registry_ids = pluck_model_ids_in_range(range)

  [replicable_ids - registry_ids, registry_ids - replicable_ids]
end
def self.find_unsynced_registries(batch_size:, except_ids: [])
pending
.model_id_not_in(except_ids)
......@@ -61,24 +79,6 @@ class Geo::BaseRegistry < Geo::TrackingBase
true
end
# Returns the worker class associated with registry deletion:
# `::Geo::FileRegistryRemovalWorker`.
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
# Returns, for the given ID range, the model IDs lacking a registry row and
# the registry model IDs that are no longer replicable.
#
# @param range the ID range handed to `id_in` and `pluck_model_ids_in_range`
# @return [Array] a pair of Arrays: untracked IDs first, unused tracked IDs
#   second
def self.find_registry_differences(range)
  model = self::MODEL_CLASS
  in_scope_ids = model.replicables_for_geo_node.id_in(range).pluck(model.arel_table[:id])
  tracked_ids = pluck_model_ids_in_range(range)

  untracked = in_scope_ids - tracked_ids
  unused = tracked_ids - in_scope_ids
  [untracked, unused]
end
# Reads the attribute named by this registry class' `MODEL_FOREIGN_KEY`.
def model_record_id
  foreign_key = self.class::MODEL_FOREIGN_KEY

  read_attribute(foreign_key)
end
......
......@@ -38,14 +38,6 @@ class Geo::ContainerRepositoryRegistry < Geo::BaseRegistry
end
end
# Returns the finder class paired with this registry:
# `::Geo::ContainerRepositoryRegistryFinder`.
def self.finder_class
::Geo::ContainerRepositoryRegistryFinder
end
# Delegates to a finder built for the current Geo node's ID and returns
# whatever its `find_registry_differences` returns for `range`.
def self.find_registry_differences(range)
  finder = finder_class.new(current_node_id: Gitlab::Geo.current_node.id)

  finder.find_registry_differences(range)
end
def self.find_failed_registries(batch_size:, except_ids: [])
super
.order(Gitlab::Database.nulls_first_order(:last_synced_at))
......
......@@ -51,12 +51,14 @@ class Geo::DesignRegistry < Geo::BaseRegistry
project_ids
end
# Returns the finder class paired with this registry:
# `::Geo::DesignRegistryFinder`.
def self.finder_class
::Geo::DesignRegistryFinder
end
# Compares the current Geo node's designs with the registry rows inside the
# given ID range.
#
# The differences are computed directly from `Gitlab::Geo.current_node.designs`
# and this registry; no finder object is instantiated.
#
# @param range the ID range handed to `id_in` and `pluck_model_ids_in_range`
# @return [Array] the first element is an Array of untracked IDs, and the
#   second element is an Array of tracked IDs that are unused
def self.find_registry_differences(range)
  source_ids = Gitlab::Geo.current_node.designs.id_in(range).pluck_primary_key
  tracked_ids = self.pluck_model_ids_in_range(range)

  untracked_ids = source_ids - tracked_ids
  unused_tracked_ids = tracked_ids - source_ids

  [untracked_ids, unused_tracked_ids]
end
def self.find_unsynced_registries(batch_size:, except_ids: [])
......
......@@ -6,14 +6,6 @@ class Geo::JobArtifactRegistry < Geo::BaseRegistry
MODEL_CLASS = ::Ci::JobArtifact
MODEL_FOREIGN_KEY = :artifact_id
# Returns the finder class paired with this registry:
# `::Geo::JobArtifactRegistryFinder`.
def self.finder_class
::Geo::JobArtifactRegistryFinder
end
# Builds a finder for the current Geo node's ID and hands `range` to its
# `find_registry_differences`, returning the finder's result.
def self.find_registry_differences(range)
  registry_finder = finder_class.new(current_node_id: Gitlab::Geo.current_node.id)

  registry_finder.find_registry_differences(range)
end
# When false, RegistryConsistencyService will frequently check the end of the
# table to quickly handle new replicables.
def self.has_create_events?
......
......@@ -11,14 +11,6 @@ class Geo::LfsObjectRegistry < Geo::BaseRegistry
belongs_to :lfs_object, class_name: 'LfsObject'
# Returns the finder class paired with this registry:
# `::Geo::LfsObjectRegistryFinder`.
def self.finder_class
::Geo::LfsObjectRegistryFinder
end
# Instantiates the finder with the current Geo node's ID and returns the
# result of its `find_registry_differences` for `range`.
def self.find_registry_differences(range)
  lfs_finder = finder_class.new(current_node_id: Gitlab::Geo.current_node.id)

  lfs_finder.find_registry_differences(range)
end
# If false, RegistryConsistencyService will frequently check the end of the
# table to quickly handle new replicables.
def self.has_create_events?
......
......@@ -44,16 +44,6 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
where(nil).pluck(:project_id)
end
# Compares the current Geo node's projects with the registry rows inside the
# given ID range.
#
# @param range the ID range handed to `id_in` and `pluck_model_ids_in_range`
# @return [Array] the first element is an Array of project IDs without a
#   registry row; the second element is an Array of registry model IDs that
#   are not among the node's projects
def self.find_registry_differences(range)
  project_ids = Gitlab::Geo.current_node.projects.id_in(range).pluck_primary_key
  registry_ids = pluck_model_ids_in_range(range)

  [project_ids - registry_ids, registry_ids - project_ids]
end
def self.find_failed_registries(batch_size:, except_ids: [])
super
.order(Gitlab::Database.nulls_first_order(:last_repository_synced_at))
......
......@@ -12,12 +12,41 @@ class Geo::UploadRegistry < Geo::BaseRegistry
scope :fresh, -> { order(created_at: :desc) }
# Returns the finder class paired with this registry:
# `::Geo::AttachmentRegistryFinder`.
def self.finder_class
::Geo::AttachmentRegistryFinder
end
# Returns untracked uploads as well as tracked uploads that are unused.
#
# Untracked uploads is an array where each item is a tuple of [id, file_type]
# that is supposed to be synced but don't yet have a registry entry.
#
# Unused uploads is an array where each item is a tuple of [id, file_type]
# that is not supposed to be synced but already have a registry entry. For
# example:
#
# - orphaned registries
# - records that became excluded from selective sync
# - records that are in object storage, and `sync_object_storage` became
# disabled
#
# We compute both sets in this method to reduce the number of DB queries
# performed.
#
# @return [Array] the first element is an Array of untracked uploads, and the
# second element is an Array of tracked uploads that are unused.
# For example: [[[1, 'avatar'], [5, 'file']], [[3, 'attachment']]]
def self.find_registry_differences(range)
  # Replicable uploads in range as [id, file_type] tuples. The file_type is
  # derived from the uploader class name: the trailing "Uploader" is stripped
  # and the remainder underscored. Everything is computed from MODEL_CLASS
  # and this registry; no finder object is instantiated.
  source =
    self::MODEL_CLASS.replicables_for_geo_node
      .id_in(range)
      .pluck(self::MODEL_CLASS.arel_table[:id], self::MODEL_CLASS.arel_table[:uploader])
      .map! { |id, uploader| [id, uploader.sub(/Uploader\z/, '').underscore] }

  # Registry rows in range, in the same [id, file_type] shape.
  tracked =
    self.model_id_in(range)
      .pluck(:file_id, :file_type)

  untracked = source - tracked
  unused_tracked = tracked - source

  [untracked, unused_tracked]
end
# If false, RegistryConsistencyService will frequently check the end of the
......
......@@ -2,25 +2,15 @@
require 'spec_helper'
RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo do
include ::EE::GeoHelpers
let_it_be(:secondary) { create(:geo_node) }
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group) { create(:group, parent: synced_group) }
let_it_be(:project_synced_group) { create(:project, group: synced_group) }
let_it_be(:project_nested_group) { create(:project, group: nested_group) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
let_it_be(:container_repository_1) { create(:container_repository, project: project_synced_group) }
let_it_be(:container_repository_2) { create(:container_repository, project: project_nested_group) }
let_it_be(:container_repository_3) { create(:container_repository) }
let_it_be(:container_repository_4) { create(:container_repository) }
let_it_be(:container_repository_5) { create(:container_repository, project: project_broken_storage) }
let_it_be(:container_repository_6) { create(:container_repository, project: project_broken_storage) }
subject { described_class.new(current_node_id: secondary.id) }
let_it_be(:project) { create(:project) }
let_it_be(:container_repository_1) { create(:container_repository, project: project) }
let_it_be(:container_repository_2) { create(:container_repository, project: project) }
let_it_be(:container_repository_3) { create(:container_repository, project: project) }
let_it_be(:container_repository_4) { create(:container_repository, project: project) }
let_it_be(:container_repository_5) { create(:container_repository, project: project) }
let_it_be(:container_repository_6) { create(:container_repository, project: project) }
before do
stub_current_geo_node(secondary)
stub_registry_replication_config(enabled: true)
end
......@@ -51,140 +41,6 @@ RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo do
end
end
describe '#find_registry_differences' do
context 'untracked IDs' do
before do
create(:container_repository_registry, container_repository_id: container_repository_1.id)
create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_3.id)
create(:container_repository_registry, container_repository_id: container_repository_5.id)
end
it 'includes container registries IDs without an entry on the tracking database' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_2.id, container_repository_4.id, container_repository_6.id])
end
it 'excludes container registries outside the ID range' do
untracked_ids, _ = subject.find_registry_differences(container_repository_4.id..container_repository_6.id)
expect(untracked_ids).to match_array([container_repository_4.id, container_repository_6.id])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes container_registry IDs that projects are not in the selected namespaces' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes container_registry IDs that projects are not in the selected shards' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_6.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
before do
container_repository_1.delete
end
it 'includes tracked IDs that do not exist in the model table' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_1.id])
end
it 'excludes IDs outside the ID range' do
range = (container_repository_1.id + 1)..ContainerRepository.maximum(:id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked container_registry' do
context 'excluded from selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_3.id) }
it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
range = container_repository_3.id..container_repository_3.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_3.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
it 'excludes tracked container_registry IDs that are in selectively synced projects' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked container_registry' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
context 'excluded from selective sync' do
it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_1.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_5.id) }
it 'excludes tracked container_registry IDs that are in selectively synced projects' do
range = container_repository_5.id..container_repository_5.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
describe '#find_unsynced_registries' do
let_it_be(:registry_container_registry_1) { create(:container_repository_registry, :synced, container_repository_id: container_repository_1.id) }
let_it_be(:registry_container_registry_2) { create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_2.id) }
......
......@@ -5,7 +5,6 @@ require 'spec_helper'
RSpec.describe Geo::FileRegistryFinder, :geo do
context 'with abstract methods' do
%w[
replicables
registry_class
].each do |required_method|
it "requires subclasses to implement #{required_method}" do
......@@ -13,24 +12,4 @@ RSpec.describe Geo::FileRegistryFinder, :geo do
end
end
end
describe '#local_storage_only?' do
subject { described_class.new(current_node_id: geo_node.id) }
context 'sync_object_storage is enabled' do
let(:geo_node) { create(:geo_node, sync_object_storage: true) }
it 'returns false' do
expect(subject.local_storage_only?).to be_falsey
end
end
context 'sync_object_storage is disabled' do
let(:geo_node) { create(:geo_node, sync_object_storage: false) }
it 'returns true' do
expect(subject.local_storage_only?).to be_truthy
end
end
end
end
......@@ -3,23 +3,23 @@
require 'spec_helper'
RSpec.describe Geo::ContainerRepositoryRegistry, :geo do
include ::EE::GeoHelpers
it_behaves_like 'a BulkInsertSafe model', Geo::ContainerRepositoryRegistry do
let(:valid_items_for_bulk_insertion) { build_list(:container_repository_registry, 10, created_at: Time.zone.now) }
let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined
end
let_it_be(:registry) { create(:container_repository_registry) }
it_behaves_like 'a Geo registry' do
let(:registry) { create(:container_repository_registry) }
end
describe 'relationships' do
it { is_expected.to belong_to(:container_repository) }
end
it_behaves_like 'a Geo registry' do
let(:registry) { create(:container_repository_registry) }
end
describe '#finish_sync!' do
let(:registry) { create(:container_repository_registry, :sync_started) }
let_it_be(:registry) { create(:container_repository_registry, :sync_started) }
it 'finishes registry record' do
registry.finish_sync!
......@@ -58,6 +58,158 @@ RSpec.describe Geo::ContainerRepositoryRegistry, :geo do
end
end
describe '.find_registry_differences' do
let_it_be(:secondary) { create(:geo_node) }
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group) { create(:group, parent: synced_group) }
let_it_be(:project_synced_group) { create(:project, group: synced_group) }
let_it_be(:project_nested_group) { create(:project, group: nested_group) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
let_it_be(:container_repository_1) { create(:container_repository, project: project_synced_group) }
let_it_be(:container_repository_2) { create(:container_repository, project: project_nested_group) }
let_it_be(:container_repository_3) { create(:container_repository) }
let_it_be(:container_repository_4) { create(:container_repository) }
let_it_be(:container_repository_5) { create(:container_repository, project: project_broken_storage) }
let_it_be(:container_repository_6) { create(:container_repository, project: project_broken_storage) }
before do
stub_current_geo_node(secondary)
stub_registry_replication_config(enabled: true)
end
context 'untracked IDs' do
before do
create(:container_repository_registry, container_repository_id: container_repository_1.id)
create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_3.id)
create(:container_repository_registry, container_repository_id: container_repository_5.id)
end
it 'includes container registries IDs without an entry on the tracking database' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = described_class.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_2.id, container_repository_4.id, container_repository_6.id])
end
it 'excludes container registries outside the ID range' do
untracked_ids, _ = described_class.find_registry_differences(container_repository_4.id..container_repository_6.id)
expect(untracked_ids).to match_array([container_repository_4.id, container_repository_6.id])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes container_registry IDs that projects are not in the selected namespaces' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = described_class.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes container_registry IDs that projects are not in the selected shards' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = described_class.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_6.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
before do
container_repository_1.delete
end
it 'includes tracked IDs that do not exist in the model table' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_1.id])
end
it 'excludes IDs outside the ID range' do
range = (container_repository_1.id + 1)..ContainerRepository.maximum(:id)
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked container_registry' do
context 'excluded from selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_3.id) }
it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
range = container_repository_3.id..container_repository_3.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_3.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
it 'excludes tracked container_registry IDs that are in selectively synced projects' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked container_registry' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
context 'excluded from selective sync' do
it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_1.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_5.id) }
it 'excludes tracked container_registry IDs that are in selectively synced projects' do
range = container_repository_5.id..container_repository_5.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
describe '.replication_enabled?' do
it 'returns true when registry replication is enabled' do
stub_geo_setting(registry_replication: { enabled: true })
......
......@@ -3,6 +3,8 @@
require 'spec_helper'
RSpec.describe Geo::DesignRegistry, :geo do
include ::EE::GeoHelpers
it_behaves_like 'a BulkInsertSafe model', Geo::DesignRegistry do
let(:valid_items_for_bulk_insertion) { build_list(:geo_design_registry, 10, created_at: Time.zone.now) }
let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined
......@@ -16,6 +18,172 @@ RSpec.describe Geo::DesignRegistry, :geo do
let(:registry) { create(:geo_design_registry) }
end
describe '.find_registry_differences' do
let_it_be(:secondary) { create(:geo_node) }
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group) { create(:group, parent: synced_group) }
let_it_be(:project_1) { create(:project, group: synced_group) }
let_it_be(:project_2) { create(:project, group: nested_group) }
let_it_be(:project_3) { create(:project) }
let_it_be(:project_4) { create(:project) }
let_it_be(:project_5) { create(:project, :broken_storage) }
let_it_be(:project_6) { create(:project, :broken_storage) }
let_it_be(:project_7) { create(:project) }
before do
stub_current_geo_node(secondary)
end
before_all do
create(:design, project: project_1)
create(:design, project: project_2)
create(:design, project: project_3)
create(:design, project: project_4)
create(:design, project: project_5)
create(:design, project: project_6)
end
context 'untracked IDs' do
before do
create(:geo_design_registry, project_id: project_1.id)
create(:geo_design_registry, :sync_failed, project_id: project_3.id)
create(:geo_design_registry, project_id: project_5.id)
end
it 'includes project IDs without an entry on the tracking database' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = described_class.find_registry_differences(range)
expect(untracked_ids).to match_array([project_2.id, project_4.id, project_6.id])
end
it 'excludes projects outside the ID range' do
untracked_ids, _ = described_class.find_registry_differences(project_4.id..project_6.id)
expect(untracked_ids).to match_array([project_4.id, project_6.id])
end
it 'excludes projects without designs' do
  range = Project.minimum(:id)..Project.maximum(:id)

  untracked_ids, _ = described_class.find_registry_differences(range)

  # `untracked_ids` is a flat list of project IDs, so the project's ID is
  # what must be absent; matching against `[project_7]` could never fail.
  expect(untracked_ids).not_to include(project_7.id)
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes project IDs that are not in selectively synced projects' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = described_class.find_registry_differences(range)
expect(untracked_ids).to match_array([project_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes project IDs that are not in selectively synced projects' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = described_class.find_registry_differences(range)
expect(untracked_ids).to match_array([project_6.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:geo_design_registry, project_id: project_1.id) }
before do
project_1.delete
end
it 'includes tracked IDs that do not exist in the model table' do
range = project_1.id..project_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_1.id])
end
it 'excludes IDs outside the ID range' do
range = (project_1.id + 1)..Project.maximum(:id)
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked project' do
context 'excluded from selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_3.id) }
it 'includes tracked project IDs that exist but are not in a selectively synced project' do
range = project_3.id..project_3.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_3.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_1.id) }
it 'excludes tracked project IDs that are in selectively synced projects' do
range = project_1.id..project_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked project' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_1.id) }
context 'excluded from selective sync' do
it 'includes tracked project IDs that exist but are not in a selectively synced project' do
range = project_1.id..project_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_1.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_5.id) }
it 'excludes tracked project IDs that are in selectively synced projects' do
range = project_5.id..project_5.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
describe '#search' do
let!(:design_registry) { create(:geo_design_registry) }
let!(:failed_registry) { create(:geo_design_registry, :sync_failed) }
......
......@@ -3,12 +3,203 @@
require 'spec_helper'
RSpec.describe Geo::LfsObjectRegistry, :geo do
describe 'relationships' do
it { is_expected.to belong_to(:lfs_object).class_name('LfsObject') }
end
include EE::GeoHelpers
it_behaves_like 'a BulkInsertSafe model', Geo::LfsObjectRegistry do
  # Ten unsaved registry records handed to the shared examples as valid input.
  let(:valid_items_for_bulk_insertion) do
    build_list(:geo_lfs_object_registry, 10)
  end

  # class does not have any validations defined
  let(:invalid_items_for_bulk_insertion) do
    []
  end
end
describe 'relationships' do
  # The registry row belongs to the LfsObject record it tracks.
  it do
    is_expected.to belong_to(:lfs_object).class_name('LfsObject')
  end
end
# Exercises .find_registry_differences, which is called with an ID range and
# whose result the examples destructure as a pair:
# [untracked IDs, unused tracked IDs].
describe '.find_registry_differences' do
# Default secondary node; several contexts below redefine `secondary` with
# selective sync or storage restrictions.
let_it_be(:secondary) { create(:geo_node) }
# Group hierarchy used by the selective-sync-by-namespace contexts:
# nested_group_1 is a child of synced_group, and each group owns one project.
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group_1) { create(:group, parent: synced_group) }
let_it_be(:synced_project) { create(:project, group: synced_group) }
let_it_be(:synced_project_in_nested_group) { create(:project, group: nested_group_1) }
# Projects created without synced_group. project_broken_storage is the one the
# selective-sync-by-shard contexts rely on (presumably the :broken_storage
# trait places it on the 'broken' shard -- confirm against the project factory).
let_it_be(:unsynced_project) { create(:project) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
# Before every example: make `secondary` the current Geo node and call the
# stub_lfs_object_storage helper.
before do
stub_current_geo_node(secondary)
stub_lfs_object_storage
end
# Five LFS objects created without the :object_storage trait.
let_it_be(:lfs_object_1) { create(:lfs_object) }
let_it_be(:lfs_object_2) { create(:lfs_object) }
let_it_be(:lfs_object_3) { create(:lfs_object) }
let_it_be(:lfs_object_4) { create(:lfs_object) }
let_it_be(:lfs_object_5) { create(:lfs_object) }
# Three LFS objects created with the :object_storage trait, eagerly per example.
let!(:lfs_object_remote_1) { create(:lfs_object, :object_storage) }
let!(:lfs_object_remote_2) { create(:lfs_object, :object_storage) }
let!(:lfs_object_remote_3) { create(:lfs_object, :object_storage) }
# First element of the result: LFS objects that have no registry entry yet.
context 'untracked IDs' do
before do
# Registry entries exist for lfs_object_1, _3 (failed) and _4 only.
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_4.id)
# Project links: _1 -> synced_project, _2 and _3 -> the nested-group project,
# _4 -> unsynced_project, _5 -> project_broken_storage.
create(:lfs_objects_project, project: synced_project, lfs_object: lfs_object_1)
create(:lfs_objects_project, project: synced_project_in_nested_group, lfs_object: lfs_object_2)
create(:lfs_objects_project, project: synced_project_in_nested_group, lfs_object: lfs_object_3)
create(:lfs_objects_project, project: unsynced_project, lfs_object: lfs_object_4)
create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_5)
end
# With the default node, every LFS object lacking a registry entry is expected.
it 'includes LFS object IDs without an entry on the tracking database' do
untracked_ids, _ = described_class.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array(
[lfs_object_2.id, lfs_object_5.id, lfs_object_remote_1.id,
lfs_object_remote_2.id, lfs_object_remote_3.id])
end
# The range runs from lfs_object_3 to lfs_object_remote_2, so lfs_object_2 and
# lfs_object_remote_3 are expected to fall outside it (assumes IDs follow
# creation order -- TODO confirm).
it 'excludes LFS objects outside the ID range' do
untracked_ids, _ = described_class.find_registry_differences(lfs_object_3.id..lfs_object_remote_2.id)
expect(untracked_ids).to match_array(
[lfs_object_5.id, lfs_object_remote_1.id,
lfs_object_remote_2.id])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
# Only lfs_object_2 is expected: it is linked to a project under synced_group
# and has no registry entry (lfs_object_1 and _3 are linked there too but are
# already tracked).
it 'excludes LFS object IDs that are not in selectively synced projects' do
untracked_ids, _ = described_class.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array([lfs_object_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
# Only lfs_object_5, the object linked to project_broken_storage, is expected.
it 'excludes LFS object IDs that are not in selectively synced projects' do
untracked_ids, _ = described_class.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array([lfs_object_5.id])
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
# The three :object_storage objects drop out of the expected untracked set.
it 'excludes LFS objects in object storage' do
untracked_ids, _ = described_class.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array([lfs_object_2.id, lfs_object_5.id])
end
end
end
# Second element of the result: registry entries that should not (or no
# longer) be tracked on this node.
context 'unused tracked IDs' do
context 'with an orphaned registry' do
# Registry entry pointing at an ID with no matching LfsObject row.
let!(:orphaned) { create(:geo_lfs_object_registry, lfs_object_id: non_existing_record_id) }
it 'includes tracked IDs that do not exist in the model table' do
range = non_existing_record_id..non_existing_record_id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([non_existing_record_id])
end
# Assumes non_existing_record_id lies outside 1..1000 -- TODO confirm against
# the helper that defines it.
it 'excludes IDs outside the ID range' do
range = 1..1000
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked LFS object' do
# lfs_object_1 is tracked; the range covers only its ID.
let!(:registry_entry) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id) }
let(:range) { lfs_object_1.id..lfs_object_1.id }
context 'excluded from selective sync' do
# No lfs_objects_project link is created here, so lfs_object_1 belongs to no
# project in synced_group.
it 'includes tracked LFS object IDs that exist but are not in a selectively synced project' do
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([lfs_object_1.id])
end
end
context 'included in selective sync' do
# Linking lfs_object_1 to synced_project brings it into the synced namespace.
let!(:join_record) { create(:lfs_objects_project, project: synced_project, lfs_object: lfs_object_1) }
it 'excludes tracked LFS object IDs that are in selectively synced projects' do
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked LFS object' do
# lfs_object_1 is tracked; the range covers only its ID.
let!(:registry_entry) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id) }
let(:range) { lfs_object_1.id..lfs_object_1.id }
context 'excluded from selective sync' do
# No lfs_objects_project link is created here, so lfs_object_1 is not linked
# to project_broken_storage.
it 'includes tracked LFS object IDs that exist but are not in a selectively synced project' do
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([lfs_object_1.id])
end
end
context 'included in selective sync' do
# Linking lfs_object_1 to project_broken_storage brings it into the synced shard
# as far as this spec is concerned.
let!(:join_record) { create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_1) }
it 'excludes tracked LFS object IDs that are in selectively synced projects' do
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
context 'with a tracked LFS object' do
context 'in object storage' do
# A registry entry for an :object_storage object is reported as unused on a
# :local_storage_only node.
it 'includes tracked LFS object IDs that are in object storage' do
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_1.id)
range = lfs_object_remote_1.id..lfs_object_remote_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([lfs_object_remote_1.id])
end
end
context 'not in object storage' do
# A registry entry for an object created without :object_storage stays in use.
it 'excludes tracked LFS object IDs that are not in object storage' do
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id)
range = lfs_object_1.id..lfs_object_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
end
......@@ -3,8 +3,7 @@
require 'spec_helper'
RSpec.describe Geo::UploadRegistry, :geo do
let!(:failed) { create(:geo_upload_registry, :failed) }
let!(:synced) { create(:geo_upload_registry) }
include EE::GeoHelpers
it_behaves_like 'a BulkInsertSafe model', Geo::UploadRegistry do
let(:valid_items_for_bulk_insertion) { build_list(:geo_upload_registry, 10, created_at: Time.zone.now) }
......@@ -17,20 +16,75 @@ RSpec.describe Geo::UploadRegistry, :geo do
expect(described_class.find(registry.id).upload).to be_an_instance_of(Upload)
end
describe '.find_registry_differences' do
  # Secondary node created with factory defaults.
  let_it_be(:secondary) { create(:geo_node) }
  let_it_be(:project) { create(:project) }

  # Uploads attached to the project, one issuable upload, and one personal
  # snippet upload.
  let_it_be(:upload_1) { create(:upload, model: project) }
  let_it_be(:upload_2) { create(:upload, model: project) }
  let_it_be(:upload_3) { create(:upload, :issuable_upload, model: project) }
  let_it_be(:upload_4) { create(:upload, model: project) }
  let_it_be(:upload_5) { create(:upload, model: project) }
  let_it_be(:upload_6) { create(:upload, :personal_snippet_upload) }

  # Uploads created with the :object_storage trait.
  let_it_be(:upload_7) { create(:upload, :object_storage, model: project) }
  let_it_be(:upload_8) { create(:upload, :object_storage, model: project) }
  let_it_be(:upload_9) { create(:upload, :object_storage, model: project) }

  before do
    stub_current_geo_node(secondary)
  end

  it 'returns untracked IDs as well as tracked IDs that are unused', :aggregate_failures do
    highest_upload_id = Upload.maximum(:id)

    # Registry entries for a subset of the existing uploads.
    [
      [:avatar, upload_1],
      [:file, upload_3],
      [:avatar, upload_5],
      [:personal_file, upload_6],
      [:avatar, upload_7]
    ].each do |file_type_trait, upload|
      create(:geo_upload_registry, file_type_trait, file_id: upload.id)
    end

    # Registry entries whose file_id is above every existing upload ID.
    orphaned_attachment = create(:geo_upload_registry, :attachment, file_id: highest_upload_id + 1)
    orphaned_personal_file = create(:geo_upload_registry, :personal_file, file_id: highest_upload_id + 2)

    untracked, unused = described_class.find_registry_differences(1..(highest_upload_id + 2))

    untracked_avatars = [upload_2, upload_4, upload_8, upload_9].map do |upload|
      [upload.id, 'avatar']
    end
    orphaned_pairs = [
      [orphaned_attachment.file_id, 'attachment'],
      [orphaned_personal_file.file_id, 'personal_file']
    ]

    expect(untracked).to match_array(untracked_avatars)
    expect(unused).to match_array(orphaned_pairs)
  end
end
describe '.failed' do
  it 'returns registries in the failed state' do
    # One failed registry plus one default registry that must not be returned.
    failed_registry = create(:geo_upload_registry, :failed)
    create(:geo_upload_registry)

    expect(described_class.failed).to match_ids(failed_registry)
  end
end
describe '.synced' do
  it 'returns registries in the synced state' do
    # One failed registry that must not be returned, plus one default registry.
    create(:geo_upload_registry, :failed)
    synced_registry = create(:geo_upload_registry)

    expect(described_class.synced).to match_ids(synced_registry)
  end
end
describe '.retry_due' do
it 'returns registries in the synced state' do
failed = create(:geo_upload_registry, :failed)
synced = create(:geo_upload_registry)
retry_yesterday = create(:geo_upload_registry, retry_at: Date.yesterday)
create(:geo_upload_registry, retry_at: Date.tomorrow)
......@@ -40,6 +94,8 @@ RSpec.describe Geo::UploadRegistry, :geo do
describe '.pending' do
it 'returns registries that are never synced' do
create(:geo_upload_registry, :failed)
create(:geo_upload_registry)
pending = create(:geo_upload_registry, retry_count: nil, success: false)
expect(described_class.pending).to match_ids([pending])
......@@ -91,6 +147,9 @@ RSpec.describe Geo::UploadRegistry, :geo do
end
describe '#synchronization_state' do
let_it_be(:failed) { create(:geo_upload_registry, :failed) }
let_it_be(:synced) { create(:geo_upload_registry) }
it 'returns :synced for a successful synced registry' do
  # `synced` is the registry built by the enclosing group's setup.
  reported_state = synced.synchronization_state

  expect(reported_state).to eq(:synced)
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment