Remove find_registry_differences method from Geo finders

Refactor code to make the finder class
closer to a Geo replicator.
parent 40aac725
...@@ -2,50 +2,6 @@ ...@@ -2,50 +2,6 @@
module Geo module Geo
class AttachmentRegistryFinder < FileRegistryFinder class AttachmentRegistryFinder < FileRegistryFinder
# Returns untracked uploads as well as tracked uploads that are unused.
#
# Untracked uploads is an array where each item is a tuple of [id, file_type]
# that is supposed supposed to be synced but don't yet have a registry entry.
#
# Unused uploads is an array where each item is a tuple of [id, file_type]
# that is not supposed to be synced but already have a registry entry. For
# example:
#
# - orphaned registries
# - records that became excluded from selective sync
# - records that are in object storage, and `sync_object_storage` became
# disabled
#
# We compute both sets in this method to reduce the number of DB queries
# performed.
#
# @return [Array] the first element is an Array of untracked uploads, and the
# second element is an Array of tracked uploads that are unused.
# For example: [[[1, 'avatar'], [5, 'file']], [[3, 'attachment']]]
def find_registry_differences(range)
# rubocop:disable CodeReuse/ActiveRecord
source =
replicables
.id_in(range)
.pluck(::Upload.arel_table[:id], ::Upload.arel_table[:uploader])
.map! { |id, uploader| [id, uploader.sub(/Uploader\z/, '').underscore] }
tracked =
registry_class
.model_id_in(range)
.pluck(:file_id, :file_type)
# rubocop:enable CodeReuse/ActiveRecord
untracked = source - tracked
unused_tracked = tracked - source
[untracked, unused_tracked]
end
def replicables
::Upload.replicables_for_geo_node
end
def registry_class def registry_class
Geo::UploadRegistry Geo::UploadRegistry
end end
......
...@@ -2,10 +2,6 @@ ...@@ -2,10 +2,6 @@
module Geo module Geo
class ContainerRepositoryRegistryFinder < RegistryFinder class ContainerRepositoryRegistryFinder < RegistryFinder
def replicables
current_node.container_repositories
end
def registry_class def registry_class
Geo::ContainerRepositoryRegistry Geo::ContainerRepositoryRegistry
end end
......
...@@ -2,10 +2,6 @@ ...@@ -2,10 +2,6 @@
module Geo module Geo
class DesignRegistryFinder < RegistryFinder class DesignRegistryFinder < RegistryFinder
def replicables
current_node.designs
end
def registry_class def registry_class
Geo::DesignRegistry Geo::DesignRegistry
end end
......
...@@ -27,9 +27,5 @@ module Geo ...@@ -27,9 +27,5 @@ module Geo
.limit(batch_size) .limit(batch_size)
end end
# rubocop:enable CodeReuse/ActiveRecord # rubocop:enable CodeReuse/ActiveRecord
def local_storage_only?
!current_node&.sync_object_storage
end
end end
end end
...@@ -2,10 +2,6 @@ ...@@ -2,10 +2,6 @@
module Geo module Geo
class JobArtifactRegistryFinder < FileRegistryFinder class JobArtifactRegistryFinder < FileRegistryFinder
def replicables
::Ci::JobArtifact.replicables_for_geo_node
end
def registry_class def registry_class
Geo::JobArtifactRegistry Geo::JobArtifactRegistry
end end
......
...@@ -2,18 +2,8 @@ ...@@ -2,18 +2,8 @@
module Geo module Geo
class LfsObjectRegistryFinder < FileRegistryFinder class LfsObjectRegistryFinder < FileRegistryFinder
def replicables
local_storage_only? ? lfs_objects.with_files_stored_locally : lfs_objects
end
def registry_class def registry_class
Geo::LfsObjectRegistry Geo::LfsObjectRegistry
end end
private
def lfs_objects
current_node.lfs_objects
end
end end
end end
...@@ -10,34 +10,6 @@ module Geo ...@@ -10,34 +10,6 @@ module Geo
@current_node_id = current_node_id @current_node_id = current_node_id
end end
# @!method find_registry_differences
# Returns untracked IDs as well as tracked IDs that are unused.
#
# Untracked IDs are model IDs that are supposed to be synced but don't yet
# have a registry entry.
#
# Unused tracked IDs are model IDs that are not supposed to be synced but
# already have a registry entry. For example:
#
# - orphaned registries
# - records that became excluded from selective sync
# - records that are in object storage, and `sync_object_storage` became
# disabled
#
# We compute both sets in this method to reduce the number of DB queries
# performed.
#
# @return [Array] the first element is an Array of untracked IDs, and the second element is an Array of tracked IDs that are unused
def find_registry_differences(range)
source_ids = replicables.id_in(range).pluck(replicable_primary_key) # rubocop:disable CodeReuse/ActiveRecord
tracked_ids = registry_class.pluck_model_ids_in_range(range)
untracked_ids = source_ids - tracked_ids
unused_tracked_ids = tracked_ids - source_ids
[untracked_ids, unused_tracked_ids]
end
# @!method find_unsynced_registries # @!method find_unsynced_registries
# Return an ActiveRecord::Relation of the registry records for the # Return an ActiveRecord::Relation of the registry records for the
# tracked ype that have never been synced. # tracked ype that have never been synced.
...@@ -86,14 +58,6 @@ module Geo ...@@ -86,14 +58,6 @@ module Geo
"#{self.class} does not implement #{__method__}" "#{self.class} does not implement #{__method__}"
end end
# @!method replicables
# Return an ActiveRecord::Relation of the replicable records for the
# tracked file_type(s)
def replicables
raise NotImplementedError,
"#{self.class} does not implement #{__method__}"
end
# @!method registry_count # @!method registry_count
# Return a count of the registry records for the tracked type(s) # Return a count of the registry records for the tracked type(s)
def registry_count def registry_count
...@@ -121,12 +85,5 @@ module Geo ...@@ -121,12 +85,5 @@ module Geo
GeoNode.find(current_node_id) if current_node_id GeoNode.find(current_node_id) if current_node_id
end end
end end
# @!method registry_class
# Return the fully qualified name of the replicable primary key for the
# tracked file_type(s)
def replicable_primary_key
registry_class::MODEL_CLASS.arel_table[:id]
end
end end
end end
...@@ -8,6 +8,12 @@ module EE ...@@ -8,6 +8,12 @@ module EE
scope :project_id_in, ->(ids) { joins(:project).merge(::Project.id_in(ids)) } scope :project_id_in, ->(ids) { joins(:project).merge(::Project.id_in(ids)) }
end end
class_methods do
def replicables_for_geo_node(node = ::Gitlab::Geo.current_node)
node.container_repositories
end
end
def push_blob(digest, file_path) def push_blob(digest, file_path)
client.push_blob(path, digest, file_path) client.push_blob(path, digest, file_path)
end end
......
...@@ -16,6 +16,13 @@ module EE ...@@ -16,6 +16,13 @@ module EE
scope :project_id_in, ->(ids) { joins(:projects).merge(::Project.id_in(ids)) } scope :project_id_in, ->(ids) { joins(:projects).merge(::Project.id_in(ids)) }
end end
class_methods do
def replicables_for_geo_node(node = ::Gitlab::Geo.current_node)
local_storage_only = !node&.sync_object_storage
local_storage_only ? node.lfs_objects.with_files_stored_locally : node.lfs_objects
end
end
def log_geo_deleted_event def log_geo_deleted_event
::Geo::LfsObjectDeletedEventStore.new(self).create! ::Geo::LfsObjectDeletedEventStore.new(self).create!
end end
......
...@@ -192,6 +192,10 @@ module EE ...@@ -192,6 +192,10 @@ module EE
class_methods do class_methods do
extend ::Gitlab::Utils::Override extend ::Gitlab::Utils::Override
def replicables_for_geo_node(node = ::Gitlab::Geo.current_node)
node.projects
end
def search_by_visibility(level) def search_by_visibility(level)
where(visibility_level: ::Gitlab::VisibilityLevel.string_options[level]) where(visibility_level: ::Gitlab::VisibilityLevel.string_options[level])
end end
......
...@@ -41,10 +41,28 @@ class Geo::BaseRegistry < Geo::TrackingBase ...@@ -41,10 +41,28 @@ class Geo::BaseRegistry < Geo::TrackingBase
end end
end end
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
def self.replicator_class def self.replicator_class
self::MODEL_CLASS.replicator_class self::MODEL_CLASS.replicator_class
end end
def self.find_registry_differences(range)
source_ids = self::MODEL_CLASS
.replicables_for_geo_node
.id_in(range)
.pluck(self::MODEL_CLASS.arel_table[:id])
tracked_ids = self.pluck_model_ids_in_range(range)
untracked_ids = source_ids - tracked_ids
unused_tracked_ids = tracked_ids - source_ids
[untracked_ids, unused_tracked_ids]
end
def self.find_unsynced_registries(batch_size:, except_ids: []) def self.find_unsynced_registries(batch_size:, except_ids: [])
pending pending
.model_id_not_in(except_ids) .model_id_not_in(except_ids)
...@@ -61,24 +79,6 @@ class Geo::BaseRegistry < Geo::TrackingBase ...@@ -61,24 +79,6 @@ class Geo::BaseRegistry < Geo::TrackingBase
true true
end end
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
def self.find_registry_differences(range)
source_ids = self::MODEL_CLASS
.replicables_for_geo_node
.id_in(range)
.pluck(self::MODEL_CLASS.arel_table[:id])
tracked_ids = self.pluck_model_ids_in_range(range)
untracked_ids = source_ids - tracked_ids
unused_tracked_ids = tracked_ids - source_ids
[untracked_ids, unused_tracked_ids]
end
def model_record_id def model_record_id
read_attribute(self.class::MODEL_FOREIGN_KEY) read_attribute(self.class::MODEL_FOREIGN_KEY)
end end
......
...@@ -38,14 +38,6 @@ class Geo::ContainerRepositoryRegistry < Geo::BaseRegistry ...@@ -38,14 +38,6 @@ class Geo::ContainerRepositoryRegistry < Geo::BaseRegistry
end end
end end
def self.finder_class
::Geo::ContainerRepositoryRegistryFinder
end
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end
def self.find_failed_registries(batch_size:, except_ids: []) def self.find_failed_registries(batch_size:, except_ids: [])
super super
.order(Gitlab::Database.nulls_first_order(:last_synced_at)) .order(Gitlab::Database.nulls_first_order(:last_synced_at))
......
...@@ -51,12 +51,14 @@ class Geo::DesignRegistry < Geo::BaseRegistry ...@@ -51,12 +51,14 @@ class Geo::DesignRegistry < Geo::BaseRegistry
project_ids project_ids
end end
def self.finder_class
::Geo::DesignRegistryFinder
end
def self.find_registry_differences(range) def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range) source_ids = Gitlab::Geo.current_node.designs.id_in(range).pluck_primary_key
tracked_ids = self.pluck_model_ids_in_range(range)
untracked_ids = source_ids - tracked_ids
unused_tracked_ids = tracked_ids - source_ids
[untracked_ids, unused_tracked_ids]
end end
def self.find_unsynced_registries(batch_size:, except_ids: []) def self.find_unsynced_registries(batch_size:, except_ids: [])
......
...@@ -6,14 +6,6 @@ class Geo::JobArtifactRegistry < Geo::BaseRegistry ...@@ -6,14 +6,6 @@ class Geo::JobArtifactRegistry < Geo::BaseRegistry
MODEL_CLASS = ::Ci::JobArtifact MODEL_CLASS = ::Ci::JobArtifact
MODEL_FOREIGN_KEY = :artifact_id MODEL_FOREIGN_KEY = :artifact_id
def self.finder_class
::Geo::JobArtifactRegistryFinder
end
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end
# When false, RegistryConsistencyService will frequently check the end of the # When false, RegistryConsistencyService will frequently check the end of the
# table to quickly handle new replicables. # table to quickly handle new replicables.
def self.has_create_events? def self.has_create_events?
......
...@@ -11,14 +11,6 @@ class Geo::LfsObjectRegistry < Geo::BaseRegistry ...@@ -11,14 +11,6 @@ class Geo::LfsObjectRegistry < Geo::BaseRegistry
belongs_to :lfs_object, class_name: 'LfsObject' belongs_to :lfs_object, class_name: 'LfsObject'
def self.finder_class
::Geo::LfsObjectRegistryFinder
end
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end
# If false, RegistryConsistencyService will frequently check the end of the # If false, RegistryConsistencyService will frequently check the end of the
# table to quickly handle new replicables. # table to quickly handle new replicables.
def self.has_create_events? def self.has_create_events?
......
...@@ -44,16 +44,6 @@ class Geo::ProjectRegistry < Geo::BaseRegistry ...@@ -44,16 +44,6 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
where(nil).pluck(:project_id) where(nil).pluck(:project_id)
end end
def self.find_registry_differences(range)
source_ids = Gitlab::Geo.current_node.projects.id_in(range).pluck_primary_key
tracked_ids = self.pluck_model_ids_in_range(range)
untracked_ids = source_ids - tracked_ids
unused_tracked_ids = tracked_ids - source_ids
[untracked_ids, unused_tracked_ids]
end
def self.find_failed_registries(batch_size:, except_ids: []) def self.find_failed_registries(batch_size:, except_ids: [])
super super
.order(Gitlab::Database.nulls_first_order(:last_repository_synced_at)) .order(Gitlab::Database.nulls_first_order(:last_repository_synced_at))
......
...@@ -12,12 +12,41 @@ class Geo::UploadRegistry < Geo::BaseRegistry ...@@ -12,12 +12,41 @@ class Geo::UploadRegistry < Geo::BaseRegistry
scope :fresh, -> { order(created_at: :desc) } scope :fresh, -> { order(created_at: :desc) }
def self.finder_class # Returns untracked uploads as well as tracked uploads that are unused.
::Geo::AttachmentRegistryFinder #
end # Untracked uploads is an array where each item is a tuple of [id, file_type]
# that is supposed to be synced but don't yet have a registry entry.
#
# Unused uploads is an array where each item is a tuple of [id, file_type]
# that is not supposed to be synced but already have a registry entry. For
# example:
#
# - orphaned registries
# - records that became excluded from selective sync
# - records that are in object storage, and `sync_object_storage` became
# disabled
#
# We compute both sets in this method to reduce the number of DB queries
# performed.
#
# @return [Array] the first element is an Array of untracked uploads, and the
# second element is an Array of tracked uploads that are unused.
# For example: [[[1, 'avatar'], [5, 'file']], [[3, 'attachment']]]
def self.find_registry_differences(range) def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range) source =
self::MODEL_CLASS.replicables_for_geo_node
.id_in(range)
.pluck(self::MODEL_CLASS.arel_table[:id], self::MODEL_CLASS.arel_table[:uploader])
.map! { |id, uploader| [id, uploader.sub(/Uploader\z/, '').underscore] }
tracked =
self.model_id_in(range)
.pluck(:file_id, :file_type)
untracked = source - tracked
unused_tracked = tracked - source
[untracked, unused_tracked]
end end
# If false, RegistryConsistencyService will frequently check the end of the # If false, RegistryConsistencyService will frequently check the end of the
......
...@@ -3,150 +3,58 @@ ...@@ -3,150 +3,58 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::AttachmentRegistryFinder, :geo do RSpec.describe Geo::AttachmentRegistryFinder, :geo do
include ::EE::GeoHelpers let_it_be(:project) { create(:project) }
let_it_be(:upload_1) { create(:upload, model: project) }
let_it_be(:secondary) { create(:geo_node) } let_it_be(:upload_2) { create(:upload, model: project) }
let_it_be(:upload_3) { create(:upload, :issuable_upload, model: project) }
let_it_be(:synced_group) { create(:group) } let_it_be(:upload_4) { create(:upload, model: project) }
let_it_be(:synced_subgroup) { create(:group, parent: synced_group) } let_it_be(:upload_5) { create(:upload, model: project) }
let_it_be(:unsynced_group) { create(:group) }
let_it_be(:synced_project) { create(:project, group: synced_group) }
let_it_be(:synced_project_in_nested_group) { create(:project, group: synced_subgroup) }
let_it_be(:unsynced_project) { create(:project, :broken_storage, group: unsynced_group) }
let_it_be(:upload_1) { create(:upload, model: synced_group) }
let_it_be(:upload_2) { create(:upload, model: unsynced_group) }
let_it_be(:upload_3) { create(:upload, :issuable_upload, model: synced_project_in_nested_group) }
let_it_be(:upload_4) { create(:upload, model: unsynced_project) }
let_it_be(:upload_5) { create(:upload, model: synced_project) }
let_it_be(:upload_6) { create(:upload, :personal_snippet_upload) } let_it_be(:upload_6) { create(:upload, :personal_snippet_upload) }
let_it_be(:upload_7) { create(:upload, :object_storage, model: synced_project) } let_it_be(:upload_7) { create(:upload, :object_storage, model: project) }
let_it_be(:upload_8) { create(:upload, :object_storage, model: unsynced_project) } let_it_be(:upload_8) { create(:upload, :object_storage, model: project) }
let_it_be(:upload_9) { create(:upload, :object_storage, model: synced_group) } let_it_be(:upload_9) { create(:upload, :object_storage, model: project) }
before do let_it_be(:registry_upload_1) { create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id) }
stub_current_geo_node(secondary) let_it_be(:registry_upload_2) { create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true) }
end let_it_be(:registry_upload_3) { create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id) }
let_it_be(:registry_upload_4) { create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id) }
subject { described_class.new(current_node_id: secondary.id) } let_it_be(:registry_upload_5) { create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago) }
let_it_be(:registry_upload_6) { create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id) }
let_it_be(:registry_upload_7) { create(:geo_upload_registry, :attachment, :failed, file_id: upload_7.id, missing_on_primary: true) }
let_it_be(:registry_upload_8) { create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id) }
describe '#registry_count' do describe '#registry_count' do
it 'counts registries for uploads' do it 'counts registries for uploads' do
create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
expect(subject.registry_count).to eq 8 expect(subject.registry_count).to eq 8
end end
end end
describe '#synced_count' do describe '#synced_count' do
it 'counts registries that has been synced' do it 'counts registries that has been synced' do
create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id) expect(subject.synced_count).to eq 2
create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
expect(subject.synced_count).to eq 3
end end
end end
describe '#failed_count' do describe '#failed_count' do
it 'counts registries that sync has failed' do it 'counts registries that sync has failed' do
create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id) expect(subject.failed_count).to eq 4
create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
expect(subject.failed_count).to eq 3
end end
end end
describe '#synced_missing_on_primary_count' do describe '#synced_missing_on_primary_count' do
it 'counts registries that have been synced and are missing on the primary, excluding not synced ones' do it 'counts registries that have been synced and are missing on the primary, excluding not synced ones' do
create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id) expect(subject.synced_missing_on_primary_count).to eq 2
create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
expect(subject.synced_missing_on_primary_count).to eq 3
end
end
describe '#find_registry_differences' do
it 'returns untracked IDs as well as tracked IDs that are unused', :aggregate_failures do
max_id = Upload.maximum(:id)
create(:geo_upload_registry, :avatar, file_id: upload_1.id)
create(:geo_upload_registry, :file, file_id: upload_3.id)
create(:geo_upload_registry, :avatar, file_id: upload_5.id)
create(:geo_upload_registry, :personal_file, file_id: upload_6.id)
create(:geo_upload_registry, :avatar, file_id: upload_7.id)
unused_registry_1 = create(:geo_upload_registry, :attachment, file_id: max_id + 1)
unused_registry_2 = create(:geo_upload_registry, :personal_file, file_id: max_id + 2)
range = 1..(max_id + 2)
untracked, unused = subject.find_registry_differences(range)
expected_untracked = [
[upload_2.id, 'avatar'],
[upload_4.id, 'avatar'],
[upload_8.id, 'avatar'],
[upload_9.id, 'avatar']
]
expected_unused = [
[unused_registry_1.file_id, 'attachment'],
[unused_registry_2.file_id, 'personal_file']
]
expect(untracked).to match_array(expected_untracked)
expect(unused).to match_array(expected_unused)
end end
end end
describe '#find_unsynced_registries' do describe '#find_unsynced_registries' do
it 'returns registries for uploads that have never been synced' do it 'returns registries for uploads that have never been synced' do
create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
registry_upload_3 = create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
registry_upload_8 = create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
registries = subject.find_unsynced_registries(batch_size: 10) registries = subject.find_unsynced_registries(batch_size: 10)
expect(registries).to match_ids(registry_upload_3, registry_upload_8) expect(registries).to match_ids(registry_upload_3, registry_upload_8)
end end
it 'excludes except_ids' do it 'excludes except_ids' do
create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
registry_upload_8 = create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
registries = subject.find_unsynced_registries(batch_size: 10, except_ids: [upload_3.id]) registries = subject.find_unsynced_registries(batch_size: 10, except_ids: [upload_3.id])
expect(registries).to match_ids(registry_upload_8) expect(registries).to match_ids(registry_upload_8)
...@@ -155,31 +63,13 @@ RSpec.describe Geo::AttachmentRegistryFinder, :geo do ...@@ -155,31 +63,13 @@ RSpec.describe Geo::AttachmentRegistryFinder, :geo do
describe '#find_failed_registries' do describe '#find_failed_registries' do
it 'returns registries for job artifacts that have failed to sync' do it 'returns registries for job artifacts that have failed to sync' do
registry_upload_1 = create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
registry_upload_4 = create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
registry_upload_6 = create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
registries = subject.find_failed_registries(batch_size: 10) registries = subject.find_failed_registries(batch_size: 10)
expect(registries).to match_ids(registry_upload_1, registry_upload_4, registry_upload_6) expect(registries).to match_ids(registry_upload_1, registry_upload_4, registry_upload_6, registry_upload_7)
end end
it 'excludes except_ids' do it 'excludes except_ids' do
registry_upload_1 = create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id) registries = subject.find_failed_registries(batch_size: 10, except_ids: [upload_4.id, upload_7.id])
create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
registry_upload_6 = create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
registries = subject.find_failed_registries(batch_size: 10, except_ids: [upload_4.id])
expect(registries).to match_ids(registry_upload_1, registry_upload_6) expect(registries).to match_ids(registry_upload_1, registry_upload_6)
end end
...@@ -187,30 +77,12 @@ RSpec.describe Geo::AttachmentRegistryFinder, :geo do ...@@ -187,30 +77,12 @@ RSpec.describe Geo::AttachmentRegistryFinder, :geo do
describe '#find_retryable_synced_missing_on_primary_registries' do describe '#find_retryable_synced_missing_on_primary_registries' do
it 'returns registries for job artifacts that have been synced and are missing on the primary' do it 'returns registries for job artifacts that have been synced and are missing on the primary' do
create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
registry_upload_2 = create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
registry_upload_5 = create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_7.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10) registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10)
expect(registries).to match_ids(registry_upload_2, registry_upload_5) expect(registries).to match_ids(registry_upload_2, registry_upload_5)
end end
it 'excludes except_ids' do it 'excludes except_ids' do
create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
registry_upload_2 = create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_7.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10, except_ids: [upload_5.id]) registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10, except_ids: [upload_5.id])
expect(registries).to match_ids(registry_upload_2) expect(registries).to match_ids(registry_upload_2)
......
...@@ -2,25 +2,15 @@ ...@@ -2,25 +2,15 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo do RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo do
include ::EE::GeoHelpers let_it_be(:project) { create(:project) }
let_it_be(:container_repository_1) { create(:container_repository, project: project) }
let_it_be(:secondary) { create(:geo_node) } let_it_be(:container_repository_2) { create(:container_repository, project: project) }
let_it_be(:synced_group) { create(:group) } let_it_be(:container_repository_3) { create(:container_repository, project: project) }
let_it_be(:nested_group) { create(:group, parent: synced_group) } let_it_be(:container_repository_4) { create(:container_repository, project: project) }
let_it_be(:project_synced_group) { create(:project, group: synced_group) } let_it_be(:container_repository_5) { create(:container_repository, project: project) }
let_it_be(:project_nested_group) { create(:project, group: nested_group) } let_it_be(:container_repository_6) { create(:container_repository, project: project) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
let_it_be(:container_repository_1) { create(:container_repository, project: project_synced_group) }
let_it_be(:container_repository_2) { create(:container_repository, project: project_nested_group) }
let_it_be(:container_repository_3) { create(:container_repository) }
let_it_be(:container_repository_4) { create(:container_repository) }
let_it_be(:container_repository_5) { create(:container_repository, project: project_broken_storage) }
let_it_be(:container_repository_6) { create(:container_repository, project: project_broken_storage) }
subject { described_class.new(current_node_id: secondary.id) }
before do before do
stub_current_geo_node(secondary)
stub_registry_replication_config(enabled: true) stub_registry_replication_config(enabled: true)
end end
...@@ -51,140 +41,6 @@ RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo do ...@@ -51,140 +41,6 @@ RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo do
end end
end end
describe '#find_registry_differences' do
context 'untracked IDs' do
before do
create(:container_repository_registry, container_repository_id: container_repository_1.id)
create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_3.id)
create(:container_repository_registry, container_repository_id: container_repository_5.id)
end
it 'includes container registries IDs without an entry on the tracking database' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_2.id, container_repository_4.id, container_repository_6.id])
end
it 'excludes container registries outside the ID range' do
untracked_ids, _ = subject.find_registry_differences(container_repository_4.id..container_repository_6.id)
expect(untracked_ids).to match_array([container_repository_4.id, container_repository_6.id])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes container_registry IDs that projects are not in the selected namespaces' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes container_registry IDs that projects are not in the selected shards' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_6.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
before do
container_repository_1.delete
end
it 'includes tracked IDs that do not exist in the model table' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_1.id])
end
it 'excludes IDs outside the ID range' do
range = (container_repository_1.id + 1)..ContainerRepository.maximum(:id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked container_registry' do
context 'excluded from selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_3.id) }
it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
range = container_repository_3.id..container_repository_3.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_3.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
it 'excludes tracked container_registry IDs that are in selectively synced projects' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked container_registry' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
context 'excluded from selective sync' do
it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_1.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_5.id) }
it 'excludes tracked container_registry IDs that are in selectively synced projects' do
range = container_repository_5.id..container_repository_5.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
describe '#find_unsynced_registries' do describe '#find_unsynced_registries' do
let_it_be(:registry_container_registry_1) { create(:container_repository_registry, :synced, container_repository_id: container_repository_1.id) } let_it_be(:registry_container_registry_1) { create(:container_repository_registry, :synced, container_repository_id: container_repository_1.id) }
let_it_be(:registry_container_registry_2) { create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_2.id) } let_it_be(:registry_container_registry_2) { create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_2.id) }
......
...@@ -2,211 +2,41 @@ ...@@ -2,211 +2,41 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::DesignRegistryFinder, :geo do RSpec.describe Geo::DesignRegistryFinder, :geo do
include ::EE::GeoHelpers let_it_be(:group) { create(:group) }
let_it_be(:project_1) { create(:project, group: group) }
let_it_be(:secondary) { create(:geo_node) } let_it_be(:project_2) { create(:project, group: group) }
let_it_be(:synced_group) { create(:group) } let_it_be(:project_3) { create(:project, group: group) }
let_it_be(:nested_group) { create(:group, parent: synced_group) } let_it_be(:project_4) { create(:project, group: group) }
let_it_be(:project_1) { create(:project, group: synced_group) } let_it_be(:project_5) { create(:project, :broken_storage, group: group) }
let_it_be(:project_2) { create(:project, group: nested_group) } let_it_be(:project_6) { create(:project, :broken_storage, group: group) }
let_it_be(:project_3) { create(:project) } let_it_be(:project_7) { create(:project, group: group) }
let_it_be(:project_4) { create(:project) }
let_it_be(:project_5) { create(:project, :broken_storage) } let_it_be(:registry_project_1) { create(:geo_design_registry, :synced, project_id: project_1.id) }
let_it_be(:project_6) { create(:project, :broken_storage) } let_it_be(:registry_project_2) { create(:geo_design_registry, :sync_failed, project_id: project_2.id) }
let_it_be(:project_7) { create(:project) } let_it_be(:registry_project_3) { create(:geo_design_registry, project_id: project_3.id, last_synced_at: nil) }
let_it_be(:registry_project_4) { create(:geo_design_registry, project_id: project_4.id, last_synced_at: 3.days.ago, retry_at: 2.days.ago) }
subject { described_class.new(current_node_id: secondary.id) } let_it_be(:registry_project_5) { create(:geo_design_registry, project_id: project_5.id, last_synced_at: 6.days.ago) }
let_it_be(:registry_project_6) { create(:geo_design_registry, project_id: project_6.id, last_synced_at: nil) }
before do
stub_current_geo_node(secondary)
end
describe '#registry_count' do describe '#registry_count' do
it 'returns number of desgin registries' do it 'returns number of desgin registries' do
create(:geo_design_registry, :synced, project_id: project_1.id) expect(subject.registry_count).to eq(6)
create(:geo_design_registry, :sync_failed, project_id: project_2.id)
expect(subject.registry_count).to eq(2)
end end
end end
describe '#synced_count' do describe '#synced_count' do
it 'returns number of synced registries' do it 'returns number of synced registries' do
create(:geo_design_registry, :synced, project_id: project_1.id)
create(:geo_design_registry, :sync_failed, project_id: project_2.id)
expect(subject.synced_count).to eq(1) expect(subject.synced_count).to eq(1)
end end
end end
describe '#failed_count' do describe '#failed_count' do
it 'returns number of failed registries' do it 'returns number of failed registries' do
create(:geo_design_registry, :synced, project_id: project_1.id)
create(:geo_design_registry, :sync_failed, project_id: project_2.id)
expect(subject.failed_count).to eq(1) expect(subject.failed_count).to eq(1)
end end
end end
describe '#find_registry_differences' do
before_all do
create(:design, project: project_1)
create(:design, project: project_2)
create(:design, project: project_3)
create(:design, project: project_4)
create(:design, project: project_5)
create(:design, project: project_6)
end
context 'untracked IDs' do
before do
create(:geo_design_registry, project_id: project_1.id)
create(:geo_design_registry, :sync_failed, project_id: project_3.id)
create(:geo_design_registry, project_id: project_5.id)
end
it 'includes project IDs without an entry on the tracking database' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([project_2.id, project_4.id, project_6.id])
end
it 'excludes projects outside the ID range' do
untracked_ids, _ = subject.find_registry_differences(project_4.id..project_6.id)
expect(untracked_ids).to match_array([project_4.id, project_6.id])
end
it 'excludes projects without designs' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).not_to include([project_7])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes project IDs that are not in selectively synced projects' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([project_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes project IDs that are not in selectively synced projects' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([project_6.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:geo_design_registry, project_id: project_1.id) }
before do
project_1.delete
end
it 'includes tracked IDs that do not exist in the model table' do
range = project_1.id..project_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_1.id])
end
it 'excludes IDs outside the ID range' do
range = (project_1.id + 1)..Project.maximum(:id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked project' do
context 'excluded from selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_3.id) }
it 'includes tracked project IDs that exist but are not in a selectively synced project' do
range = project_3.id..project_3.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_3.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_1.id) }
it 'excludes tracked project IDs that are in selectively synced projects' do
range = project_1.id..project_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked project' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_1.id) }
context 'excluded from selective sync' do
it 'includes tracked project IDs that exist but are not in a selectively synced project' do
range = project_1.id..project_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_1.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_5.id) }
it 'excludes tracked project IDs that are in selectively synced projects' do
range = project_5.id..project_5.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
describe '#find_unsynced_registries' do describe '#find_unsynced_registries' do
let!(:registry_project_1) { create(:geo_design_registry, :synced, project_id: project_1.id) }
let!(:registry_project_2) { create(:geo_design_registry, :sync_failed, project_id: project_2.id) }
let!(:registry_project_3) { create(:geo_design_registry, project_id: project_3.id, last_synced_at: nil) }
let!(:registry_project_4) { create(:geo_design_registry, project_id: project_4.id, last_synced_at: 3.days.ago, retry_at: 2.days.ago) }
let!(:registry_project_5) { create(:geo_design_registry, project_id: project_5.id, last_synced_at: 6.days.ago) }
let!(:registry_project_6) { create(:geo_design_registry, project_id: project_6.id, last_synced_at: nil) }
it 'returns registries for projects that have never been synced' do it 'returns registries for projects that have never been synced' do
registries = subject.find_unsynced_registries(batch_size: 10) registries = subject.find_unsynced_registries(batch_size: 10)
...@@ -221,13 +51,6 @@ RSpec.describe Geo::DesignRegistryFinder, :geo do ...@@ -221,13 +51,6 @@ RSpec.describe Geo::DesignRegistryFinder, :geo do
end end
describe '#find_failed_registries' do describe '#find_failed_registries' do
let!(:registry_project_1) { create(:geo_design_registry, :synced, project_id: project_1.id) }
let!(:registry_project_2) { create(:geo_design_registry, :sync_failed, project_id: project_2.id) }
let!(:registry_project_3) { create(:geo_design_registry, project_id: project_3.id, last_synced_at: nil) }
let!(:registry_project_4) { create(:geo_design_registry, project_id: project_4.id, last_synced_at: 3.days.ago, retry_at: 2.days.ago) }
let!(:registry_project_5) { create(:geo_design_registry, project_id: project_5.id, last_synced_at: 6.days.ago) }
let!(:registry_project_6) { create(:geo_design_registry, project_id: project_6.id, last_synced_at: nil) }
it 'returns registries for projects that have been recently updated' do it 'returns registries for projects that have been recently updated' do
registries = subject.find_failed_registries(batch_size: 10) registries = subject.find_failed_registries(batch_size: 10)
......
...@@ -5,7 +5,6 @@ require 'spec_helper' ...@@ -5,7 +5,6 @@ require 'spec_helper'
RSpec.describe Geo::FileRegistryFinder, :geo do RSpec.describe Geo::FileRegistryFinder, :geo do
context 'with abstract methods' do context 'with abstract methods' do
%w[ %w[
replicables
registry_class registry_class
].each do |required_method| ].each do |required_method|
it "requires subclasses to implement #{required_method}" do it "requires subclasses to implement #{required_method}" do
...@@ -13,24 +12,4 @@ RSpec.describe Geo::FileRegistryFinder, :geo do ...@@ -13,24 +12,4 @@ RSpec.describe Geo::FileRegistryFinder, :geo do
end end
end end
end end
describe '#local_storage_only?' do
subject { described_class.new(current_node_id: geo_node.id) }
context 'sync_object_storage is enabled' do
let(:geo_node) { create(:geo_node, sync_object_storage: true) }
it 'returns false' do
expect(subject.local_storage_only?).to be_falsey
end
end
context 'sync_object_storage is disabled' do
let(:geo_node) { create(:geo_node, sync_object_storage: false) }
it 'returns true' do
expect(subject.local_storage_only?).to be_truthy
end
end
end
end end
...@@ -3,335 +3,62 @@ ...@@ -3,335 +3,62 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::JobArtifactRegistryFinder, :geo do RSpec.describe Geo::JobArtifactRegistryFinder, :geo do
include ::EE::GeoHelpers
let_it_be(:secondary) { create(:geo_node) }
before do before do
stub_current_geo_node(secondary)
stub_artifacts_object_storage stub_artifacts_object_storage
end end
let_it_be(:synced_group) { create(:group) } let_it_be(:project) { create(:project) }
let_it_be(:nested_group_1) { create(:group, parent: synced_group) }
let_it_be(:synced_project) { create(:project, group: synced_group) } let_it_be(:ci_job_artifact_1) { create(:ci_job_artifact, project: project) }
let_it_be(:synced_project_in_nested_group) { create(:project, group: nested_group_1) } let_it_be(:ci_job_artifact_2) { create(:ci_job_artifact, project: project) }
let_it_be(:unsynced_project) { create(:project) } let_it_be(:ci_job_artifact_3) { create(:ci_job_artifact, project: project) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) } let_it_be(:ci_job_artifact_4) { create(:ci_job_artifact, project: project) }
let_it_be(:ci_job_artifact_5) { create(:ci_job_artifact, project: project) }
let!(:ci_job_artifact_1) { create(:ci_job_artifact, project: synced_project) } let!(:ci_job_artifact_remote_1) { create(:ci_job_artifact, :remote_store, project: project) }
let!(:ci_job_artifact_2) { create(:ci_job_artifact, project: synced_project_in_nested_group) } let!(:ci_job_artifact_remote_2) { create(:ci_job_artifact, :remote_store, project: project) }
let!(:ci_job_artifact_3) { create(:ci_job_artifact, project: synced_project_in_nested_group) } let!(:ci_job_artifact_remote_3) { create(:ci_job_artifact, :remote_store, project: project) }
let!(:ci_job_artifact_4) { create(:ci_job_artifact, project: unsynced_project) }
let!(:ci_job_artifact_5) { create(:ci_job_artifact, project: project_broken_storage) } let_it_be(:registry_ci_job_artifact_1) { create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id) }
let!(:ci_job_artifact_remote_1) { create(:ci_job_artifact, :remote_store) } let_it_be(:registry_ci_job_artifact_2) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true) }
let!(:ci_job_artifact_remote_2) { create(:ci_job_artifact, :remote_store) } let_it_be(:registry_ci_job_artifact_3) { create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id) }
let!(:ci_job_artifact_remote_3) { create(:ci_job_artifact, :remote_store) } let_it_be(:registry_ci_job_artifact_4) { create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id) }
let_it_be(:registry_ci_job_artifact_5) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago) }
subject { described_class.new(current_node_id: secondary.id) } let!(:registry_ci_job_artifact_remote_1) { create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id) }
let!(:registry_ci_job_artifact_remote_2) { create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true) }
let!(:registry_ci_job_artifact_remote_3) { create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id) }
describe '#registry_count' do describe '#registry_count' do
it 'counts registries for job artifacts' do it 'counts registries for job artifacts' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
expect(subject.registry_count).to eq 8 expect(subject.registry_count).to eq 8
end end
end end
describe '#synced_count' do describe '#synced_count' do
it 'counts registries that has been synced' do it 'counts registries that has been synced' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id) expect(subject.synced_count).to eq 2
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
expect(subject.synced_count).to eq 3
end end
end end
describe '#failed_count' do describe '#failed_count' do
it 'counts registries that sync has failed' do it 'counts registries that sync has failed' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id) expect(subject.failed_count).to eq 4
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
expect(subject.failed_count).to eq 3
end end
end end
describe '#synced_missing_on_primary_count' do describe '#synced_missing_on_primary_count' do
it 'counts registries that have been synced and are missing on the primary, excluding not synced ones' do it 'counts registries that have been synced and are missing on the primary, excluding not synced ones' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id) expect(subject.synced_missing_on_primary_count).to eq 2
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
expect(subject.synced_missing_on_primary_count).to eq 3
end
end
describe '#find_registry_differences' do
# Untracked IDs should not contain any of these expired job artifacts.
let!(:ci_job_artifact_6) { create(:ci_job_artifact, :expired, project: synced_project) }
let!(:ci_job_artifact_7) { create(:ci_job_artifact, :expired, project: unsynced_project) }
let!(:ci_job_artifact_8) { create(:ci_job_artifact, :expired, project: project_broken_storage) }
let!(:ci_job_artifact_remote_4) { create(:ci_job_artifact, :expired, :remote_store) }
context 'untracked IDs' do
before do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id)
end
it 'includes job artifact IDs without an entry on the tracking database' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array(
[ci_job_artifact_2.id, ci_job_artifact_5.id, ci_job_artifact_remote_1.id,
ci_job_artifact_remote_2.id, ci_job_artifact_remote_3.id])
end
it 'excludes job artifacts outside the ID range' do
untracked_ids, _ = subject.find_registry_differences(ci_job_artifact_3.id..ci_job_artifact_remote_2.id)
expect(untracked_ids).to match_array(
[ci_job_artifact_5.id, ci_job_artifact_remote_1.id,
ci_job_artifact_remote_2.id])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes job artifact IDs that are not in selectively synced projects' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([ci_job_artifact_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes job artifact IDs that are not in selectively synced projects' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([ci_job_artifact_5.id])
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'excludes job artifacts in object storage' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([ci_job_artifact_2.id, ci_job_artifact_5.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:geo_job_artifact_registry, artifact_id: non_existing_record_id) }
it 'includes tracked IDs that do not exist in the model table' do
range = non_existing_record_id..non_existing_record_id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([non_existing_record_id])
end
it 'excludes IDs outside the ID range' do
range = 1..1000
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with an expired registry' do
let!(:expired) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_6.id) }
it 'includes expired tracked IDs that exists in the model table' do
range = ci_job_artifact_6.id..ci_job_artifact_6.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_6.id])
end
it 'excludes IDs outside the ID range' do
range = (ci_job_artifact_6.id + 1)..(ci_job_artifact_6.id + 10)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked job artifact' do
let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id) }
let(:range) { ci_job_artifact_1.id..ci_job_artifact_4.id }
context 'excluded from selective sync' do
it 'includes tracked job artifact IDs that exist but are not in a selectively synced project' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_4.id])
end
end
context 'included in selective sync' do
it 'excludes tracked job artifact IDs that are in selectively synced projects' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
it 'includes expired tracked IDs that are in selectively synced projects' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_6.id)
range = ci_job_artifact_6.id..ci_job_artifact_6.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_6.id])
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked job artifact' do
let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id) }
let(:range) { ci_job_artifact_1.id..ci_job_artifact_5.id }
context 'excluded from selective sync' do
it 'includes tracked job artifact IDs that exist but are not in a selectively synced shard' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_1.id])
end
end
context 'included in selective sync' do
it 'excludes tracked job artifact IDs that are in selectively synced shards' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
it 'includes expired tracked IDs that are in selectively synced shards' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_8.id)
range = ci_job_artifact_8.id..ci_job_artifact_8.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_8.id])
end
end
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
context 'with a tracked job artifact' do
context 'in object storage' do
it 'includes tracked job artifact IDs that are in object storage' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_1.id)
range = ci_job_artifact_remote_1.id..ci_job_artifact_remote_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_remote_1.id])
end
it 'includes expired tracked IDs that are in object storage' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_4.id)
range = ci_job_artifact_remote_4.id..ci_job_artifact_remote_4.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_remote_4.id])
end
end
context 'not in object storage' do
it 'excludes tracked job artifact IDs that are not in object storage' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
range = ci_job_artifact_1.id..ci_job_artifact_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end end
end end
describe '#find_unsynced_registries' do describe '#find_unsynced_registries' do
it 'returns registries for job artifacts that have never been synced' do it 'returns registries for job artifacts that have never been synced' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
registry_ci_job_artifact_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
registries = subject.find_unsynced_registries(batch_size: 10) registries = subject.find_unsynced_registries(batch_size: 10)
expect(registries).to match_ids(registry_ci_job_artifact_3, registry_ci_job_artifact_remote_3) expect(registries).to match_ids(registry_ci_job_artifact_3, registry_ci_job_artifact_remote_3)
end end
it 'excludes except_ids' do it 'excludes except_ids' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
registries = subject.find_unsynced_registries(batch_size: 10, except_ids: [ci_job_artifact_3.id]) registries = subject.find_unsynced_registries(batch_size: 10, except_ids: [ci_job_artifact_3.id])
expect(registries).to match_ids(registry_ci_job_artifact_remote_3) expect(registries).to match_ids(registry_ci_job_artifact_remote_3)
...@@ -340,31 +67,13 @@ RSpec.describe Geo::JobArtifactRegistryFinder, :geo do ...@@ -340,31 +67,13 @@ RSpec.describe Geo::JobArtifactRegistryFinder, :geo do
describe '#find_failed_registries' do describe '#find_failed_registries' do
it 'returns registries for job artifacts that have failed to sync' do it 'returns registries for job artifacts that have failed to sync' do
registry_ci_job_artifact_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
registry_ci_job_artifact_4 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
registry_ci_job_artifact_remote_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
registries = subject.find_failed_registries(batch_size: 10) registries = subject.find_failed_registries(batch_size: 10)
expect(registries).to match_ids(registry_ci_job_artifact_1, registry_ci_job_artifact_4, registry_ci_job_artifact_remote_1) expect(registries).to match_ids(registry_ci_job_artifact_1, registry_ci_job_artifact_4, registry_ci_job_artifact_remote_1, registry_ci_job_artifact_remote_2)
end end
it 'excludes except_ids' do it 'excludes except_ids' do
registry_ci_job_artifact_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id) registries = subject.find_failed_registries(batch_size: 10, except_ids: [ci_job_artifact_4.id, ci_job_artifact_remote_2.id])
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
registry_ci_job_artifact_remote_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
registries = subject.find_failed_registries(batch_size: 10, except_ids: [ci_job_artifact_4.id])
expect(registries).to match_ids(registry_ci_job_artifact_1, registry_ci_job_artifact_remote_1) expect(registries).to match_ids(registry_ci_job_artifact_1, registry_ci_job_artifact_remote_1)
end end
...@@ -372,30 +81,12 @@ RSpec.describe Geo::JobArtifactRegistryFinder, :geo do ...@@ -372,30 +81,12 @@ RSpec.describe Geo::JobArtifactRegistryFinder, :geo do
describe '#find_retryable_synced_missing_on_primary_registries' do describe '#find_retryable_synced_missing_on_primary_registries' do
it 'returns registries for job artifacts that have been synced and are missing on the primary' do it 'returns registries for job artifacts that have been synced and are missing on the primary' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
registry_ci_job_artifact_2 = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
registry_ci_job_artifact_5 = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10) registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10)
expect(registries).to match_ids(registry_ci_job_artifact_2, registry_ci_job_artifact_5) expect(registries).to match_ids(registry_ci_job_artifact_2, registry_ci_job_artifact_5)
end end
it 'excludes except_ids' do it 'excludes except_ids' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
registry_ci_job_artifact_2 = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10, except_ids: [ci_job_artifact_5.id]) registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10, except_ids: [ci_job_artifact_5.id])
expect(registries).to match_ids(registry_ci_job_artifact_2) expect(registries).to match_ids(registry_ci_job_artifact_2)
......
...@@ -3,282 +3,60 @@ ...@@ -3,282 +3,60 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::LfsObjectRegistryFinder, :geo do RSpec.describe Geo::LfsObjectRegistryFinder, :geo do
let_it_be(:secondary) { create(:geo_node) }
before do before do
stub_lfs_object_storage stub_lfs_object_storage
end end
let!(:lfs_object_1) { create(:lfs_object) } let_it_be(:lfs_object_1) { create(:lfs_object) }
let!(:lfs_object_2) { create(:lfs_object) } let_it_be(:lfs_object_2) { create(:lfs_object) }
let!(:lfs_object_3) { create(:lfs_object) } let_it_be(:lfs_object_3) { create(:lfs_object) }
let!(:lfs_object_4) { create(:lfs_object) } let_it_be(:lfs_object_4) { create(:lfs_object) }
let!(:lfs_object_5) { create(:lfs_object) } let_it_be(:lfs_object_5) { create(:lfs_object) }
let!(:lfs_object_remote_1) { create(:lfs_object, :object_storage) } let!(:lfs_object_remote_1) { create(:lfs_object, :object_storage) }
let!(:lfs_object_remote_2) { create(:lfs_object, :object_storage) } let!(:lfs_object_remote_2) { create(:lfs_object, :object_storage) }
let!(:lfs_object_remote_3) { create(:lfs_object, :object_storage) } let!(:lfs_object_remote_3) { create(:lfs_object, :object_storage) }
subject { described_class.new(current_node_id: secondary.id) } let_it_be(:registry_lfs_object_1) { create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_1.id) }
let_it_be(:registry_lfs_object_2) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object_2.id, missing_on_primary: true) }
let_it_be(:registry_lfs_object_3) { create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_3.id) }
let_it_be(:registry_lfs_object_4) { create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_4.id) }
let_it_be(:registry_lfs_object_5) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago) }
let!(:registry_lfs_object_remote_1) { create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_1.id) }
let!(:registry_lfs_object_remote_2) { create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true) }
let!(:registry_lfs_object_remote_3) { create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_remote_3.id) }
describe '#registry_count' do describe '#registry_count' do
it 'counts registries for LFS objects' do it 'counts registries for LFS objects' do
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_4.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_remote_3.id)
expect(subject.registry_count).to eq 8 expect(subject.registry_count).to eq 8
end end
end end
describe '#synced_count' do describe '#synced_count' do
it 'counts registries that has been synced' do it 'counts registries that has been synced' do
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_1.id) expect(subject.synced_count).to eq 2
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_4.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_remote_3.id)
expect(subject.synced_count).to eq 3
end end
end end
describe '#failed_count' do describe '#failed_count' do
it 'counts registries that sync has failed' do it 'counts registries that sync has failed' do
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_1.id) expect(subject.failed_count).to eq 4
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_4.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_remote_3.id)
expect(subject.failed_count).to eq 3
end end
end end
describe '#synced_missing_on_primary_count' do describe '#synced_missing_on_primary_count' do
it 'counts registries that have been synced and are missing on the primary, excluding not synced ones' do it 'counts registries that have been synced and are missing on the primary, excluding not synced ones' do
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_1.id) expect(subject.synced_missing_on_primary_count).to eq 2
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_4.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_remote_3.id)
expect(subject.synced_missing_on_primary_count).to eq 3
end
end
describe '#find_registry_differences' do
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group_1) { create(:group, parent: synced_group) }
let_it_be(:synced_project) { create(:project, group: synced_group) }
let_it_be(:synced_project_in_nested_group) { create(:project, group: nested_group_1) }
let_it_be(:unsynced_project) { create(:project) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
context 'untracked IDs' do
before do
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_4.id)
create(:lfs_objects_project, project: synced_project, lfs_object: lfs_object_1)
create(:lfs_objects_project, project: synced_project_in_nested_group, lfs_object: lfs_object_2)
create(:lfs_objects_project, project: synced_project_in_nested_group, lfs_object: lfs_object_3)
create(:lfs_objects_project, project: unsynced_project, lfs_object: lfs_object_4)
create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_5)
end
it 'includes LFS object IDs without an entry on the tracking database' do
untracked_ids, _ = subject.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array(
[lfs_object_2.id, lfs_object_5.id, lfs_object_remote_1.id,
lfs_object_remote_2.id, lfs_object_remote_3.id])
end
it 'excludes LFS objects outside the ID range' do
untracked_ids, _ = subject.find_registry_differences(lfs_object_3.id..lfs_object_remote_2.id)
expect(untracked_ids).to match_array(
[lfs_object_5.id, lfs_object_remote_1.id,
lfs_object_remote_2.id])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes LFS object IDs that are not in selectively synced projects' do
untracked_ids, _ = subject.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array([lfs_object_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes LFS object IDs that are not in selectively synced projects' do
untracked_ids, _ = subject.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array([lfs_object_5.id])
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'excludes LFS objects in object storage' do
untracked_ids, _ = subject.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array([lfs_object_2.id, lfs_object_5.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:geo_lfs_object_registry, lfs_object_id: non_existing_record_id) }
it 'includes tracked IDs that do not exist in the model table' do
range = non_existing_record_id..non_existing_record_id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([non_existing_record_id])
end
it 'excludes IDs outside the ID range' do
range = 1..1000
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked LFS object' do
let!(:registry_entry) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id) }
let(:range) { lfs_object_1.id..lfs_object_1.id }
context 'excluded from selective sync' do
it 'includes tracked LFS object IDs that exist but are not in a selectively synced project' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([lfs_object_1.id])
end
end
context 'included in selective sync' do
let!(:join_record) { create(:lfs_objects_project, project: synced_project, lfs_object: lfs_object_1) }
it 'excludes tracked LFS object IDs that are in selectively synced projects' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked LFS object' do
let!(:registry_entry) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id) }
let(:range) { lfs_object_1.id..lfs_object_1.id }
context 'excluded from selective sync' do
it 'includes tracked LFS object IDs that exist but are not in a selectively synced project' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([lfs_object_1.id])
end
end
context 'included in selective sync' do
let!(:join_record) { create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_1) }
it 'excludes tracked LFS object IDs that are in selectively synced projects' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
context 'with a tracked LFS object' do
context 'in object storage' do
it 'includes tracked LFS object IDs that are in object storage' do
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_1.id)
range = lfs_object_remote_1.id..lfs_object_remote_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([lfs_object_remote_1.id])
end
end
context 'not in object storage' do
it 'excludes tracked LFS object IDs that are not in object storage' do
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id)
range = lfs_object_1.id..lfs_object_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end end
end end
describe '#find_unsynced_registries' do describe '#find_unsynced_registries' do
it 'returns registries for LFS objects that have never been synced' do it 'returns registries for LFS objects that have never been synced' do
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
registry_lfs_object_3 = create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_4.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
registry_lfs_object_remote_3 = create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_remote_3.id)
registries = subject.find_unsynced_registries(batch_size: 10) registries = subject.find_unsynced_registries(batch_size: 10)
expect(registries).to match_ids(registry_lfs_object_3, registry_lfs_object_remote_3) expect(registries).to match_ids(registry_lfs_object_3, registry_lfs_object_remote_3)
end end
it 'excludes except_ids' do it 'excludes except_ids' do
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_4.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
registry_lfs_object_remote_3 = create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_remote_3.id)
registries = subject.find_unsynced_registries(batch_size: 10, except_ids: [lfs_object_3.id]) registries = subject.find_unsynced_registries(batch_size: 10, except_ids: [lfs_object_3.id])
expect(registries).to match_ids(registry_lfs_object_remote_3) expect(registries).to match_ids(registry_lfs_object_remote_3)
...@@ -287,31 +65,13 @@ RSpec.describe Geo::LfsObjectRegistryFinder, :geo do ...@@ -287,31 +65,13 @@ RSpec.describe Geo::LfsObjectRegistryFinder, :geo do
describe '#find_failed_registries' do describe '#find_failed_registries' do
it 'returns registries for LFS objects that have failed to sync' do it 'returns registries for LFS objects that have failed to sync' do
registry_lfs_object_1 = create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_3.id)
registry_lfs_object_4 = create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_4.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
registry_lfs_object_remote_1 = create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_remote_3.id)
registries = subject.find_failed_registries(batch_size: 10) registries = subject.find_failed_registries(batch_size: 10)
expect(registries).to match_ids(registry_lfs_object_1, registry_lfs_object_4, registry_lfs_object_remote_1) expect(registries).to match_ids(registry_lfs_object_1, registry_lfs_object_4, registry_lfs_object_remote_1, registry_lfs_object_remote_2)
end end
it 'excludes except_ids' do it 'excludes except_ids' do
registry_lfs_object_1 = create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_1.id) registries = subject.find_failed_registries(batch_size: 10, except_ids: [lfs_object_4.id, lfs_object_remote_2.id])
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_4.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
registry_lfs_object_remote_1 = create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_remote_3.id)
registries = subject.find_failed_registries(batch_size: 10, except_ids: [lfs_object_4.id])
expect(registries).to match_ids(registry_lfs_object_1, registry_lfs_object_remote_1) expect(registries).to match_ids(registry_lfs_object_1, registry_lfs_object_remote_1)
end end
...@@ -319,30 +79,12 @@ RSpec.describe Geo::LfsObjectRegistryFinder, :geo do ...@@ -319,30 +79,12 @@ RSpec.describe Geo::LfsObjectRegistryFinder, :geo do
describe '#find_retryable_synced_missing_on_primary_registries' do describe '#find_retryable_synced_missing_on_primary_registries' do
it 'returns registries for LFS objects that have been synced and are missing on the primary' do it 'returns registries for LFS objects that have been synced and are missing on the primary' do
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_1.id)
registry_lfs_object_2 = create(:geo_lfs_object_registry, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_4.id)
registry_lfs_object_5 = create(:geo_lfs_object_registry, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_1.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_remote_3.id)
registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10) registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10)
expect(registries).to match_ids(registry_lfs_object_2, registry_lfs_object_5) expect(registries).to match_ids(registry_lfs_object_2, registry_lfs_object_5)
end end
it 'excludes except_ids' do it 'excludes except_ids' do
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_1.id)
registry_lfs_object_2 = create(:geo_lfs_object_registry, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_4.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_1.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_remote_3.id)
registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10, except_ids: [lfs_object_5.id]) registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10, except_ids: [lfs_object_5.id])
expect(registries).to match_ids(registry_lfs_object_2) expect(registries).to match_ids(registry_lfs_object_2)
......
...@@ -3,23 +3,23 @@ ...@@ -3,23 +3,23 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::ContainerRepositoryRegistry, :geo do RSpec.describe Geo::ContainerRepositoryRegistry, :geo do
include ::EE::GeoHelpers
it_behaves_like 'a BulkInsertSafe model', Geo::ContainerRepositoryRegistry do it_behaves_like 'a BulkInsertSafe model', Geo::ContainerRepositoryRegistry do
let(:valid_items_for_bulk_insertion) { build_list(:container_repository_registry, 10, created_at: Time.zone.now) } let(:valid_items_for_bulk_insertion) { build_list(:container_repository_registry, 10, created_at: Time.zone.now) }
let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined
end end
let_it_be(:registry) { create(:container_repository_registry) } it_behaves_like 'a Geo registry' do
let(:registry) { create(:container_repository_registry) }
end
describe 'relationships' do describe 'relationships' do
it { is_expected.to belong_to(:container_repository) } it { is_expected.to belong_to(:container_repository) }
end end
it_behaves_like 'a Geo registry' do
let(:registry) { create(:container_repository_registry) }
end
describe '#finish_sync!' do describe '#finish_sync!' do
let(:registry) { create(:container_repository_registry, :sync_started) } let_it_be(:registry) { create(:container_repository_registry, :sync_started) }
it 'finishes registry record' do it 'finishes registry record' do
registry.finish_sync! registry.finish_sync!
...@@ -58,6 +58,158 @@ RSpec.describe Geo::ContainerRepositoryRegistry, :geo do ...@@ -58,6 +58,158 @@ RSpec.describe Geo::ContainerRepositoryRegistry, :geo do
end end
end end
describe '.find_registry_differences' do
let_it_be(:secondary) { create(:geo_node) }
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group) { create(:group, parent: synced_group) }
let_it_be(:project_synced_group) { create(:project, group: synced_group) }
let_it_be(:project_nested_group) { create(:project, group: nested_group) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
let_it_be(:container_repository_1) { create(:container_repository, project: project_synced_group) }
let_it_be(:container_repository_2) { create(:container_repository, project: project_nested_group) }
let_it_be(:container_repository_3) { create(:container_repository) }
let_it_be(:container_repository_4) { create(:container_repository) }
let_it_be(:container_repository_5) { create(:container_repository, project: project_broken_storage) }
let_it_be(:container_repository_6) { create(:container_repository, project: project_broken_storage) }
before do
stub_current_geo_node(secondary)
stub_registry_replication_config(enabled: true)
end
context 'untracked IDs' do
before do
create(:container_repository_registry, container_repository_id: container_repository_1.id)
create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_3.id)
create(:container_repository_registry, container_repository_id: container_repository_5.id)
end
it 'includes container registries IDs without an entry on the tracking database' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = described_class.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_2.id, container_repository_4.id, container_repository_6.id])
end
it 'excludes container registries outside the ID range' do
untracked_ids, _ = described_class.find_registry_differences(container_repository_4.id..container_repository_6.id)
expect(untracked_ids).to match_array([container_repository_4.id, container_repository_6.id])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes container_registry IDs that projects are not in the selected namespaces' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = described_class.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes container_registry IDs that projects are not in the selected shards' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = described_class.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_6.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
before do
container_repository_1.delete
end
it 'includes tracked IDs that do not exist in the model table' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_1.id])
end
it 'excludes IDs outside the ID range' do
range = (container_repository_1.id + 1)..ContainerRepository.maximum(:id)
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked container_registry' do
context 'excluded from selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_3.id) }
it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
range = container_repository_3.id..container_repository_3.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_3.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
it 'excludes tracked container_registry IDs that are in selectively synced projects' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked container_registry' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
context 'excluded from selective sync' do
it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_1.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_5.id) }
it 'excludes tracked container_registry IDs that are in selectively synced projects' do
range = container_repository_5.id..container_repository_5.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
describe '.replication_enabled?' do describe '.replication_enabled?' do
it 'returns true when registry replication is enabled' do it 'returns true when registry replication is enabled' do
stub_geo_setting(registry_replication: { enabled: true }) stub_geo_setting(registry_replication: { enabled: true })
......
...@@ -3,6 +3,8 @@ ...@@ -3,6 +3,8 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::DesignRegistry, :geo do RSpec.describe Geo::DesignRegistry, :geo do
include ::EE::GeoHelpers
it_behaves_like 'a BulkInsertSafe model', Geo::DesignRegistry do it_behaves_like 'a BulkInsertSafe model', Geo::DesignRegistry do
let(:valid_items_for_bulk_insertion) { build_list(:geo_design_registry, 10, created_at: Time.zone.now) } let(:valid_items_for_bulk_insertion) { build_list(:geo_design_registry, 10, created_at: Time.zone.now) }
let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined
...@@ -16,6 +18,172 @@ RSpec.describe Geo::DesignRegistry, :geo do ...@@ -16,6 +18,172 @@ RSpec.describe Geo::DesignRegistry, :geo do
let(:registry) { create(:geo_design_registry) } let(:registry) { create(:geo_design_registry) }
end end
describe '.find_registry_differences' do
let_it_be(:secondary) { create(:geo_node) }
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group) { create(:group, parent: synced_group) }
let_it_be(:project_1) { create(:project, group: synced_group) }
let_it_be(:project_2) { create(:project, group: nested_group) }
let_it_be(:project_3) { create(:project) }
let_it_be(:project_4) { create(:project) }
let_it_be(:project_5) { create(:project, :broken_storage) }
let_it_be(:project_6) { create(:project, :broken_storage) }
let_it_be(:project_7) { create(:project) }
before do
stub_current_geo_node(secondary)
end
before_all do
create(:design, project: project_1)
create(:design, project: project_2)
create(:design, project: project_3)
create(:design, project: project_4)
create(:design, project: project_5)
create(:design, project: project_6)
end
context 'untracked IDs' do
before do
create(:geo_design_registry, project_id: project_1.id)
create(:geo_design_registry, :sync_failed, project_id: project_3.id)
create(:geo_design_registry, project_id: project_5.id)
end
it 'includes project IDs without an entry on the tracking database' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = described_class.find_registry_differences(range)
expect(untracked_ids).to match_array([project_2.id, project_4.id, project_6.id])
end
it 'excludes projects outside the ID range' do
untracked_ids, _ = described_class.find_registry_differences(project_4.id..project_6.id)
expect(untracked_ids).to match_array([project_4.id, project_6.id])
end
it 'excludes projects without designs' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = described_class.find_registry_differences(range)
expect(untracked_ids).not_to include([project_7])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes project IDs that are not in selectively synced projects' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = described_class.find_registry_differences(range)
expect(untracked_ids).to match_array([project_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes project IDs that are not in selectively synced projects' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = described_class.find_registry_differences(range)
expect(untracked_ids).to match_array([project_6.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:geo_design_registry, project_id: project_1.id) }
before do
project_1.delete
end
it 'includes tracked IDs that do not exist in the model table' do
range = project_1.id..project_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_1.id])
end
it 'excludes IDs outside the ID range' do
range = (project_1.id + 1)..Project.maximum(:id)
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked project' do
context 'excluded from selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_3.id) }
it 'includes tracked project IDs that exist but are not in a selectively synced project' do
range = project_3.id..project_3.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_3.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_1.id) }
it 'excludes tracked project IDs that are in selectively synced projects' do
range = project_1.id..project_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked project' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_1.id) }
context 'excluded from selective sync' do
it 'includes tracked project IDs that exist but are not in a selectively synced project' do
range = project_1.id..project_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_1.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_5.id) }
it 'excludes tracked project IDs that are in selectively synced projects' do
range = project_5.id..project_5.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
describe '#search' do describe '#search' do
let!(:design_registry) { create(:geo_design_registry) } let!(:design_registry) { create(:geo_design_registry) }
let!(:failed_registry) { create(:geo_design_registry, :sync_failed) } let!(:failed_registry) { create(:geo_design_registry, :sync_failed) }
......
...@@ -23,4 +23,243 @@ RSpec.describe Geo::JobArtifactRegistry, :geo do ...@@ -23,4 +23,243 @@ RSpec.describe Geo::JobArtifactRegistry, :geo do
expect(described_class.where(id: ids).pluck(:success)).to eq([false]) expect(described_class.where(id: ids).pluck(:success)).to eq([false])
end end
end end
describe '.find_registry_differences' do
let_it_be(:secondary) { create(:geo_node) }
before do
stub_current_geo_node(secondary)
stub_artifacts_object_storage
end
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group_1) { create(:group, parent: synced_group) }
let_it_be(:synced_project) { create(:project, group: synced_group) }
let_it_be(:synced_project_in_nested_group) { create(:project, group: nested_group_1) }
let_it_be(:unsynced_project) { create(:project) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
let_it_be(:ci_job_artifact_1) { create(:ci_job_artifact, project: synced_project) }
let_it_be(:ci_job_artifact_2) { create(:ci_job_artifact, project: synced_project_in_nested_group) }
let_it_be(:ci_job_artifact_3) { create(:ci_job_artifact, project: synced_project_in_nested_group) }
let_it_be(:ci_job_artifact_4) { create(:ci_job_artifact, project: unsynced_project) }
let_it_be(:ci_job_artifact_5) { create(:ci_job_artifact, project: project_broken_storage) }
let!(:ci_job_artifact_remote_1) { create(:ci_job_artifact, :remote_store) }
let!(:ci_job_artifact_remote_2) { create(:ci_job_artifact, :remote_store) }
let!(:ci_job_artifact_remote_3) { create(:ci_job_artifact, :remote_store) }
# Untracked IDs should not contain any of these expired job artifacts.
let_it_be(:ci_job_artifact_6) { create(:ci_job_artifact, :expired, project: synced_project) }
let_it_be(:ci_job_artifact_7) { create(:ci_job_artifact, :expired, project: unsynced_project) }
let_it_be(:ci_job_artifact_8) { create(:ci_job_artifact, :expired, project: project_broken_storage) }
let!(:ci_job_artifact_remote_4) { create(:ci_job_artifact, :expired, :remote_store) }
context 'untracked IDs' do
before do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id)
end
it 'includes job artifact IDs without an entry on the tracking database' do
untracked_ids, _ = described_class.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array(
[ci_job_artifact_2.id, ci_job_artifact_5.id, ci_job_artifact_remote_1.id,
ci_job_artifact_remote_2.id, ci_job_artifact_remote_3.id])
end
it 'excludes job artifacts outside the ID range' do
untracked_ids, _ = described_class.find_registry_differences(ci_job_artifact_3.id..ci_job_artifact_remote_2.id)
expect(untracked_ids).to match_array(
[ci_job_artifact_5.id, ci_job_artifact_remote_1.id,
ci_job_artifact_remote_2.id])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes job artifact IDs that are not in selectively synced projects' do
untracked_ids, _ = described_class.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([ci_job_artifact_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes job artifact IDs that are not in selectively synced projects' do
untracked_ids, _ = described_class.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([ci_job_artifact_5.id])
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'excludes job artifacts in object storage' do
untracked_ids, _ = described_class.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([ci_job_artifact_2.id, ci_job_artifact_5.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:geo_job_artifact_registry, artifact_id: non_existing_record_id) }
it 'includes tracked IDs that do not exist in the model table' do
range = non_existing_record_id..non_existing_record_id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([non_existing_record_id])
end
it 'excludes IDs outside the ID range' do
range = 1..1000
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with an expired registry' do
let!(:expired) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_6.id) }
it 'includes expired tracked IDs that exists in the model table' do
range = ci_job_artifact_6.id..ci_job_artifact_6.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_6.id])
end
it 'excludes IDs outside the ID range' do
range = (ci_job_artifact_6.id + 1)..(ci_job_artifact_6.id + 10)
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked job artifact' do
let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id) }
let(:range) { ci_job_artifact_1.id..ci_job_artifact_4.id }
context 'excluded from selective sync' do
it 'includes tracked job artifact IDs that exist but are not in a selectively synced project' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id)
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_4.id])
end
end
context 'included in selective sync' do
it 'excludes tracked job artifact IDs that are in selectively synced projects' do
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
it 'includes expired tracked IDs that are in selectively synced projects' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_6.id)
range = ci_job_artifact_6.id..ci_job_artifact_6.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_6.id])
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked job artifact' do
let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id) }
let(:range) { ci_job_artifact_1.id..ci_job_artifact_5.id }
context 'excluded from selective sync' do
it 'includes tracked job artifact IDs that exist but are not in a selectively synced shard' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_1.id])
end
end
context 'included in selective sync' do
it 'excludes tracked job artifact IDs that are in selectively synced shards' do
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
it 'includes expired tracked IDs that are in selectively synced shards' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_8.id)
range = ci_job_artifact_8.id..ci_job_artifact_8.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_8.id])
end
end
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
context 'with a tracked job artifact' do
context 'in object storage' do
it 'includes tracked job artifact IDs that are in object storage' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_1.id)
range = ci_job_artifact_remote_1.id..ci_job_artifact_remote_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_remote_1.id])
end
it 'includes expired tracked IDs that are in object storage' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_4.id)
range = ci_job_artifact_remote_4.id..ci_job_artifact_remote_4.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_remote_4.id])
end
end
context 'not in object storage' do
it 'excludes tracked job artifact IDs that are not in object storage' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
range = ci_job_artifact_1.id..ci_job_artifact_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
end end
...@@ -3,12 +3,203 @@ ...@@ -3,12 +3,203 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::LfsObjectRegistry, :geo do RSpec.describe Geo::LfsObjectRegistry, :geo do
describe 'relationships' do include EE::GeoHelpers
it { is_expected.to belong_to(:lfs_object).class_name('LfsObject') }
end
it_behaves_like 'a BulkInsertSafe model', Geo::LfsObjectRegistry do it_behaves_like 'a BulkInsertSafe model', Geo::LfsObjectRegistry do
let(:valid_items_for_bulk_insertion) { build_list(:geo_lfs_object_registry, 10) } let(:valid_items_for_bulk_insertion) { build_list(:geo_lfs_object_registry, 10) }
let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined
end end
describe 'relationships' do
it { is_expected.to belong_to(:lfs_object).class_name('LfsObject') }
end
describe '.find_registry_differences' do
let_it_be(:secondary) { create(:geo_node) }
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group_1) { create(:group, parent: synced_group) }
let_it_be(:synced_project) { create(:project, group: synced_group) }
let_it_be(:synced_project_in_nested_group) { create(:project, group: nested_group_1) }
let_it_be(:unsynced_project) { create(:project) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
before do
stub_current_geo_node(secondary)
stub_lfs_object_storage
end
let_it_be(:lfs_object_1) { create(:lfs_object) }
let_it_be(:lfs_object_2) { create(:lfs_object) }
let_it_be(:lfs_object_3) { create(:lfs_object) }
let_it_be(:lfs_object_4) { create(:lfs_object) }
let_it_be(:lfs_object_5) { create(:lfs_object) }
let!(:lfs_object_remote_1) { create(:lfs_object, :object_storage) }
let!(:lfs_object_remote_2) { create(:lfs_object, :object_storage) }
let!(:lfs_object_remote_3) { create(:lfs_object, :object_storage) }
context 'untracked IDs' do
before do
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_4.id)
create(:lfs_objects_project, project: synced_project, lfs_object: lfs_object_1)
create(:lfs_objects_project, project: synced_project_in_nested_group, lfs_object: lfs_object_2)
create(:lfs_objects_project, project: synced_project_in_nested_group, lfs_object: lfs_object_3)
create(:lfs_objects_project, project: unsynced_project, lfs_object: lfs_object_4)
create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_5)
end
it 'includes LFS object IDs without an entry on the tracking database' do
untracked_ids, _ = described_class.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array(
[lfs_object_2.id, lfs_object_5.id, lfs_object_remote_1.id,
lfs_object_remote_2.id, lfs_object_remote_3.id])
end
it 'excludes LFS objects outside the ID range' do
untracked_ids, _ = described_class.find_registry_differences(lfs_object_3.id..lfs_object_remote_2.id)
expect(untracked_ids).to match_array(
[lfs_object_5.id, lfs_object_remote_1.id,
lfs_object_remote_2.id])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes LFS object IDs that are not in selectively synced projects' do
untracked_ids, _ = described_class.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array([lfs_object_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes LFS object IDs that are not in selectively synced projects' do
untracked_ids, _ = described_class.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array([lfs_object_5.id])
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'excludes LFS objects in object storage' do
untracked_ids, _ = described_class.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array([lfs_object_2.id, lfs_object_5.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:geo_lfs_object_registry, lfs_object_id: non_existing_record_id) }
it 'includes tracked IDs that do not exist in the model table' do
range = non_existing_record_id..non_existing_record_id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([non_existing_record_id])
end
it 'excludes IDs outside the ID range' do
range = 1..1000
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked LFS object' do
let!(:registry_entry) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id) }
let(:range) { lfs_object_1.id..lfs_object_1.id }
context 'excluded from selective sync' do
it 'includes tracked LFS object IDs that exist but are not in a selectively synced project' do
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([lfs_object_1.id])
end
end
context 'included in selective sync' do
let!(:join_record) { create(:lfs_objects_project, project: synced_project, lfs_object: lfs_object_1) }
it 'excludes tracked LFS object IDs that are in selectively synced projects' do
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked LFS object' do
let!(:registry_entry) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id) }
let(:range) { lfs_object_1.id..lfs_object_1.id }
context 'excluded from selective sync' do
it 'includes tracked LFS object IDs that exist but are not in a selectively synced project' do
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([lfs_object_1.id])
end
end
context 'included in selective sync' do
let!(:join_record) { create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_1) }
it 'excludes tracked LFS object IDs that are in selectively synced projects' do
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
context 'with a tracked LFS object' do
context 'in object storage' do
it 'includes tracked LFS object IDs that are in object storage' do
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_1.id)
range = lfs_object_remote_1.id..lfs_object_remote_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([lfs_object_remote_1.id])
end
end
context 'not in object storage' do
it 'excludes tracked LFS object IDs that are not in object storage' do
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id)
range = lfs_object_1.id..lfs_object_1.id
_, unused_tracked_ids = described_class.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
end end
...@@ -3,8 +3,7 @@ ...@@ -3,8 +3,7 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::UploadRegistry, :geo do RSpec.describe Geo::UploadRegistry, :geo do
let!(:failed) { create(:geo_upload_registry, :failed) } include EE::GeoHelpers
let!(:synced) { create(:geo_upload_registry) }
it_behaves_like 'a BulkInsertSafe model', Geo::UploadRegistry do it_behaves_like 'a BulkInsertSafe model', Geo::UploadRegistry do
let(:valid_items_for_bulk_insertion) { build_list(:geo_upload_registry, 10, created_at: Time.zone.now) } let(:valid_items_for_bulk_insertion) { build_list(:geo_upload_registry, 10, created_at: Time.zone.now) }
...@@ -17,20 +16,75 @@ RSpec.describe Geo::UploadRegistry, :geo do ...@@ -17,20 +16,75 @@ RSpec.describe Geo::UploadRegistry, :geo do
expect(described_class.find(registry.id).upload).to be_an_instance_of(Upload) expect(described_class.find(registry.id).upload).to be_an_instance_of(Upload)
end end
describe '.find_registry_differences' do
let_it_be(:secondary) { create(:geo_node) }
let_it_be(:project) { create(:project) }
let_it_be(:upload_1) { create(:upload, model: project) }
let_it_be(:upload_2) { create(:upload, model: project) }
let_it_be(:upload_3) { create(:upload, :issuable_upload, model: project) }
let_it_be(:upload_4) { create(:upload, model: project) }
let_it_be(:upload_5) { create(:upload, model: project) }
let_it_be(:upload_6) { create(:upload, :personal_snippet_upload) }
let_it_be(:upload_7) { create(:upload, :object_storage, model: project) }
let_it_be(:upload_8) { create(:upload, :object_storage, model: project) }
let_it_be(:upload_9) { create(:upload, :object_storage, model: project) }
before do
stub_current_geo_node(secondary)
end
it 'returns untracked IDs as well as tracked IDs that are unused', :aggregate_failures do
max_id = Upload.maximum(:id)
create(:geo_upload_registry, :avatar, file_id: upload_1.id)
create(:geo_upload_registry, :file, file_id: upload_3.id)
create(:geo_upload_registry, :avatar, file_id: upload_5.id)
create(:geo_upload_registry, :personal_file, file_id: upload_6.id)
create(:geo_upload_registry, :avatar, file_id: upload_7.id)
unused_registry_1 = create(:geo_upload_registry, :attachment, file_id: max_id + 1)
unused_registry_2 = create(:geo_upload_registry, :personal_file, file_id: max_id + 2)
range = 1..(max_id + 2)
untracked, unused = described_class.find_registry_differences(range)
expected_untracked = [
[upload_2.id, 'avatar'],
[upload_4.id, 'avatar'],
[upload_8.id, 'avatar'],
[upload_9.id, 'avatar']
]
expected_unused = [
[unused_registry_1.file_id, 'attachment'],
[unused_registry_2.file_id, 'personal_file']
]
expect(untracked).to match_array(expected_untracked)
expect(unused).to match_array(expected_unused)
end
end
describe '.failed' do describe '.failed' do
it 'returns registries in the failed state' do it 'returns registries in the failed state' do
failed = create(:geo_upload_registry, :failed)
create(:geo_upload_registry)
expect(described_class.failed).to match_ids(failed) expect(described_class.failed).to match_ids(failed)
end end
end end
describe '.synced' do describe '.synced' do
it 'returns registries in the synced state' do it 'returns registries in the synced state' do
create(:geo_upload_registry, :failed)
synced = create(:geo_upload_registry)
expect(described_class.synced).to match_ids(synced) expect(described_class.synced).to match_ids(synced)
end end
end end
describe '.retry_due' do describe '.retry_due' do
it 'returns registries in the synced state' do it 'returns registries in the synced state' do
failed = create(:geo_upload_registry, :failed)
synced = create(:geo_upload_registry)
retry_yesterday = create(:geo_upload_registry, retry_at: Date.yesterday) retry_yesterday = create(:geo_upload_registry, retry_at: Date.yesterday)
create(:geo_upload_registry, retry_at: Date.tomorrow) create(:geo_upload_registry, retry_at: Date.tomorrow)
...@@ -40,6 +94,8 @@ RSpec.describe Geo::UploadRegistry, :geo do ...@@ -40,6 +94,8 @@ RSpec.describe Geo::UploadRegistry, :geo do
describe '.pending' do describe '.pending' do
it 'returns registries that are never synced' do it 'returns registries that are never synced' do
create(:geo_upload_registry, :failed)
create(:geo_upload_registry)
pending = create(:geo_upload_registry, retry_count: nil, success: false) pending = create(:geo_upload_registry, retry_count: nil, success: false)
expect(described_class.pending).to match_ids([pending]) expect(described_class.pending).to match_ids([pending])
...@@ -91,6 +147,9 @@ RSpec.describe Geo::UploadRegistry, :geo do ...@@ -91,6 +147,9 @@ RSpec.describe Geo::UploadRegistry, :geo do
end end
describe '#synchronization_state' do describe '#synchronization_state' do
let_it_be(:failed) { create(:geo_upload_registry, :failed) }
let_it_be(:synced) { create(:geo_upload_registry) }
it 'returns :synced for a successful synced registry' do it 'returns :synced for a successful synced registry' do
expect(synced.synchronization_state).to eq(:synced) expect(synced.synchronization_state).to eq(:synced)
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment