Commit d7d65763 authored by Michael Kozono

Merge branch '223104-geo-refactor-finders-part-II' into 'master'

Geo - Refactor finders (Part II)

See merge request gitlab-org/gitlab!40542
parents 394217b4 f9ae015f
......@@ -2,50 +2,6 @@
module Geo
class AttachmentRegistryFinder < FileRegistryFinder
# Returns untracked uploads as well as tracked uploads that are unused.
#
# Untracked uploads is an array where each item is a tuple of [id, file_type]
# that is supposed to be synced but doesn't yet have a registry entry.
#
# Unused uploads is an array where each item is a tuple of [id, file_type]
# that is not supposed to be synced but already have a registry entry. For
# example:
#
# - orphaned registries
# - records that became excluded from selective sync
# - records that are in object storage, and `sync_object_storage` became
# disabled
#
# We compute both sets in this method to reduce the number of DB queries
# performed.
#
# @return [Array] the first element is an Array of untracked uploads, and the
# second element is an Array of tracked uploads that are unused.
# For example: [[[1, 'avatar'], [5, 'file']], [[3, 'attachment']]]
# Compares replicable uploads against registry rows for the given ID range.
#
# Both sides are reduced to [id, file_type] tuples; the uploader class name
# is converted to a file_type by dropping the trailing "Uploader" and
# underscoring the remainder.
#
# @param range the ID range handed to `id_in` / `model_id_in`
# @return [Array] [untracked, unused_tracked] arrays of [id, file_type] tuples
def find_registry_differences(range)
  # rubocop:disable CodeReuse/ActiveRecord
  upload_columns = ::Upload.arel_table
  source = replicables.id_in(range).pluck(upload_columns[:id], upload_columns[:uploader])
  source.map! do |id, uploader|
    [id, uploader.sub(/Uploader\z/, '').underscore]
  end

  tracked = registry_class.model_id_in(range).pluck(:file_id, :file_type)
  # rubocop:enable CodeReuse/ActiveRecord

  # Present in the source but missing a registry row, then the reverse.
  [source - tracked, tracked - source]
end
def replicables
::Upload.replicables_for_geo_node
end
def registry_class
Geo::UploadRegistry
end
......
......@@ -2,48 +2,6 @@
module Geo
class ContainerRepositoryRegistryFinder < RegistryFinder
# Returns Geo::ContainerRepositoryRegistry records that have never been synced.
#
# Does not care about selective sync, because it considers the Registry
# table to be the single source of truth. The contract is that other
# processes need to ensure that the table only contains records that should
# be synced.
#
# Any registries that have ever been synced that currently need to be
# resynced will be handled by other find methods (like
# #find_retryable_dirty_registries)
#
# You can pass a list with `except_ids:` so you can exclude items you
# already scheduled but haven't finished and aren't persisted to the database yet
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_ids ids that will be ignored from the query
# rubocop:disable CodeReuse/ActiveRecord
# Builds the `never_synced` registry relation, skipping `except_ids` and
# capped at `batch_size` rows.
def find_never_synced_registries(batch_size:, except_ids: [])
  never_synced = registry_class.never_synced
  never_synced.model_id_not_in(except_ids).limit(batch_size)
end
# rubocop:enable CodeReuse/ActiveRecord
# rubocop:disable CodeReuse/ActiveRecord
# Builds the relation of registries scoped by `failed` and `retry_due`,
# skipping `except_ids`, ordered via
# Gitlab::Database.nulls_first_order(:last_synced_at) and capped at
# `batch_size` rows.
def find_retryable_dirty_registries(batch_size:, except_ids: [])
  retryable = registry_class.failed.retry_due.model_id_not_in(except_ids)
  sync_order = Gitlab::Database.nulls_first_order(:last_synced_at)

  retryable.order(sync_order).limit(batch_size)
end
# rubocop:enable CodeReuse/ActiveRecord
private
def replicables
current_node.container_repositories
end
def registry_class
Geo::ContainerRepositoryRegistry
end
......
......@@ -2,47 +2,6 @@
module Geo
class DesignRegistryFinder < RegistryFinder
# Returns Geo::DesignRegistry records that have never been synced.
#
# Does not care about selective sync, because it considers the Registry
# table to be the single source of truth. The contract is that other
# processes need to ensure that the table only contains records that should
# be synced.
#
# Any registries that have ever been synced that currently need to be
# resynced will be handled by other find methods (like
# #find_retryable_dirty_registries)
#
# You can pass a list with `except_ids:` so you can exclude items you
# already scheduled but haven't finished and aren't persisted to the database yet
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_ids ids that will be ignored from the query
# rubocop:disable CodeReuse/ActiveRecord
# Builds the `never_synced` registry relation, skipping `except_ids` and
# capped at `batch_size` rows.
def find_never_synced_registries(batch_size:, except_ids: [])
  candidates = registry_class.never_synced.model_id_not_in(except_ids)
  candidates.limit(batch_size)
end
# rubocop:enable CodeReuse/ActiveRecord
# rubocop:disable CodeReuse/ActiveRecord
# Builds the `updated_recently` registry relation, skipping `except_ids`,
# ordered via Gitlab::Database.nulls_first_order(:last_synced_at) and capped
# at `batch_size` rows.
def find_retryable_dirty_registries(batch_size:, except_ids: [])
  candidates = registry_class.updated_recently.model_id_not_in(except_ids)
  sync_order = Gitlab::Database.nulls_first_order(:last_synced_at)

  candidates.order(sync_order).limit(batch_size)
end
# rubocop:enable CodeReuse/ActiveRecord
private
def replicables
current_node.designs
end
def registry_class
Geo::DesignRegistry
end
......
......@@ -2,59 +2,13 @@
module Geo
class FileRegistryFinder < RegistryFinder
# @!method count_synced_missing_on_primary
# @!method synced_missing_on_primary_count
# Return a count of the registry records for the tracked file_type(s)
# that are synced and missing on the primary
def count_synced_missing_on_primary
def synced_missing_on_primary_count
registry_class.synced.missing_on_primary.count
end
# @!method find_never_synced_registries
# Return an ActiveRecord::Relation of the registry records for the
# tracked file_type(s) that have never been synced.
#
# Does not care about selective sync, because it considers the Registry
# table to be the single source of truth. The contract is that other
# processes need to ensure that the table only contains records that should
# be synced.
#
# Any registries that have ever been synced that currently need to be
# resynced will be handled by other find methods (like
# #find_retryable_failed_registries)
#
# You can pass a list with `except_ids:` so you can exclude items you
# already scheduled but haven't finished and aren't persisted to the database yet
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_ids ids that will be ignored from the query
#
# rubocop:disable CodeReuse/ActiveRecord
# Builds the registry relation scoped by `never`, skipping `except_ids` and
# capped at `batch_size` rows.
def find_never_synced_registries(batch_size:, except_ids: [])
  never_synced = registry_class.never
  never_synced.model_id_not_in(except_ids).limit(batch_size)
end
# rubocop:enable CodeReuse/ActiveRecord
# @!method find_retryable_failed_registries
# Return an ActiveRecord::Relation of registry records marked as failed,
# which are ready to be retried, excluding specified IDs, limited to
# batch_size
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_ids ids that will be ignored from the query
#
# rubocop:disable CodeReuse/ActiveRecord
# Builds the relation of registries scoped by `failed` and `retry_due`,
# skipping `except_ids` and capped at `batch_size` rows.
def find_retryable_failed_registries(batch_size:, except_ids: [])
  retryable = registry_class.failed.retry_due
  retryable.model_id_not_in(except_ids).limit(batch_size)
end
# rubocop:enable CodeReuse/ActiveRecord
# @!method find_retryable_synced_missing_on_primary_registries
# Return an ActiveRecord::Relation of registry records marked as synced
# and missing on the primary, which are ready to be retried, excluding
......@@ -73,9 +27,5 @@ module Geo
.limit(batch_size)
end
# rubocop:enable CodeReuse/ActiveRecord
def local_storage_only?
!current_node&.sync_object_storage
end
end
end
......@@ -2,10 +2,6 @@
module Geo
class JobArtifactRegistryFinder < FileRegistryFinder
def replicables
::Ci::JobArtifact.replicables_for_geo_node
end
def registry_class
Geo::JobArtifactRegistry
end
......
......@@ -2,18 +2,8 @@
module Geo
class LfsObjectRegistryFinder < FileRegistryFinder
def replicables
local_storage_only? ? lfs_objects.with_files_stored_locally : lfs_objects
end
def registry_class
Geo::LfsObjectRegistry
end
private
def lfs_objects
current_node.lfs_objects
end
end
end
......@@ -2,16 +2,16 @@
module Geo
class ProjectRegistryFinder
# Returns ProjectRegistry records that have never been synced.
# Returns ProjectRegistry records where sync has never been attempted.
#
# Does not care about selective sync, because it considers the Registry
# table to be the single source of truth. The contract is that other
# processes need to ensure that the table only contains records that should
# be synced.
#
# Any registries that have ever been synced that currently need to be
# Any registries that this secondary has ever attempted to sync that currently need to be
# resynced will be handled by other find methods (like
# #find_retryable_dirty_registries)
# #find_registries_needs_sync_again)
#
# You can pass a list with `except_ids:` so you can exclude items you
# already scheduled but haven't finished and aren't persisted to the database yet
......@@ -19,28 +19,22 @@ module Geo
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_ids ids that will be ignored from the query
# rubocop:disable CodeReuse/ActiveRecord
def find_never_synced_registries(batch_size:, except_ids: [])
Geo::ProjectRegistry
.never_synced
.model_id_not_in(except_ids)
.limit(batch_size)
def find_registries_never_attempted_sync(batch_size:, except_ids: [])
registry_class
.find_registries_never_attempted_sync(batch_size: batch_size, except_ids: except_ids)
end
# rubocop:enable CodeReuse/ActiveRecord
# rubocop:disable CodeReuse/ActiveRecord
def find_retryable_dirty_registries(batch_size:, except_ids: [])
Geo::ProjectRegistry
.dirty
.retry_due
.model_id_not_in(except_ids)
.order(Gitlab::Database.nulls_first_order(:last_repository_synced_at))
.limit(batch_size)
def find_registries_needs_sync_again(batch_size:, except_ids: [])
registry_class
.find_registries_needs_sync_again(batch_size: batch_size, except_ids: except_ids)
end
# rubocop:enable CodeReuse/ActiveRecord
# rubocop:disable CodeReuse/ActiveRecord
def find_project_ids_pending_verification(batch_size:, except_ids: [])
Geo::ProjectRegistry
registry_class
.from_union([
repositories_checksummed_pending_verification,
wikis_checksummed_pending_verification
......@@ -53,19 +47,23 @@ module Geo
private
def registry_class
Geo::ProjectRegistry
end
# rubocop:disable CodeReuse/ActiveRecord
def repositories_checksummed_pending_verification
Geo::ProjectRegistry
registry_class
.repositories_checksummed_pending_verification
.select(Geo::ProjectRegistry.arel_table[:project_id])
.select(registry_class.arel_table[:project_id])
end
# rubocop:enable CodeReuse/ActiveRecord
# rubocop:disable CodeReuse/ActiveRecord
def wikis_checksummed_pending_verification
Geo::ProjectRegistry
registry_class
.wikis_checksummed_pending_verification
.select(Geo::ProjectRegistry.arel_table[:project_id])
.select(registry_class.arel_table[:project_id])
end
# rubocop:enable CodeReuse/ActiveRecord
end
......
......@@ -2,56 +2,46 @@
module Geo
class RegistryFinder
include ::Gitlab::Utils::StrongMemoize
attr_reader :current_node_id
def initialize(current_node_id: nil)
@current_node_id = current_node_id
end
# @!method find_registry_differences
# Returns untracked IDs as well as tracked IDs that are unused.
# @!method find_registries_never_attempted_sync
# Return an ActiveRecord::Relation of the registry records for the
# tracked type that this secondary has never attempted to sync.
#
# Untracked IDs are model IDs that are supposed to be synced but don't yet
# have a registry entry.
# Does not care about selective sync, because it considers the Registry
# table to be the single source of truth. The contract is that other
# processes need to ensure that the table only contains records that should
# be synced.
#
# Unused tracked IDs are model IDs that are not supposed to be synced but
# already have a registry entry. For example:
# Any registries that this secondary has ever attempted to sync that currently need to be
# resynced will be handled by other find methods (like
# #find_registries_needs_sync_again)
#
# - orphaned registries
# - records that became excluded from selective sync
# - records that are in object storage, and `sync_object_storage` became
# disabled
# You can pass a list with `except_ids:` so you can exclude items you
# already scheduled but haven't finished and aren't persisted to the database yet
#
# We compute both sets in this method to reduce the number of DB queries
# performed.
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_ids ids that will be ignored from the query
#
# @return [Array] the first element is an Array of untracked IDs, and the second element is an Array of tracked IDs that are unused
# Compares replicable IDs against registry model IDs within `range`.
#
# @param range the ID range handed to `id_in` / `pluck_model_ids_in_range`
# @return [Array] [untracked_ids, unused_tracked_ids]
def find_registry_differences(range)
  expected_ids = replicables.id_in(range).pluck(replicable_primary_key) # rubocop:disable CodeReuse/ActiveRecord
  registered_ids = registry_class.pluck_model_ids_in_range(range)

  # Replicable without a registry row, then registry row without a replicable.
  [expected_ids - registered_ids, registered_ids - expected_ids]
end
# @!method registry_class
# Return an ActiveRecord::Base class for the tracked type
def registry_class
raise NotImplementedError,
"#{self.class} does not implement #{__method__}"
# rubocop:disable CodeReuse/ActiveRecord
def find_registries_never_attempted_sync(batch_size:, except_ids: [])
registry_class
.find_registries_never_attempted_sync(batch_size: batch_size, except_ids: except_ids)
end
# rubocop:enable CodeReuse/ActiveRecord
# @!method replicables
# Return an ActiveRecord::Relation of the replicable records for the
# tracked file_type(s)
def replicables
raise NotImplementedError,
"#{self.class} does not implement #{__method__}"
# @!method find_registries_needs_sync_again
# Return an ActiveRecord::Relation of registry records marked as failed,
# which are ready to be retried, excluding specified IDs, limited to
# batch_size
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_ids ids that will be ignored from the query
#
# rubocop:disable CodeReuse/ActiveRecord
def find_registries_needs_sync_again(batch_size:, except_ids: [])
registry_class
.find_registries_needs_sync_again(batch_size: batch_size, except_ids: except_ids)
end
# rubocop:enable CodeReuse/ActiveRecord
# @!method registry_count
# Return a count of the registry records for the tracked type(s)
......@@ -73,19 +63,11 @@ module Geo
registry_class.failed.count
end
private
# Looks up the GeoNode for `current_node_id` inside a
# `strong_memoize(:current_node)` block; the block yields nil when no ID
# is set.
def current_node
  strong_memoize(:current_node) do
    next unless current_node_id

    GeoNode.find(current_node_id)
  end
end
# @!method registry_class
# Return the fully qualified name of the replicable primary key for the
# tracked file_type(s)
def replicable_primary_key
registry_class::MODEL_CLASS.arel_table[:id]
# Return an ActiveRecord::Base class for the tracked type
def registry_class
raise NotImplementedError,
"#{self.class} does not implement #{__method__}"
end
end
end
......@@ -35,12 +35,13 @@ module Geo::ReplicableRegistry
included do
include ::Delay
scope :never, -> { where(last_synced_at: nil) }
scope :failed, -> { with_state(:failed) }
scope :synced, -> { with_state(:synced) }
scope :needs_sync_again, -> { failed.retry_due }
scope :never_attempted_sync, -> { pending.where(last_synced_at: nil) }
scope :ordered, -> { order(:id) }
scope :pending, -> { with_state(:pending) }
scope :retry_due, -> { where(arel_table[:retry_at].eq(nil).or(arel_table[:retry_at].lt(Time.current))) }
scope :ordered, -> { order(:id) }
scope :synced, -> { with_state(:synced) }
state_machine :state, initial: :pending do
state :pending, value: STATE_VALUES[:pending]
......
......@@ -4,9 +4,11 @@ module Geo::Syncable
extend ActiveSupport::Concern
included do
scope :failed, -> { where(success: false) }
scope :synced, -> { where(success: true) }
scope :retry_due, -> { where('retry_at is NULL OR retry_at < ?', Time.current) }
scope :failed, -> { where(success: false).where.not(retry_count: nil) }
scope :missing_on_primary, -> { where(missing_on_primary: true) }
scope :needs_sync_again, -> { failed.retry_due }
scope :never_attempted_sync, -> { where(success: false, retry_count: nil) }
scope :retry_due, -> { where('retry_at is NULL OR retry_at < ?', Time.current) }
scope :synced, -> { where(success: true) }
end
end
......@@ -8,6 +8,12 @@ module EE
scope :project_id_in, ->(ids) { joins(:project).merge(::Project.id_in(ids)) }
end
class_methods do
def replicables_for_geo_node(node = ::Gitlab::Geo.current_node)
node.container_repositories
end
end
def push_blob(digest, file_path)
client.push_blob(path, digest, file_path)
end
......
......@@ -16,6 +16,13 @@ module EE
scope :project_id_in, ->(ids) { joins(:projects).merge(::Project.id_in(ids)) }
end
class_methods do
# Returns the node's LFS objects, restricted to `with_files_stored_locally`
# unless the node has `sync_object_storage` enabled.
#
# Note: the safe navigation only guards the `sync_object_storage` read; a nil
# node still raises NoMethodError on `lfs_objects`.
def replicables_for_geo_node(node = ::Gitlab::Geo.current_node)
  return node.lfs_objects if node&.sync_object_storage

  node.lfs_objects.with_files_stored_locally
end
end
def log_geo_deleted_event
::Geo::LfsObjectDeletedEventStore.new(self).create!
end
......
......@@ -192,6 +192,10 @@ module EE
class_methods do
extend ::Gitlab::Utils::Override
def replicables_for_geo_node(node = ::Gitlab::Geo.current_node)
node.projects
end
def search_by_visibility(level)
where(visibility_level: ::Gitlab::VisibilityLevel.string_options[level])
end
......
......@@ -41,31 +41,14 @@ class Geo::BaseRegistry < Geo::TrackingBase
end
end
def self.replicator_class
self::MODEL_CLASS.replicator_class
end
# Builds the `pending` relation, skipping `except_ids` and capped at
# `batch_size` rows.
def self.find_unsynced_registries(batch_size:, except_ids: [])
  unsynced = pending.model_id_not_in(except_ids)
  unsynced.limit(batch_size)
end
# Builds the relation scoped by `failed` and `retry_due`, skipping
# `except_ids` and capped at `batch_size` rows.
def self.find_failed_registries(batch_size:, except_ids: [])
  retryable = failed.retry_due
  retryable.model_id_not_in(except_ids).limit(batch_size)
end
def self.has_create_events?
true
end
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
def self.replicator_class
self::MODEL_CLASS.replicator_class
end
def self.find_registry_differences(range)
source_ids = self::MODEL_CLASS
.replicables_for_geo_node
......@@ -80,6 +63,22 @@ class Geo::BaseRegistry < Geo::TrackingBase
[untracked_ids, unused_tracked_ids]
end
# Builds the `never_attempted_sync` relation, skipping `except_ids` and
# capped at `batch_size` rows.
def self.find_registries_never_attempted_sync(batch_size:, except_ids: [])
  candidates = never_attempted_sync.model_id_not_in(except_ids)
  candidates.limit(batch_size)
end
# Builds the `needs_sync_again` relation, skipping `except_ids` and capped
# at `batch_size` rows.
def self.find_registries_needs_sync_again(batch_size:, except_ids: [])
  candidates = needs_sync_again.model_id_not_in(except_ids)
  candidates.limit(batch_size)
end
def self.has_create_events?
true
end
def model_record_id
read_attribute(self.class::MODEL_FOREIGN_KEY)
end
......
......@@ -8,10 +8,11 @@ class Geo::ContainerRepositoryRegistry < Geo::BaseRegistry
belongs_to :container_repository
scope :never_synced, -> { with_state(:pending).where(last_synced_at: nil) }
scope :failed, -> { with_state(:failed) }
scope :synced, -> { with_state(:synced) }
scope :needs_sync_again, -> { failed.retry_due }
scope :never_attempted_sync, -> { with_state(:pending).where(last_synced_at: nil) }
scope :retry_due, -> { where(arel_table[:retry_at].eq(nil).or(arel_table[:retry_at].lt(Time.current))) }
scope :synced, -> { with_state(:synced) }
state_machine :state, initial: :pending do
state :started
......@@ -37,12 +38,8 @@ class Geo::ContainerRepositoryRegistry < Geo::BaseRegistry
end
end
def self.finder_class
::Geo::ContainerRepositoryRegistryFinder
end
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
def self.find_registries_needs_sync_again(batch_size:, except_ids: [])
super.order(Gitlab::Database.nulls_first_order(:last_synced_at))
end
def self.delete_for_model_ids(container_repository_ids)
......
......@@ -10,11 +10,12 @@ class Geo::DesignRegistry < Geo::BaseRegistry
belongs_to :project
scope :never_synced, -> { with_state(:pending).where(last_synced_at: nil) }
scope :pending, -> { with_state(:pending) }
scope :dirty, -> { with_state(:pending).where.not(last_synced_at: nil) }
scope :failed, -> { with_state(:failed) }
scope :synced, -> { with_state(:synced) }
scope :needs_sync_again, -> { dirty.or(failed.retry_due) }
scope :never_attempted_sync, -> { with_state(:pending).where(last_synced_at: nil) }
scope :retry_due, -> { where(arel_table[:retry_at].eq(nil).or(arel_table[:retry_at].lt(Time.current))) }
scope :synced, -> { with_state(:synced) }
state_machine :state, initial: :pending do
state :started
......@@ -50,12 +51,18 @@ class Geo::DesignRegistry < Geo::BaseRegistry
project_ids
end
def self.finder_class
::Geo::DesignRegistryFinder
def self.find_registry_differences(range)
source_ids = Gitlab::Geo.current_node.designs.id_in(range).pluck_primary_key
tracked_ids = self.pluck_model_ids_in_range(range)
untracked_ids = source_ids - tracked_ids
unused_tracked_ids = tracked_ids - source_ids
[untracked_ids, unused_tracked_ids]
end
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
def self.find_registries_needs_sync_again(batch_size:, except_ids: [])
super.order(Gitlab::Database.nulls_first_order(:last_synced_at))
end
# Search for a list of projects associated with registries,
......@@ -75,10 +82,6 @@ class Geo::DesignRegistry < Geo::BaseRegistry
designs_repositories
end
def self.updated_recently
pending.or(failed.retry_due)
end
def fail_sync!(message, error, attrs = {})
new_retry_count = retry_count + 1
......
......@@ -6,20 +6,6 @@ class Geo::JobArtifactRegistry < Geo::BaseRegistry
MODEL_CLASS = ::Ci::JobArtifact
MODEL_FOREIGN_KEY = :artifact_id
scope :never, -> { where(success: false, retry_count: nil) }
def self.failed
where(success: false).where.not(retry_count: nil)
end
def self.finder_class
::Geo::JobArtifactRegistryFinder
end
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end
# When false, RegistryConsistencyService will frequently check the end of the
# table to quickly handle new replicables.
def self.has_create_events?
......
......@@ -11,20 +11,6 @@ class Geo::LfsObjectRegistry < Geo::BaseRegistry
belongs_to :lfs_object, class_name: 'LfsObject'
scope :never, -> { where(success: false, retry_count: nil) }
def self.failed
where(success: false).where.not(retry_count: nil)
end
def self.finder_class
::Geo::LfsObjectRegistryFinder
end
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end
# If false, RegistryConsistencyService will frequently check the end of the
# table to quickly handle new replicables.
def self.has_create_events?
......
......@@ -21,8 +21,9 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
validates :project, presence: true, uniqueness: true
scope :never_synced, -> { where(last_repository_synced_at: nil) }
scope :dirty, -> { where(arel_table[:resync_repository].eq(true).or(arel_table[:resync_wiki].eq(true))) }
scope :needs_sync_again, -> { dirty.retry_due }
scope :never_attempted_sync, -> { where(last_repository_synced_at: nil) }
scope :synced_repos, -> { where(resync_repository: false) }
scope :synced_wikis, -> { where(resync_wiki: false) }
scope :failed_repos, -> { where(arel_table[:repository_retry_count].gt(0)) }
......@@ -43,14 +44,8 @@ class Geo::ProjectRegistry < Geo::BaseRegistry
where(nil).pluck(:project_id)
end
def self.find_registry_differences(range)
source_ids = Gitlab::Geo.current_node.projects.id_in(range).pluck_primary_key
tracked_ids = self.pluck_model_ids_in_range(range)
untracked_ids = source_ids - tracked_ids
unused_tracked_ids = tracked_ids - source_ids
[untracked_ids, unused_tracked_ids]
def self.find_registries_needs_sync_again(batch_size:, except_ids: [])
super.order(Gitlab::Database.nulls_first_order(:last_repository_synced_at))
end
def self.delete_worker_class
......
......@@ -10,16 +10,43 @@ class Geo::UploadRegistry < Geo::BaseRegistry
belongs_to :upload, foreign_key: :file_id
scope :failed, -> { where(success: false).where.not(retry_count: nil) }
scope :fresh, -> { order(created_at: :desc) }
scope :never, -> { where(success: false, retry_count: nil) }
def self.finder_class
::Geo::AttachmentRegistryFinder
end
# Returns untracked uploads as well as tracked uploads that are unused.
#
# Untracked uploads is an array where each item is a tuple of [id, file_type]
# that is supposed to be synced but don't yet have a registry entry.
#
# Unused uploads is an array where each item is a tuple of [id, file_type]
# that is not supposed to be synced but already have a registry entry. For
# example:
#
# - orphaned registries
# - records that became excluded from selective sync
# - records that are in object storage, and `sync_object_storage` became
# disabled
#
# We compute both sets in this method to reduce the number of DB queries
# performed.
#
# @return [Array] the first element is an Array of untracked uploads, and the
# second element is an Array of tracked uploads that are unused.
# For example: [[[1, 'avatar'], [5, 'file']], [[3, 'attachment']]]
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
source =
self::MODEL_CLASS.replicables_for_geo_node
.id_in(range)
.pluck(self::MODEL_CLASS.arel_table[:id], self::MODEL_CLASS.arel_table[:uploader])
.map! { |id, uploader| [id, uploader.sub(/Uploader\z/, '').underscore] }
tracked =
self.model_id_in(range)
.pluck(:file_id, :file_type)
untracked = source - tracked
unused_tracked = tracked - source
[untracked, unused_tracked]
end
# If false, RegistryConsistencyService will frequently check the end of the
......@@ -52,9 +79,8 @@ class Geo::UploadRegistry < Geo::BaseRegistry
case status
when 'synced', 'failed'
self.public_send(status) # rubocop: disable GitlabSecurity/PublicSend
# Explained via: https://gitlab.com/gitlab-org/gitlab/-/issues/216049
when 'pending'
self.never
never_attempted_sync
else
all
end
......
......@@ -483,7 +483,7 @@ class GeoNodeStatus < ApplicationRecord
self.lfs_objects_synced_count = lfs_objects_finder.synced_count
self.lfs_objects_failed_count = lfs_objects_finder.failed_count
self.lfs_objects_registry_count = lfs_objects_finder.registry_count
self.lfs_objects_synced_missing_on_primary_count = lfs_objects_finder.count_synced_missing_on_primary
self.lfs_objects_synced_missing_on_primary_count = lfs_objects_finder.synced_missing_on_primary_count
end
def load_job_artifacts_data
......@@ -493,7 +493,7 @@ class GeoNodeStatus < ApplicationRecord
self.job_artifacts_synced_count = job_artifacts_finder.synced_count
self.job_artifacts_failed_count = job_artifacts_finder.failed_count
self.job_artifacts_registry_count = job_artifacts_finder.registry_count
self.job_artifacts_synced_missing_on_primary_count = job_artifacts_finder.count_synced_missing_on_primary
self.job_artifacts_synced_missing_on_primary_count = job_artifacts_finder.synced_missing_on_primary_count
end
def load_attachments_data
......@@ -503,7 +503,7 @@ class GeoNodeStatus < ApplicationRecord
self.attachments_synced_count = attachments_finder.synced_count
self.attachments_failed_count = attachments_finder.failed_count
self.attachments_registry_count = attachments_finder.registry_count
self.attachments_synced_missing_on_primary_count = attachments_finder.count_synced_missing_on_primary
self.attachments_synced_missing_on_primary_count = attachments_finder.synced_missing_on_primary_count
end
def load_container_registry_data
......@@ -581,23 +581,23 @@ class GeoNodeStatus < ApplicationRecord
end
def attachments_finder
@attachments_finder ||= Geo::AttachmentRegistryFinder.new(current_node_id: geo_node.id)
@attachments_finder ||= Geo::AttachmentRegistryFinder.new
end
def lfs_objects_finder
@lfs_objects_finder ||= Geo::LfsObjectRegistryFinder.new(current_node_id: geo_node.id)
@lfs_objects_finder ||= Geo::LfsObjectRegistryFinder.new
end
def job_artifacts_finder
@job_artifacts_finder ||= Geo::JobArtifactRegistryFinder.new(current_node_id: geo_node.id)
@job_artifacts_finder ||= Geo::JobArtifactRegistryFinder.new
end
def container_registry_finder
@container_registry_finder ||= Geo::ContainerRepositoryRegistryFinder.new(current_node_id: geo_node.id)
@container_registry_finder ||= Geo::ContainerRepositoryRegistryFinder.new
end
def design_registry_finder
@design_registry_finder ||= Geo::DesignRegistryFinder.new(current_node_id: geo_node.id)
@design_registry_finder ||= Geo::DesignRegistryFinder.new
end
def repository_verification_finder
......
......@@ -36,30 +36,30 @@ module Geo
#
# @return [Array] resources to be transferred
def load_pending_resources
resources = find_container_repository_ids_not_synced(batch_size: db_retrieve_batch_size)
resources = find_jobs_never_attempted_sync(batch_size: db_retrieve_batch_size)
remaining_capacity = db_retrieve_batch_size - resources.size
if remaining_capacity == 0
resources
else
resources + find_retryable_container_registry_ids(batch_size: remaining_capacity)
resources + find_jobs_needs_sync_again(batch_size: remaining_capacity)
end
end
def find_container_repository_ids_not_synced(batch_size:)
def find_jobs_never_attempted_sync(batch_size:)
registry_finder
.find_never_synced_registries(batch_size: batch_size, except_ids: scheduled_repository_ids)
.find_registries_never_attempted_sync(batch_size: batch_size, except_ids: scheduled_repository_ids)
.pluck_model_foreign_key
end
def find_retryable_container_registry_ids(batch_size:)
def find_jobs_needs_sync_again(batch_size:)
registry_finder
.find_retryable_dirty_registries(batch_size: batch_size, except_ids: scheduled_repository_ids)
.find_registries_needs_sync_again(batch_size: batch_size, except_ids: scheduled_repository_ids)
.pluck_model_foreign_key
end
def registry_finder
@registry_finder ||= Geo::ContainerRepositoryRegistryFinder.new(current_node_id: current_node.id)
@registry_finder ||= Geo::ContainerRepositoryRegistryFinder.new
end
end
end
......@@ -11,10 +11,10 @@ module Geo
end
# rubocop: disable CodeReuse/ActiveRecord
def find_project_ids_not_synced(except_ids:, batch_size:)
def find_jobs_never_attempted_sync(except_ids:, batch_size:)
project_ids =
registry_finder
.find_never_synced_registries(batch_size: batch_size, except_ids: except_ids)
.find_registries_never_attempted_sync(batch_size: batch_size, except_ids: except_ids)
.pluck_model_foreign_key
find_project_ids_within_shard(project_ids, direction: :desc)
......@@ -22,10 +22,10 @@ module Geo
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def find_project_ids_updated_recently(except_ids:, batch_size:)
def find_jobs_needs_sync_again(except_ids:, batch_size:)
project_ids =
registry_finder
.find_retryable_dirty_registries(batch_size: batch_size, except_ids: except_ids)
.find_registries_needs_sync_again(batch_size: batch_size, except_ids: except_ids)
.pluck_model_foreign_key
find_project_ids_within_shard(project_ids, direction: :asc)
......
......@@ -33,7 +33,7 @@ module Geo
#
# @return [Array] resources to be transferred
def load_pending_resources
resources = find_unsynced_jobs(batch_size: db_retrieve_batch_size)
resources = find_jobs_never_attempted_sync(batch_size: db_retrieve_batch_size)
remaining_capacity = db_retrieve_batch_size - resources.count
if remaining_capacity == 0
......@@ -43,12 +43,13 @@ module Geo
end
end
# Get a batch of unsynced resources, taking equal parts from each resource.
# Get a batch of resources for which a sync has never been attempted,
# taking equal parts from each resource.
#
# @return [Array] job arguments of unsynced resources
def find_unsynced_jobs(batch_size:)
# @return [Array] job arguments of resources for which a sync has never been attempted
def find_jobs_never_attempted_sync(batch_size:)
jobs = job_finders.reduce([]) do |jobs, job_finder|
jobs << job_finder.find_unsynced_jobs(batch_size: batch_size)
jobs << job_finder.find_jobs_never_attempted_sync(batch_size: batch_size)
end
take_batch(*jobs, batch_size: batch_size)
......@@ -60,8 +61,8 @@ module Geo
# @return [Array] job arguments of low priority resources
def find_low_priority_jobs(batch_size:)
jobs = job_finders.reduce([]) do |jobs, job_finder|
jobs << job_finder.find_failed_jobs(batch_size: batch_size)
jobs << job_finder.find_synced_missing_on_primary_jobs(batch_size: batch_size)
jobs << job_finder.find_jobs_needs_sync_again(batch_size: batch_size)
jobs << job_finder.find_jobs_synced_missing_on_primary(batch_size: batch_size)
end
take_batch(*jobs, batch_size: batch_size)
......
......@@ -6,7 +6,7 @@ module Geo
EXCEPT_RESOURCE_IDS_KEY = :except_ids
def registry_finder
@registry_finder ||= Geo::AttachmentRegistryFinder.new(current_node_id: Gitlab::Geo.current_node.id)
@registry_finder ||= Geo::AttachmentRegistryFinder.new
end
private
......
......@@ -8,7 +8,7 @@ module Geo
FILE_SERVICE_OBJECT_TYPE = :job_artifact
def registry_finder
@registry_finder ||= Geo::JobArtifactRegistryFinder.new(current_node_id: Gitlab::Geo.current_node.id)
@registry_finder ||= Geo::JobArtifactRegistryFinder.new
end
end
end
......
......@@ -21,19 +21,19 @@ module Geo
@scheduled_file_ids = scheduled_file_ids
end
def find_unsynced_jobs(batch_size:)
def find_jobs_never_attempted_sync(batch_size:)
convert_registry_relation_to_job_args(
registry_finder.find_never_synced_registries(find_batch_params(batch_size))
registry_finder.find_registries_never_attempted_sync(find_batch_params(batch_size))
)
end
def find_failed_jobs(batch_size:)
def find_jobs_needs_sync_again(batch_size:)
convert_registry_relation_to_job_args(
registry_finder.find_retryable_failed_registries(find_batch_params(batch_size))
registry_finder.find_registries_needs_sync_again(find_batch_params(batch_size))
)
end
def find_synced_missing_on_primary_jobs(batch_size:)
def find_jobs_synced_missing_on_primary(batch_size:)
convert_registry_relation_to_job_args(
registry_finder.find_retryable_synced_missing_on_primary_registries(find_batch_params(batch_size))
)
......
......@@ -8,7 +8,7 @@ module Geo
FILE_SERVICE_OBJECT_TYPE = :lfs
def registry_finder
@registry_finder ||= Geo::LfsObjectRegistryFinder.new(current_node_id: Gitlab::Geo.current_node.id)
@registry_finder ||= Geo::LfsObjectRegistryFinder.new
end
end
end
......
......@@ -32,25 +32,26 @@ module Geo
#
# @return [Array] resources to be transferred
def load_pending_resources
resources = find_unsynced_jobs(batch_size: db_retrieve_batch_size)
resources = find_jobs_never_attempted_sync(batch_size: db_retrieve_batch_size)
remaining_capacity = db_retrieve_batch_size - resources.count
if remaining_capacity == 0
resources
else
resources + find_low_priority_jobs(batch_size: remaining_capacity)
resources + find_jobs_needs_sync_again(batch_size: remaining_capacity)
end
end
# Get a batch of unsynced resources, taking equal parts from each resource.
# Get a batch of resources for which a sync has never been attempted,
# taking equal parts from each resource.
#
# @return [Array] job arguments of unsynced resources
def find_unsynced_jobs(batch_size:)
# @return [Array] job arguments of resources for which a sync has never been attempted
def find_jobs_never_attempted_sync(batch_size:)
jobs = replicator_classes.reduce([]) do |jobs, replicator_class|
except_ids = scheduled_replicable_ids(replicator_class.replicable_name)
jobs << replicator_class
.find_unsynced_registries(batch_size: batch_size, except_ids: except_ids)
.find_registries_never_attempted_sync(batch_size: batch_size, except_ids: except_ids)
.map { |registry| [replicator_class.replicable_name, registry.model_record_id] }
end
......@@ -61,12 +62,12 @@ module Geo
# equal parts from each resource.
#
# @return [Array] job arguments of low priority resources
def find_low_priority_jobs(batch_size:)
def find_jobs_needs_sync_again(batch_size:)
jobs = replicator_classes.reduce([]) do |jobs, replicator_class|
except_ids = scheduled_replicable_ids(replicator_class.replicable_name)
jobs << replicator_class
.find_failed_registries(batch_size: batch_size, except_ids: except_ids)
.find_registries_needs_sync_again(batch_size: batch_size, except_ids: except_ids)
.map { |registry| [replicator_class.replicable_name, registry.model_record_id] }
end
......
......@@ -64,21 +64,21 @@ module Geo
def load_pending_resources
return [] unless valid_shard?
resources = find_project_ids_not_synced(except_ids: scheduled_project_ids, batch_size: db_retrieve_batch_size)
resources = find_jobs_never_attempted_sync(except_ids: scheduled_project_ids, batch_size: db_retrieve_batch_size)
remaining_capacity = db_retrieve_batch_size - resources.size
if remaining_capacity == 0
resources
else
resources + find_project_ids_updated_recently(except_ids: scheduled_project_ids + resources, batch_size: remaining_capacity)
resources + find_jobs_needs_sync_again(except_ids: scheduled_project_ids + resources, batch_size: remaining_capacity)
end
end
# rubocop: disable CodeReuse/ActiveRecord
def find_project_ids_not_synced(except_ids:, batch_size:)
def find_jobs_never_attempted_sync(except_ids:, batch_size:)
project_ids =
registry_finder
.find_never_synced_registries(batch_size: batch_size, except_ids: except_ids)
.find_registries_never_attempted_sync(batch_size: batch_size, except_ids: except_ids)
.pluck_model_foreign_key
find_project_ids_within_shard(project_ids, direction: :desc)
......@@ -86,10 +86,10 @@ module Geo
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def find_project_ids_updated_recently(except_ids:, batch_size:)
def find_jobs_needs_sync_again(except_ids:, batch_size:)
project_ids =
registry_finder
.find_retryable_dirty_registries(batch_size: batch_size, except_ids: except_ids)
.find_registries_needs_sync_again(batch_size: batch_size, except_ids: except_ids)
.pluck_model_foreign_key
find_project_ids_within_shard(project_ids, direction: :asc)
......
......@@ -22,7 +22,7 @@ module Gitlab
delegate :in_replicables_for_geo_node?, to: :model_record
class << self
delegate :find_unsynced_registries, :find_failed_registries, to: :registry_class
delegate :find_registries_never_attempted_sync, :find_registries_needs_sync_again, to: :registry_class
end
# Declare supported event
......
......@@ -3,219 +3,26 @@
require 'spec_helper'
RSpec.describe Geo::AttachmentRegistryFinder, :geo do
include ::EE::GeoHelpers
let_it_be(:secondary) { create(:geo_node) }
let_it_be(:synced_group) { create(:group) }
let_it_be(:synced_subgroup) { create(:group, parent: synced_group) }
let_it_be(:unsynced_group) { create(:group) }
let_it_be(:synced_project) { create(:project, group: synced_group) }
let_it_be(:synced_project_in_nested_group) { create(:project, group: synced_subgroup) }
let_it_be(:unsynced_project) { create(:project, :broken_storage, group: unsynced_group) }
let_it_be(:upload_1) { create(:upload, model: synced_group) }
let_it_be(:upload_2) { create(:upload, model: unsynced_group) }
let_it_be(:upload_3) { create(:upload, :issuable_upload, model: synced_project_in_nested_group) }
let_it_be(:upload_4) { create(:upload, model: unsynced_project) }
let_it_be(:upload_5) { create(:upload, model: synced_project) }
let_it_be(:upload_6) { create(:upload, :personal_snippet_upload) }
let_it_be(:upload_7) { create(:upload, :object_storage, model: synced_project) }
let_it_be(:upload_8) { create(:upload, :object_storage, model: unsynced_project) }
let_it_be(:upload_9) { create(:upload, :object_storage, model: synced_group) }
before do
stub_current_geo_node(secondary)
end
subject { described_class.new(current_node_id: secondary.id) }
# Verifies that #registry_count counts every upload registry regardless of its
# state: the eight registries created below mix :failed, :never_synced and
# missing_on_primary entries, and all eight are expected in the total.
describe '#registry_count' do
it 'counts registries for uploads' do
create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
expect(subject.registry_count).to eq 8
end
end
# Verifies #synced_count against the same eight-registry fixture set.
# The expected value of 3 matches the registries created without a :failed or
# :never_synced trait (uploads 2, 5 and 7) -- presumably the factory default
# state is "synced"; confirm against the :geo_upload_registry factory.
describe '#synced_count' do
it 'counts registries that has been synced' do
create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
expect(subject.synced_count).to eq 3
end
end
# Verifies #failed_count: of the eight registries created below, exactly three
# use the :failed trait (uploads 1, 4 and 6), and that is the expected count.
describe '#failed_count' do
it 'counts registries that sync has failed' do
create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
expect(subject.failed_count).to eq 3
end
end
# Verifies #count_synced_missing_on_primary: three registries below are created
# with missing_on_primary: true and without a :failed / :never_synced trait
# (uploads 2, 5 and 7), and the finder is expected to count exactly those.
describe '#count_synced_missing_on_primary' do
it 'counts registries that have been synced and are missing on the primary, excluding not synced ones' do
create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
expect(subject.count_synced_missing_on_primary).to eq 3
end
end
describe '#find_registry_differences' do
# Exercises #find_registry_differences over the range 1..(max upload id + 2).
# Registries exist for uploads 1, 3, 5, 6 and 7, so uploads 2, 4, 8 and 9 are
# expected back as untracked [id, file_type] tuples. Two extra registries point
# at file ids beyond the highest upload id, so they are expected back as unused.
it 'returns untracked IDs as well as tracked IDs that are unused', :aggregate_failures do
# Captured before creating registries; used to build file ids with no upload.
max_id = Upload.maximum(:id)
create(:geo_upload_registry, :avatar, file_id: upload_1.id)
create(:geo_upload_registry, :file, file_id: upload_3.id)
create(:geo_upload_registry, :avatar, file_id: upload_5.id)
create(:geo_upload_registry, :personal_file, file_id: upload_6.id)
create(:geo_upload_registry, :avatar, file_id: upload_7.id)
# Registries whose file_id does not match any existing upload.
unused_registry_1 = create(:geo_upload_registry, :attachment, file_id: max_id + 1)
unused_registry_2 = create(:geo_upload_registry, :personal_file, file_id: max_id + 2)
range = 1..(max_id + 2)
untracked, unused = subject.find_registry_differences(range)
expected_untracked = [
[upload_2.id, 'avatar'],
[upload_4.id, 'avatar'],
[upload_8.id, 'avatar'],
[upload_9.id, 'avatar']
]
expected_unused = [
[unused_registry_1.file_id, 'attachment'],
[unused_registry_2.file_id, 'personal_file']
]
expect(untracked).to match_array(expected_untracked)
expect(unused).to match_array(expected_unused)
end
# Runs the shared 'a file registry finder' examples with upload fixtures.
# Defines nine uploads (replicable_1..9) and eight registries (registry_1..8)
# in mixed states; replicable_9 deliberately has no registry.
# NOTE(review): the shared examples presumably reference these names directly --
# confirm against the shared example definition before renaming any of them.
it_behaves_like 'a file registry finder' do
let_it_be(:project) { create(:project) }
let_it_be(:replicable_1) { create(:upload, model: project) }
let_it_be(:replicable_2) { create(:upload, model: project) }
let_it_be(:replicable_3) { create(:upload, :issuable_upload, model: project) }
let_it_be(:replicable_4) { create(:upload, model: project) }
let_it_be(:replicable_5) { create(:upload, model: project) }
let_it_be(:replicable_6) { create(:upload, :personal_snippet_upload) }
let_it_be(:replicable_7) { create(:upload, :object_storage, model: project) }
let_it_be(:replicable_8) { create(:upload, :object_storage, model: project) }
let_it_be(:replicable_9) { create(:upload, :object_storage, model: project) }
let_it_be(:registry_1) { create(:geo_upload_registry, :attachment, :failed, file_id: replicable_1.id) }
let_it_be(:registry_2) { create(:geo_upload_registry, :attachment, file_id: replicable_2.id, missing_on_primary: true) }
let_it_be(:registry_3) { create(:geo_upload_registry, :attachment, :never_synced, file_id: replicable_3.id) }
let_it_be(:registry_4) { create(:geo_upload_registry, :attachment, :failed, file_id: replicable_4.id) }
let_it_be(:registry_5) { create(:geo_upload_registry, :attachment, file_id: replicable_5.id, missing_on_primary: true, retry_at: 1.day.ago) }
let_it_be(:registry_6) { create(:geo_upload_registry, :attachment, :failed, file_id: replicable_6.id) }
let_it_be(:registry_7) { create(:geo_upload_registry, :attachment, :failed, file_id: replicable_7.id, missing_on_primary: true) }
let_it_be(:registry_8) { create(:geo_upload_registry, :attachment, :never_synced, file_id: replicable_8.id) }
end
# Verifies #find_never_synced_registries against eight registries in mixed
# states. Only the two created with the :never_synced trait (uploads 3 and 8)
# are expected back; the second example checks that except_ids removes
# upload 3 from the result.
describe '#find_never_synced_registries' do
it 'returns registries for uploads that have never been synced' do
create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
registry_upload_3 = create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
registry_upload_8 = create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
registries = subject.find_never_synced_registries(batch_size: 10)
expect(registries).to match_ids(registry_upload_3, registry_upload_8)
end
it 'excludes except_ids' do
create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
registry_upload_8 = create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)
# except_ids takes upload (model) ids, not registry ids.
registries = subject.find_never_synced_registries(batch_size: 10, except_ids: [upload_3.id])
expect(registries).to match_ids(registry_upload_8)
end
end
# Verifies #find_retryable_failed_registries against eight upload registries in
# mixed states. Only the three created with the :failed trait (uploads 1, 4 and
# 6) are expected back; the second example checks that except_ids removes
# upload 4 from the result.
describe '#find_retryable_failed_registries' do
  # Description fixed: this spec covers uploads, not job artifacts.
  it 'returns registries for uploads that have failed to sync' do
    registry_upload_1 = create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
    create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
    create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
    registry_upload_4 = create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
    create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
    registry_upload_6 = create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
    create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
    create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)

    registries = subject.find_retryable_failed_registries(batch_size: 10)

    expect(registries).to match_ids(registry_upload_1, registry_upload_4, registry_upload_6)
  end

  it 'excludes except_ids' do
    registry_upload_1 = create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
    create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
    create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
    create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
    create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
    registry_upload_6 = create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
    create(:geo_upload_registry, :attachment, file_id: upload_7.id, missing_on_primary: true)
    create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)

    # except_ids takes upload (model) ids, not registry ids.
    registries = subject.find_retryable_failed_registries(batch_size: 10, except_ids: [upload_4.id])

    expect(registries).to match_ids(registry_upload_1, registry_upload_6)
  end
end
# Verifies #find_retryable_synced_missing_on_primary_registries against eight
# upload registries in mixed states. The registries for uploads 2 and 5 are
# missing on the primary without a :failed trait and are expected back; the
# registry for upload 7 is missing on the primary but also :failed, so it is
# expected to be left out.
describe '#find_retryable_synced_missing_on_primary_registries' do
  # Description fixed: this spec covers uploads, not job artifacts.
  it 'returns registries for uploads that have been synced and are missing on the primary' do
    create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
    registry_upload_2 = create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
    create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
    create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
    registry_upload_5 = create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
    create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
    create(:geo_upload_registry, :attachment, :failed, file_id: upload_7.id, missing_on_primary: true)
    create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)

    registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10)

    expect(registries).to match_ids(registry_upload_2, registry_upload_5)
  end

  it 'excludes except_ids' do
    create(:geo_upload_registry, :attachment, :failed, file_id: upload_1.id)
    registry_upload_2 = create(:geo_upload_registry, :attachment, file_id: upload_2.id, missing_on_primary: true)
    create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_3.id)
    create(:geo_upload_registry, :attachment, :failed, file_id: upload_4.id)
    create(:geo_upload_registry, :attachment, file_id: upload_5.id, missing_on_primary: true, retry_at: 1.day.ago)
    create(:geo_upload_registry, :attachment, :failed, file_id: upload_6.id)
    create(:geo_upload_registry, :attachment, :failed, file_id: upload_7.id, missing_on_primary: true)
    create(:geo_upload_registry, :attachment, :never_synced, file_id: upload_8.id)

    # except_ids takes upload (model) ids, not registry ids.
    registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10, except_ids: [upload_5.id])

    expect(registries).to match_ids(registry_upload_2)
  end
end
it_behaves_like 'a file registry finder'
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo do
include ::EE::GeoHelpers
let_it_be(:secondary) { create(:geo_node) }
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group) { create(:group, parent: synced_group) }
let_it_be(:project_synced_group) { create(:project, group: synced_group) }
let_it_be(:project_nested_group) { create(:project, group: nested_group) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
let_it_be(:container_repository_1) { create(:container_repository, project: project_synced_group) }
let_it_be(:container_repository_2) { create(:container_repository, project: project_nested_group) }
let_it_be(:container_repository_3) { create(:container_repository) }
let_it_be(:container_repository_4) { create(:container_repository) }
let_it_be(:container_repository_5) { create(:container_repository, project: project_broken_storage) }
let_it_be(:container_repository_6) { create(:container_repository, project: project_broken_storage) }
subject { described_class.new(current_node_id: secondary.id) }
it_behaves_like 'a registry finder' do
before do
stub_current_geo_node(secondary)
stub_registry_replication_config(enabled: true)
end
# Verifies #registry_count: one :synced and one :sync_failed container
# repository registry are created, and both are expected in the total.
describe '#registry_count' do
it 'returns number of container registries' do
create(:container_repository_registry, :synced, container_repository_id: container_repository_1.id)
create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_3.id)
expect(subject.registry_count).to eq(2)
end
end
# Verifies #synced_count: of one :synced and one :sync_failed registry, only the
# :synced one is expected to be counted.
describe '#synced_count' do
it 'returns only synced registry' do
create(:container_repository_registry, :synced, container_repository_id: container_repository_1.id)
create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_3.id)
expect(subject.synced_count).to eq(1)
end
end
# Verifies #failed_count: of one :synced and one :sync_failed registry, only the
# :sync_failed one is expected to be counted.
describe '#failed_count' do
it 'returns only failed registry' do
create(:container_repository_registry, :synced, container_repository_id: container_repository_1.id)
create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_3.id)
expect(subject.failed_count).to eq(1)
end
end
# Verifies #find_registry_differences, which is called with an ID range and
# returns a pair: the first element holds untracked container repository IDs,
# the second holds tracked IDs that are unused. Each half is exercised in its
# own context, with and without selective sync.
describe '#find_registry_differences' do
# First element of the result: container repositories in the range that have
# no registry entry.
context 'untracked IDs' do
before do
# Registries exist for repositories 1, 3 and 5; 2, 4 and 6 stay untracked.
create(:container_repository_registry, container_repository_id: container_repository_1.id)
create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_3.id)
create(:container_repository_registry, container_repository_id: container_repository_5.id)
end
it 'includes container registries IDs without an entry on the tracking database' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_2.id, container_repository_4.id, container_repository_6.id])
end
it 'excludes container registries outside the ID range' do
untracked_ids, _ = subject.find_registry_differences(container_repository_4.id..container_repository_6.id)
expect(untracked_ids).to match_array([container_repository_4.id, container_repository_6.id])
end
context 'with selective sync by namespace' do
# Overrides the outer secondary node with one restricted to synced_group.
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes container_registry IDs that projects are not in the selected namespaces' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_2.id])
end
end
context 'with selective sync by shard' do
# Overrides the outer secondary node with one restricted to the 'broken' shard.
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes container_registry IDs that projects are not in the selected shards' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_6.id])
end
end
end
# Second element of the result: registry entries in the range that should not
# exist (orphaned, or excluded by selective sync).
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
before do
# Remove the model row so the registry above no longer has a matching record.
container_repository_1.delete
end
it 'includes tracked IDs that do not exist in the model table' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_1.id])
end
it 'excludes IDs outside the ID range' do
range = (container_repository_1.id + 1)..ContainerRepository.maximum(:id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked container_registry' do
context 'excluded from selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_3.id) }
it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
range = container_repository_3.id..container_repository_3.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_3.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
it 'excludes tracked container_registry IDs that are in selectively synced projects' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked container_registry' do
# Default entry for this context; the 'included in selective sync' context
# below redefines registry_entry for repository 5 instead.
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
context 'excluded from selective sync' do
it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_1.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_5.id) }
it 'excludes tracked container_registry IDs that are in selectively synced projects' do
range = container_repository_5.id..container_repository_5.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
# Verifies #find_never_synced_registries for container repository registries.
# Six registries are created; the examples expect only the two created with
# last_synced_at: nil and no state trait (repositories 3 and 6) to be returned,
# and that except_ids removes repository 3 from the result.
describe '#find_never_synced_registries' do
  let_it_be(:registry_container_registry_1) { create(:container_repository_registry, :synced, container_repository_id: container_repository_1.id) }
  let_it_be(:registry_container_registry_2) { create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_2.id) }
  let_it_be(:registry_container_registry_3) { create(:container_repository_registry, container_repository_id: container_repository_3.id, last_synced_at: nil) }
  let_it_be(:registry_container_registry_4) { create(:container_repository_registry, container_repository_id: container_repository_4.id, last_synced_at: 3.days.ago, retry_at: 2.days.ago) }
  let_it_be(:registry_container_registry_5) { create(:container_repository_registry, container_repository_id: container_repository_5.id, last_synced_at: 6.days.ago) }
  let_it_be(:registry_container_registry_6) { create(:container_repository_registry, container_repository_id: container_repository_6.id, last_synced_at: nil) }

  # Description fixed: the registries here track container repositories,
  # not projects.
  it 'returns registries for container repositories that have never been synced' do
    registries = subject.find_never_synced_registries(batch_size: 10)

    expect(registries).to match_ids(registry_container_registry_3, registry_container_registry_6)
  end

  it 'excludes except_ids' do
    # except_ids takes container repository (model) ids, not registry ids.
    registries = subject.find_never_synced_registries(batch_size: 10, except_ids: [container_repository_3.id])

    expect(registries).to match_ids(registry_container_registry_6)
  end
end
describe '#find_retryable_dirty_registries' do
let_it_be(:registry_container_registry_1) { create(:container_repository_registry, :synced, container_repository_id: container_repository_1.id) }
let_it_be(:registry_container_registry_2) { create(:container_repository_registry, :sync_started, container_repository_id: container_repository_2.id) }
let_it_be(:registry_container_registry_3) { create(:container_repository_registry, state: :failed, container_repository_id: container_repository_3.id, last_synced_at: nil) }
let_it_be(:registry_container_registry_4) { create(:container_repository_registry, state: :failed, container_repository_id: container_repository_4.id, last_synced_at: 3.days.ago, retry_at: 2.days.ago) }
let_it_be(:registry_container_registry_5) { create(:container_repository_registry, state: :failed, container_repository_id: container_repository_5.id, last_synced_at: 6.days.ago) }
let_it_be(:registry_container_registry_6) { create(:container_repository_registry, state: :failed, container_repository_id: container_repository_6.id, last_synced_at: nil) }
it 'returns registries for projects that have been recently updated' do
registries = subject.find_retryable_dirty_registries(batch_size: 10)
expect(registries).to match_ids(registry_container_registry_3, registry_container_registry_4, registry_container_registry_5, registry_container_registry_6)
end
it 'excludes except_ids' do
registries = subject.find_retryable_dirty_registries(batch_size: 10, except_ids: [container_repository_4.id, container_repository_5.id, container_repository_6.id])
expect(registries).to match_ids(registry_container_registry_3)
end
let_it_be(:project) { create(:project) }
let_it_be(:replicable_1) { create(:container_repository, project: project) }
let_it_be(:replicable_2) { create(:container_repository, project: project) }
let_it_be(:replicable_3) { create(:container_repository, project: project) }
let_it_be(:replicable_4) { create(:container_repository, project: project) }
let_it_be(:replicable_5) { create(:container_repository, project: project) }
let_it_be(:replicable_6) { create(:container_repository, project: project) }
let_it_be(:replicable_7) { create(:container_repository, project: project) }
let_it_be(:replicable_8) { create(:container_repository, project: project) }
let_it_be(:registry_1) { create(:container_repository_registry, :sync_failed, container_repository_id: replicable_1.id) }
let_it_be(:registry_2) { create(:container_repository_registry, :synced, container_repository_id: replicable_2.id) }
let_it_be(:registry_3) { create(:container_repository_registry, container_repository_id: replicable_3.id) }
let_it_be(:registry_4) { create(:container_repository_registry, :sync_failed, container_repository_id: replicable_4.id) }
let_it_be(:registry_5) { create(:container_repository_registry, :synced, container_repository_id: replicable_5.id) }
let_it_be(:registry_6) { create(:container_repository_registry, :sync_failed, container_repository_id: replicable_6.id) }
let_it_be(:registry_7) { create(:container_repository_registry, :sync_failed, container_repository_id: replicable_7.id) }
let_it_be(:registry_8) { create(:container_repository_registry, container_repository_id: replicable_8.id) }
end
end
......@@ -2,242 +2,29 @@
require 'spec_helper'
RSpec.describe Geo::DesignRegistryFinder, :geo do
include ::EE::GeoHelpers
let_it_be(:secondary) { create(:geo_node) }
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group) { create(:group, parent: synced_group) }
let_it_be(:project_1) { create(:project, group: synced_group) }
let_it_be(:project_2) { create(:project, group: nested_group) }
let_it_be(:project_3) { create(:project) }
let_it_be(:project_4) { create(:project) }
let_it_be(:project_5) { create(:project, :broken_storage) }
let_it_be(:project_6) { create(:project, :broken_storage) }
let_it_be(:project_7) { create(:project) }
subject { described_class.new(current_node_id: secondary.id) }
it_behaves_like 'a registry finder' do
before do
stub_current_geo_node(secondary)
end
# Verifies #registry_count: one :synced and one :sync_failed design registry are
# created, and both are expected in the total.
describe '#registry_count' do
  # Description typo fixed ("desgin" -> "design").
  it 'returns number of design registries' do
    create(:geo_design_registry, :synced, project_id: project_1.id)
    create(:geo_design_registry, :sync_failed, project_id: project_2.id)

    expect(subject.registry_count).to eq(2)
  end
end
# Verifies #synced_count: of one :synced and one :sync_failed design registry,
# only the :synced one is expected to be counted.
describe '#synced_count' do
it 'returns number of synced registries' do
create(:geo_design_registry, :synced, project_id: project_1.id)
create(:geo_design_registry, :sync_failed, project_id: project_2.id)
expect(subject.synced_count).to eq(1)
end
end
# Verifies #failed_count: of one :synced and one :sync_failed design registry,
# only the :sync_failed one is expected to be counted.
describe '#failed_count' do
it 'returns number of failed registries' do
create(:geo_design_registry, :synced, project_id: project_1.id)
create(:geo_design_registry, :sync_failed, project_id: project_2.id)
expect(subject.failed_count).to eq(1)
end
end
describe '#find_registry_differences' do
# Gives projects 1 through 6 one design each. project_7 intentionally gets
# none, so it can serve as the "project without designs" case.
before_all do
create(:design, project: project_1)
create(:design, project: project_2)
create(:design, project: project_3)
create(:design, project: project_4)
create(:design, project: project_5)
create(:design, project: project_6)
end
context 'untracked IDs' do
before do
create(:geo_design_registry, project_id: project_1.id)
create(:geo_design_registry, :sync_failed, project_id: project_3.id)
create(:geo_design_registry, project_id: project_5.id)
end
# Across the full project ID range, the projects that have a design but no
# registry (2, 4 and 6) are the ones reported as untracked.
it 'includes project IDs without an entry on the tracking database' do
  every_project_id = Project.minimum(:id)..Project.maximum(:id)

  untracked_ids, _unused_tracked_ids = subject.find_registry_differences(every_project_id)

  expect(untracked_ids).to contain_exactly(project_2.id, project_4.id, project_6.id)
end
# Only IDs between project_4 and project_6 are considered, so project_2 is no
# longer reported even though it has no registry.
# NOTE(review): presumably relies on project IDs increasing in creation order — confirm.
it 'excludes projects outside the ID range' do
  bounded_range = project_4.id..project_6.id

  untracked_ids, _unused_tracked_ids = subject.find_registry_differences(bounded_range)

  expect(untracked_ids).to contain_exactly(project_4.id, project_6.id)
end
# project_7 has no design, so its ID must not be reported as untracked.
it 'excludes projects without designs' do
  range = Project.minimum(:id)..Project.maximum(:id)

  untracked_ids, _ = subject.find_registry_differences(range)

  # Compare against the project's ID. The previous `include([project_7])`
  # searched a flat list of integer IDs for a nested array holding a Project
  # record, so the negated expectation could never fail.
  expect(untracked_ids).not_to include(project_7.id)
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes project IDs that are not in selectively synced projects' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([project_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes project IDs that are not in selectively synced projects' do
range = Project.minimum(:id)..Project.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([project_6.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:geo_design_registry, project_id: project_1.id) }
before do
project_1.delete
end
it 'includes tracked IDs that do not exist in the model table' do
range = project_1.id..project_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_1.id])
end
it 'excludes IDs outside the ID range' do
range = (project_1.id + 1)..Project.maximum(:id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked project' do
context 'excluded from selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_3.id) }
it 'includes tracked project IDs that exist but are not in a selectively synced project' do
range = project_3.id..project_3.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_3.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_1.id) }
it 'excludes tracked project IDs that are in selectively synced projects' do
range = project_1.id..project_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked project' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_1.id) }
context 'excluded from selective sync' do
it 'includes tracked project IDs that exist but are not in a selectively synced project' do
range = project_1.id..project_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([project_1.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:geo_design_registry, project_id: project_5.id) }
it 'excludes tracked project IDs that are in selectively synced projects' do
range = project_5.id..project_5.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
# Six registries in mixed states. Registries 3 and 6 are the only ones created
# with `last_synced_at: nil`, and they are the ones the finder is expected to
# return.
describe '#find_never_synced_registries' do
  let!(:registry_project_1) { create(:geo_design_registry, :synced, project_id: project_1.id) }
  let!(:registry_project_2) { create(:geo_design_registry, :sync_failed, project_id: project_2.id) }
  let!(:registry_project_3) { create(:geo_design_registry, project_id: project_3.id, last_synced_at: nil) }
  let!(:registry_project_4) { create(:geo_design_registry, project_id: project_4.id, last_synced_at: 3.days.ago, retry_at: 2.days.ago) }
  let!(:registry_project_5) { create(:geo_design_registry, project_id: project_5.id, last_synced_at: 6.days.ago) }
  let!(:registry_project_6) { create(:geo_design_registry, project_id: project_6.id, last_synced_at: nil) }

  it 'returns registries for projects that have never been synced' do
    expect(subject.find_never_synced_registries(batch_size: 10))
      .to match_ids(registry_project_3, registry_project_6)
  end

  it 'excludes except_ids' do
    # Skipping project_3 leaves registry 6 as the only never-synced match.
    skipped_project_ids = [project_3.id]

    expect(subject.find_never_synced_registries(batch_size: 10, except_ids: skipped_project_ids))
      .to match_ids(registry_project_6)
  end
end
describe '#find_retryable_dirty_registries' do
let!(:registry_project_1) { create(:geo_design_registry, :synced, project_id: project_1.id) }
let!(:registry_project_2) { create(:geo_design_registry, :sync_failed, project_id: project_2.id) }
let!(:registry_project_3) { create(:geo_design_registry, project_id: project_3.id, last_synced_at: nil) }
let!(:registry_project_4) { create(:geo_design_registry, project_id: project_4.id, last_synced_at: 3.days.ago, retry_at: 2.days.ago) }
let!(:registry_project_5) { create(:geo_design_registry, project_id: project_5.id, last_synced_at: 6.days.ago) }
let!(:registry_project_6) { create(:geo_design_registry, project_id: project_6.id, last_synced_at: nil) }
it 'returns registries for projects that have been recently updated' do
registries = subject.find_retryable_dirty_registries(batch_size: 10)
expect(registries).to match_ids(registry_project_2, registry_project_3, registry_project_4, registry_project_5, registry_project_6)
end
it 'excludes except_ids' do
registries = subject.find_retryable_dirty_registries(batch_size: 10, except_ids: [project_4.id, project_5.id, project_6.id])
expect(registries).to match_ids(registry_project_2, registry_project_3)
end
stub_registry_replication_config(enabled: true)
end
let_it_be(:group) { create(:group) }
let_it_be(:replicable_1) { create(:project, group: group) }
let_it_be(:replicable_2) { create(:project, group: group) }
let_it_be(:replicable_3) { create(:project, group: group) }
let_it_be(:replicable_4) { create(:project, group: group) }
let_it_be(:replicable_5) { create(:project, group: group) }
let_it_be(:replicable_6) { create(:project, group: group) }
let_it_be(:replicable_7) { create(:project, group: group) }
let_it_be(:replicable_8) { create(:project, group: group) }
let_it_be(:registry_1) { create(:geo_design_registry, :sync_failed, project_id: replicable_1.id) }
let_it_be(:registry_2) { create(:geo_design_registry, :synced, project_id: replicable_2.id) }
let_it_be(:registry_3) { create(:geo_design_registry, project_id: replicable_3.id) }
let_it_be(:registry_4) { create(:geo_design_registry, :sync_failed, project_id: replicable_4.id) }
let_it_be(:registry_5) { create(:geo_design_registry, :synced, project_id: replicable_5.id) }
let_it_be(:registry_6) { create(:geo_design_registry, :sync_failed, project_id: replicable_6.id) }
let_it_be(:registry_7) { create(:geo_design_registry, :sync_failed, project_id: replicable_7.id) }
let_it_be(:registry_8) { create(:geo_design_registry, project_id: replicable_8.id) }
end
end
# frozen_string_literal: true
require 'spec_helper'
# Specs for Geo::FileRegistryFinder itself: the hook methods that raise
# NotImplementedError until a subclass provides them, and how
# `#local_storage_only?` follows the node's `sync_object_storage` setting.
RSpec.describe Geo::FileRegistryFinder, :geo do
  context 'with abstract methods' do
    # One generated example per hook; the interpolated description is the same
    # text as before.
    %i[replicables registry_class].each do |hook|
      it "requires subclasses to implement #{hook}" do
        expect { subject.send(hook) }.to raise_error(NotImplementedError)
      end
    end
  end

  describe '#local_storage_only?' do
    subject { described_class.new(current_node_id: geo_node.id) }

    context 'sync_object_storage is enabled' do
      let(:geo_node) { create(:geo_node, sync_object_storage: true) }

      it 'returns false' do
        local_only = subject.local_storage_only?

        expect(local_only).to be_falsey
      end
    end

    context 'sync_object_storage is disabled' do
      let(:geo_node) { create(:geo_node, sync_object_storage: false) }

      it 'returns true' do
        local_only = subject.local_storage_only?

        expect(local_only).to be_truthy
      end
    end
  end
end
......@@ -3,404 +3,29 @@
require 'spec_helper'
RSpec.describe Geo::JobArtifactRegistryFinder, :geo do
include ::EE::GeoHelpers
let_it_be(:secondary) { create(:geo_node) }
it_behaves_like 'a file registry finder' do
before do
stub_current_geo_node(secondary)
stub_artifacts_object_storage
end
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group_1) { create(:group, parent: synced_group) }
let_it_be(:synced_project) { create(:project, group: synced_group) }
let_it_be(:synced_project_in_nested_group) { create(:project, group: nested_group_1) }
let_it_be(:unsynced_project) { create(:project) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
let!(:ci_job_artifact_1) { create(:ci_job_artifact, project: synced_project) }
let!(:ci_job_artifact_2) { create(:ci_job_artifact, project: synced_project_in_nested_group) }
let!(:ci_job_artifact_3) { create(:ci_job_artifact, project: synced_project_in_nested_group) }
let!(:ci_job_artifact_4) { create(:ci_job_artifact, project: unsynced_project) }
let!(:ci_job_artifact_5) { create(:ci_job_artifact, project: project_broken_storage) }
let!(:ci_job_artifact_remote_1) { create(:ci_job_artifact, :remote_store) }
let!(:ci_job_artifact_remote_2) { create(:ci_job_artifact, :remote_store) }
let!(:ci_job_artifact_remote_3) { create(:ci_job_artifact, :remote_store) }
subject { described_class.new(current_node_id: secondary.id) }
describe '#registry_count' do
it 'counts registries for job artifacts' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
expect(subject.registry_count).to eq 8
end
end
describe '#synced_count' do
it 'counts registries that has been synced' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
expect(subject.synced_count).to eq 3
end
end
describe '#failed_count' do
it 'counts registries that sync has failed' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
expect(subject.failed_count).to eq 3
end
end
describe '#count_synced_missing_on_primary' do
it 'counts registries that have been synced and are missing on the primary, excluding not synced ones' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
expect(subject.count_synced_missing_on_primary).to eq 3
end
end
describe '#find_registry_differences' do
# Untracked IDs should not contain any of these expired job artifacts.
let!(:ci_job_artifact_6) { create(:ci_job_artifact, :expired, project: synced_project) }
let!(:ci_job_artifact_7) { create(:ci_job_artifact, :expired, project: unsynced_project) }
let!(:ci_job_artifact_8) { create(:ci_job_artifact, :expired, project: project_broken_storage) }
let!(:ci_job_artifact_remote_4) { create(:ci_job_artifact, :expired, :remote_store) }
context 'untracked IDs' do
before do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id)
end
it 'includes job artifact IDs without an entry on the tracking database' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array(
[ci_job_artifact_2.id, ci_job_artifact_5.id, ci_job_artifact_remote_1.id,
ci_job_artifact_remote_2.id, ci_job_artifact_remote_3.id])
end
it 'excludes job artifacts outside the ID range' do
untracked_ids, _ = subject.find_registry_differences(ci_job_artifact_3.id..ci_job_artifact_remote_2.id)
expect(untracked_ids).to match_array(
[ci_job_artifact_5.id, ci_job_artifact_remote_1.id,
ci_job_artifact_remote_2.id])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes job artifact IDs that are not in selectively synced projects' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([ci_job_artifact_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes job artifact IDs that are not in selectively synced projects' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([ci_job_artifact_5.id])
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
let_it_be(:project) { create(:project) }
it 'excludes job artifacts in object storage' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
let_it_be(:replicable_1) { create(:ci_job_artifact, project: project) }
let_it_be(:replicable_2) { create(:ci_job_artifact, project: project) }
let_it_be(:replicable_3) { create(:ci_job_artifact, project: project) }
let_it_be(:replicable_4) { create(:ci_job_artifact, project: project) }
let_it_be(:replicable_5) { create(:ci_job_artifact, project: project) }
let!(:replicable_6) { create(:ci_job_artifact, :remote_store, project: project) }
let!(:replicable_7) { create(:ci_job_artifact, :remote_store, project: project) }
let!(:replicable_8) { create(:ci_job_artifact, :remote_store, project: project) }
expect(untracked_ids).to match_array([ci_job_artifact_2.id, ci_job_artifact_5.id])
end
let_it_be(:registry_1) { create(:geo_job_artifact_registry, :failed, artifact_id: replicable_1.id) }
let_it_be(:registry_2) { create(:geo_job_artifact_registry, artifact_id: replicable_2.id, missing_on_primary: true) }
let_it_be(:registry_3) { create(:geo_job_artifact_registry, :never_synced, artifact_id: replicable_3.id) }
let_it_be(:registry_4) { create(:geo_job_artifact_registry, :failed, artifact_id: replicable_4.id) }
let_it_be(:registry_5) { create(:geo_job_artifact_registry, artifact_id: replicable_5.id, missing_on_primary: true, retry_at: 1.day.ago) }
let!(:registry_6) { create(:geo_job_artifact_registry, :failed, artifact_id: replicable_6.id) }
let!(:registry_7) { create(:geo_job_artifact_registry, :failed, artifact_id: replicable_7.id, missing_on_primary: true) }
let!(:registry_8) { create(:geo_job_artifact_registry, :never_synced, artifact_id: replicable_8.id) }
end
end
context 'unused tracked IDs' do
# A registry is created for `non_existing_record_id`. Its ID is reported as
# unused when the queried range covers it, and ignored when the range is
# 1..1000.
# NOTE(review): the second example presumably assumes `non_existing_record_id`
# lies outside 1..1000 — confirm against the helper.
context 'with an orphaned registry' do
  let!(:orphaned) { create(:geo_job_artifact_registry, artifact_id: non_existing_record_id) }

  it 'includes tracked IDs that do not exist in the model table' do
    _untracked_ids, unused_tracked_ids =
      subject.find_registry_differences(non_existing_record_id..non_existing_record_id)

    expect(unused_tracked_ids).to contain_exactly(non_existing_record_id)
  end

  it 'excludes IDs outside the ID range' do
    _untracked_ids, unused_tracked_ids = subject.find_registry_differences(1..1000)

    expect(unused_tracked_ids).to be_empty
  end
end
# A registry pointing at the expired artifact `ci_job_artifact_6` is reported
# as unused when the range covers its ID, and ignored for the ten IDs that
# follow it.
context 'with an expired registry' do
  let!(:expired) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_6.id) }

  it 'includes expired tracked IDs that exists in the model table' do
    expired_id = ci_job_artifact_6.id

    _untracked_ids, unused_tracked_ids = subject.find_registry_differences(expired_id..expired_id)

    expect(unused_tracked_ids).to contain_exactly(expired_id)
  end

  it 'excludes IDs outside the ID range' do
    first_following_id = ci_job_artifact_6.id + 1

    _untracked_ids, unused_tracked_ids =
      subject.find_registry_differences(first_following_id..(first_following_id + 9))

    expect(unused_tracked_ids).to be_empty
  end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked job artifact' do
let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id) }
let(:range) { ci_job_artifact_1.id..ci_job_artifact_4.id }
context 'excluded from selective sync' do
it 'includes tracked job artifact IDs that exist but are not in a selectively synced project' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_4.id])
end
end
context 'included in selective sync' do
it 'excludes tracked job artifact IDs that are in selectively synced projects' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
it 'includes expired tracked IDs that are in selectively synced projects' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_6.id)
range = ci_job_artifact_6.id..ci_job_artifact_6.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_6.id])
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked job artifact' do
let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id) }
let(:range) { ci_job_artifact_1.id..ci_job_artifact_5.id }
context 'excluded from selective sync' do
it 'includes tracked job artifact IDs that exist but are not in a selectively synced shard' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_1.id])
end
end
context 'included in selective sync' do
it 'excludes tracked job artifact IDs that are in selectively synced shards' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
it 'includes expired tracked IDs that are in selectively synced shards' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_8.id)
range = ci_job_artifact_8.id..ci_job_artifact_8.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_8.id])
end
end
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
context 'with a tracked job artifact' do
context 'in object storage' do
it 'includes tracked job artifact IDs that are in object storage' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_1.id)
range = ci_job_artifact_remote_1.id..ci_job_artifact_remote_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_remote_1.id])
end
it 'includes expired tracked IDs that are in object storage' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_4.id)
range = ci_job_artifact_remote_4.id..ci_job_artifact_remote_4.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([ci_job_artifact_remote_4.id])
end
end
context 'not in object storage' do
it 'excludes tracked job artifact IDs that are not in object storage' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
range = ci_job_artifact_1.id..ci_job_artifact_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
describe '#find_never_synced_registries' do
it 'returns registries for job artifacts that have never been synced' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
registry_ci_job_artifact_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
registries = subject.find_never_synced_registries(batch_size: 10)
expect(registries).to match_ids(registry_ci_job_artifact_3, registry_ci_job_artifact_remote_3)
end
it 'excludes except_ids' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
registries = subject.find_never_synced_registries(batch_size: 10, except_ids: [ci_job_artifact_3.id])
expect(registries).to match_ids(registry_ci_job_artifact_remote_3)
end
end
describe '#find_retryable_failed_registries' do
it 'returns registries for job artifacts that have failed to sync' do
registry_ci_job_artifact_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
registry_ci_job_artifact_4 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
registry_ci_job_artifact_remote_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
registries = subject.find_retryable_failed_registries(batch_size: 10)
expect(registries).to match_ids(registry_ci_job_artifact_1, registry_ci_job_artifact_4, registry_ci_job_artifact_remote_1)
end
it 'excludes except_ids' do
registry_ci_job_artifact_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
registry_ci_job_artifact_remote_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
registries = subject.find_retryable_failed_registries(batch_size: 10, except_ids: [ci_job_artifact_4.id])
expect(registries).to match_ids(registry_ci_job_artifact_1, registry_ci_job_artifact_remote_1)
end
end
describe '#find_retryable_synced_missing_on_primary_registries' do
it 'returns registries for job artifacts that have been synced and are missing on the primary' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
registry_ci_job_artifact_2 = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
registry_ci_job_artifact_5 = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10)
expect(registries).to match_ids(registry_ci_job_artifact_2, registry_ci_job_artifact_5)
end
it 'excludes except_ids' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
registry_ci_job_artifact_2 = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10, except_ids: [ci_job_artifact_5.id])
expect(registries).to match_ids(registry_ci_job_artifact_2)
end
end
it_behaves_like 'a file registry finder'
end
......@@ -3,351 +3,27 @@
require 'spec_helper'
RSpec.describe Geo::LfsObjectRegistryFinder, :geo do
let_it_be(:secondary) { create(:geo_node) }
it_behaves_like 'a file registry finder' do
before do
stub_lfs_object_storage
end
let!(:lfs_object_1) { create(:lfs_object) }
let!(:lfs_object_2) { create(:lfs_object) }
let!(:lfs_object_3) { create(:lfs_object) }
let!(:lfs_object_4) { create(:lfs_object) }
let!(:lfs_object_5) { create(:lfs_object) }
let!(:lfs_object_remote_1) { create(:lfs_object, :object_storage) }
let!(:lfs_object_remote_2) { create(:lfs_object, :object_storage) }
let!(:lfs_object_remote_3) { create(:lfs_object, :object_storage) }
subject { described_class.new(current_node_id: secondary.id) }
describe '#registry_count' do
it 'counts registries for LFS objects' do
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_4.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_remote_3.id)
expect(subject.registry_count).to eq 8
end
end
describe '#synced_count' do
it 'counts registries that has been synced' do
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_4.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_remote_1.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
create(:geo_lfs_object_registry, :never_synced, lfs_object_id: lfs_object_remote_3.id)
expect(subject.synced_count).to eq 3
end
end
describe '#failed_count' do
  # Same eight-registry fixture as the other count specs; three of the entries
  # are created with the :failed trait and the finder is expected to count
  # exactly three failures.
  it 'counts registries that sync has failed' do
    factory = :geo_lfs_object_registry

    create(factory, :failed, lfs_object_id: lfs_object_1.id)
    create(factory, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
    create(factory, :never_synced, lfs_object_id: lfs_object_3.id)
    create(factory, :failed, lfs_object_id: lfs_object_4.id)
    create(factory, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
    create(factory, :failed, lfs_object_id: lfs_object_remote_1.id)
    create(factory, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
    create(factory, :never_synced, lfs_object_id: lfs_object_remote_3.id)

    failed_total = subject.failed_count

    expect(failed_total).to eq(3)
  end
end
describe '#count_synced_missing_on_primary' do
  # Three of the eight registries are created with `missing_on_primary: true`
  # and without a :failed / :never_synced trait; the finder is expected to
  # count exactly those three.
  it 'counts registries that have been synced and are missing on the primary, excluding not synced ones' do
    factory = :geo_lfs_object_registry

    create(factory, :failed, lfs_object_id: lfs_object_1.id)
    create(factory, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
    create(factory, :never_synced, lfs_object_id: lfs_object_3.id)
    create(factory, :failed, lfs_object_id: lfs_object_4.id)
    create(factory, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
    create(factory, :failed, lfs_object_id: lfs_object_remote_1.id)
    create(factory, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
    create(factory, :never_synced, lfs_object_id: lfs_object_remote_3.id)

    missing_total = subject.count_synced_missing_on_primary

    expect(missing_total).to eq(3)
  end
end
# Specs for #find_registry_differences. The examples destructure the return
# value as a pair: [untracked IDs, unused tracked IDs], computed for the ID
# range passed in. lfs_object_* and lfs_object_remote_* are defined outside
# this block.
describe '#find_registry_differences' do
# Group/project fixtures: a group with a nested subgroup, one project in each,
# one project outside the group, and one project created with :broken_storage.
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group_1) { create(:group, parent: synced_group) }
let_it_be(:synced_project) { create(:project, group: synced_group) }
let_it_be(:synced_project_in_nested_group) { create(:project, group: nested_group_1) }
let_it_be(:unsynced_project) { create(:project) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
# First element of the pair: IDs the examples expect to be reported because
# they have no registry entry.
context 'untracked IDs' do
before do
# Registry entries exist only for lfs_object_1, lfs_object_3 and lfs_object_4.
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id)
create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_3.id)
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_4.id)
# Attach lfs_object_1..5 to projects so the selective-sync contexts below can
# expect different results per namespace/shard.
create(:lfs_objects_project, project: synced_project, lfs_object: lfs_object_1)
create(:lfs_objects_project, project: synced_project_in_nested_group, lfs_object: lfs_object_2)
create(:lfs_objects_project, project: synced_project_in_nested_group, lfs_object: lfs_object_3)
create(:lfs_objects_project, project: unsynced_project, lfs_object: lfs_object_4)
create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_5)
end
it 'includes LFS object IDs without an entry on the tracking database' do
untracked_ids, _ = subject.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array(
[lfs_object_2.id, lfs_object_5.id, lfs_object_remote_1.id,
lfs_object_remote_2.id, lfs_object_remote_3.id])
end
it 'excludes LFS objects outside the ID range' do
# NOTE(review): the expected IDs assume lfs_object_2 sorts before
# lfs_object_3 and lfs_object_remote_3 after lfs_object_remote_2 -- confirm
# against the creation order of the outer fixtures.
untracked_ids, _ = subject.find_registry_differences(lfs_object_3.id..lfs_object_remote_2.id)
expect(untracked_ids).to match_array(
[lfs_object_5.id, lfs_object_remote_1.id,
lfs_object_remote_2.id])
end
context 'with selective sync by namespace' do
# Overrides `secondary` with a node that selectively syncs `synced_group`.
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes LFS object IDs that are not in selectively synced projects' do
untracked_ids, _ = subject.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array([lfs_object_2.id])
end
end
context 'with selective sync by shard' do
# Overrides `secondary` with a node that selectively syncs the 'broken' shard
# (presumably the shard used by the :broken_storage project -- confirm).
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes LFS object IDs that are not in selectively synced projects' do
untracked_ids, _ = subject.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array([lfs_object_5.id])
end
end
context 'with object storage sync disabled' do
# Node created with :local_storage_only; the example expects the
# lfs_object_remote_* IDs to drop out of the untracked list.
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'excludes LFS objects in object storage' do
untracked_ids, _ = subject.find_registry_differences(LfsObject.first.id..LfsObject.last.id)
expect(untracked_ids).to match_array([lfs_object_2.id, lfs_object_5.id])
end
# NOTE(review): the replicable_* / registry_* declarations below are not
# referenced by any example in this context and follow the example rather
# than precede it; they look like lines from a different hunk of the diff
# that ended up interleaved here -- confirm where they belong.
let_it_be(:replicable_1) { create(:lfs_object) }
let_it_be(:replicable_2) { create(:lfs_object) }
let_it_be(:replicable_3) { create(:lfs_object) }
let_it_be(:replicable_4) { create(:lfs_object) }
let_it_be(:replicable_5) { create(:lfs_object) }
let!(:replicable_6) { create(:lfs_object, :object_storage) }
let!(:replicable_7) { create(:lfs_object, :object_storage) }
let!(:replicable_8) { create(:lfs_object, :object_storage) }
let_it_be(:registry_1) { create(:geo_lfs_object_registry, :failed, lfs_object_id: replicable_1.id) }
let_it_be(:registry_2) { create(:geo_lfs_object_registry, lfs_object_id: replicable_2.id, missing_on_primary: true) }
let_it_be(:registry_3) { create(:geo_lfs_object_registry, :never_synced, lfs_object_id: replicable_3.id) }
let_it_be(:registry_4) { create(:geo_lfs_object_registry, :failed, lfs_object_id: replicable_4.id) }
let_it_be(:registry_5) { create(:geo_lfs_object_registry, lfs_object_id: replicable_5.id, missing_on_primary: true, retry_at: 1.day.ago) }
let!(:registry_6) { create(:geo_lfs_object_registry, :failed, lfs_object_id: replicable_6.id) }
let!(:registry_7) { create(:geo_lfs_object_registry, :failed, lfs_object_id: replicable_7.id, missing_on_primary: true) }
let!(:registry_8) { create(:geo_lfs_object_registry, :never_synced, lfs_object_id: replicable_8.id) }
end
end
# Second element of the pair: IDs that have a registry entry but that the
# examples expect to be reported as no longer needed.
context 'unused tracked IDs' do
context 'with an orphaned registry' do
# Registry row pointing at an ID taken from the `non_existing_record_id` helper.
let!(:orphaned) { create(:geo_lfs_object_registry, lfs_object_id: non_existing_record_id) }
it 'includes tracked IDs that do not exist in the model table' do
range = non_existing_record_id..non_existing_record_id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([non_existing_record_id])
end
it 'excludes IDs outside the ID range' do
# assumes non_existing_record_id falls outside 1..1000 -- TODO confirm
range = 1..1000
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked LFS object' do
# lfs_object_1 is tracked; the range covers only that single ID.
let!(:registry_entry) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id) }
let(:range) { lfs_object_1.id..lfs_object_1.id }
context 'excluded from selective sync' do
# No lfs_objects_project row is created here, so lfs_object_1 is not linked
# to any project in the synced group.
it 'includes tracked LFS object IDs that exist but are not in a selectively synced project' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([lfs_object_1.id])
end
end
context 'included in selective sync' do
# Links lfs_object_1 to a project inside the synced group.
let!(:join_record) { create(:lfs_objects_project, project: synced_project, lfs_object: lfs_object_1) }
it 'excludes tracked LFS object IDs that are in selectively synced projects' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked LFS object' do
# lfs_object_1 is tracked; the range covers only that single ID.
let!(:registry_entry) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id) }
let(:range) { lfs_object_1.id..lfs_object_1.id }
context 'excluded from selective sync' do
it 'includes tracked LFS object IDs that exist but are not in a selectively synced project' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([lfs_object_1.id])
end
end
context 'included in selective sync' do
# Links lfs_object_1 to the :broken_storage project.
let!(:join_record) { create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_1) }
it 'excludes tracked LFS object IDs that are in selectively synced projects' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
context 'with a tracked LFS object' do
context 'in object storage' do
it 'includes tracked LFS object IDs that are in object storage' do
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_1.id)
range = lfs_object_remote_1.id..lfs_object_remote_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([lfs_object_remote_1.id])
end
end
context 'not in object storage' do
it 'excludes tracked LFS object IDs that are not in object storage' do
create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id)
range = lfs_object_1.id..lfs_object_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
describe '#find_never_synced_registries' do
  # Both examples build the same eight registries in the same order; only the
  # two created with the :never_synced trait are expected back.
  it 'returns registries for LFS objects that have never been synced' do
    factory = :geo_lfs_object_registry

    create(factory, :failed, lfs_object_id: lfs_object_1.id)
    create(factory, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
    never_synced_3 = create(factory, :never_synced, lfs_object_id: lfs_object_3.id)
    create(factory, :failed, lfs_object_id: lfs_object_4.id)
    create(factory, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
    create(factory, :failed, lfs_object_id: lfs_object_remote_1.id)
    create(factory, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
    never_synced_remote_3 = create(factory, :never_synced, lfs_object_id: lfs_object_remote_3.id)

    found = subject.find_never_synced_registries(batch_size: 10)

    expect(found).to match_ids(never_synced_3, never_synced_remote_3)
  end

  # `except_ids` is given the LFS object ID (not the registry ID) of one of the
  # never-synced entries, which must then be left out of the result.
  it 'excludes except_ids' do
    factory = :geo_lfs_object_registry

    create(factory, :failed, lfs_object_id: lfs_object_1.id)
    create(factory, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
    create(factory, :never_synced, lfs_object_id: lfs_object_3.id)
    create(factory, :failed, lfs_object_id: lfs_object_4.id)
    create(factory, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
    create(factory, :failed, lfs_object_id: lfs_object_remote_1.id)
    create(factory, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
    never_synced_remote_3 = create(factory, :never_synced, lfs_object_id: lfs_object_remote_3.id)

    found = subject.find_never_synced_registries(batch_size: 10, except_ids: [lfs_object_3.id])

    expect(found).to match_ids(never_synced_remote_3)
  end
end
describe '#find_retryable_failed_registries' do
  # Both examples build the same eight registries in the same order; the three
  # created with the :failed trait are the ones expected back.
  it 'returns registries for LFS objects that have failed to sync' do
    factory = :geo_lfs_object_registry

    failed_1 = create(factory, :failed, lfs_object_id: lfs_object_1.id)
    create(factory, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
    create(factory, :never_synced, lfs_object_id: lfs_object_3.id)
    failed_4 = create(factory, :failed, lfs_object_id: lfs_object_4.id)
    create(factory, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
    failed_remote_1 = create(factory, :failed, lfs_object_id: lfs_object_remote_1.id)
    create(factory, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
    create(factory, :never_synced, lfs_object_id: lfs_object_remote_3.id)

    found = subject.find_retryable_failed_registries(batch_size: 10)

    expect(found).to match_ids(failed_1, failed_4, failed_remote_1)
  end

  # `except_ids` is given the LFS object ID of one failed entry, which must
  # then be left out of the result.
  it 'excludes except_ids' do
    factory = :geo_lfs_object_registry

    failed_1 = create(factory, :failed, lfs_object_id: lfs_object_1.id)
    create(factory, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
    create(factory, :never_synced, lfs_object_id: lfs_object_3.id)
    create(factory, :failed, lfs_object_id: lfs_object_4.id)
    create(factory, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
    failed_remote_1 = create(factory, :failed, lfs_object_id: lfs_object_remote_1.id)
    create(factory, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
    create(factory, :never_synced, lfs_object_id: lfs_object_remote_3.id)

    found = subject.find_retryable_failed_registries(batch_size: 10, except_ids: [lfs_object_4.id])

    expect(found).to match_ids(failed_1, failed_remote_1)
  end
end
describe '#find_retryable_synced_missing_on_primary_registries' do
  # Unlike the other fixtures in this file, the lfs_object_remote_2 registry is
  # created with BOTH :failed and missing_on_primary here, and the examples
  # expect it to be left out of the result.
  it 'returns registries for LFS objects that have been synced and are missing on the primary' do
    factory = :geo_lfs_object_registry

    create(factory, :failed, lfs_object_id: lfs_object_1.id)
    missing_2 = create(factory, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
    create(factory, :never_synced, lfs_object_id: lfs_object_3.id)
    create(factory, :failed, lfs_object_id: lfs_object_4.id)
    missing_5 = create(factory, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
    create(factory, :failed, lfs_object_id: lfs_object_remote_1.id)
    create(factory, :failed, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
    create(factory, :never_synced, lfs_object_id: lfs_object_remote_3.id)

    found = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10)

    expect(found).to match_ids(missing_2, missing_5)
  end

  # `except_ids` is given the LFS object ID of one matching entry, which must
  # then be left out of the result.
  it 'excludes except_ids' do
    factory = :geo_lfs_object_registry

    create(factory, :failed, lfs_object_id: lfs_object_1.id)
    missing_2 = create(factory, lfs_object_id: lfs_object_2.id, missing_on_primary: true)
    create(factory, :never_synced, lfs_object_id: lfs_object_3.id)
    create(factory, :failed, lfs_object_id: lfs_object_4.id)
    create(factory, lfs_object_id: lfs_object_5.id, missing_on_primary: true, retry_at: 1.day.ago)
    create(factory, :failed, lfs_object_id: lfs_object_remote_1.id)
    create(factory, :failed, lfs_object_id: lfs_object_remote_2.id, missing_on_primary: true)
    create(factory, :never_synced, lfs_object_id: lfs_object_remote_3.id)

    found = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10, except_ids: [lfs_object_5.id])

    expect(found).to match_ids(missing_2)
  end
end
it_behaves_like 'a file registry finder'
end
......@@ -17,29 +17,29 @@ RSpec.describe Geo::ProjectRegistryFinder, :geo do
let_it_be(:registry_project_5) { create(:geo_project_registry, :wiki_dirty, project_id: project_5.id, last_repository_synced_at: 5.days.ago) }
let_it_be(:registry_project_6) { create(:geo_project_registry, project_id: project_6.id) }
describe '#find_never_synced_registries' do
it 'returns registries for projects that have never been synced' do
registries = subject.find_never_synced_registries(batch_size: 10)
describe '#find_registries_never_attempted_sync' do
it 'returns registries for projects that have never have an attempt to sync' do
registries = subject.find_registries_never_attempted_sync(batch_size: 10)
expect(registries).to match_ids(registry_project_3, registry_project_6)
end
it 'excludes except_ids' do
registries = subject.find_never_synced_registries(batch_size: 10, except_ids: [project_3.id])
registries = subject.find_registries_never_attempted_sync(batch_size: 10, except_ids: [project_3.id])
expect(registries).to match_ids(registry_project_6)
end
end
describe '#find_retryable_dirty_registries' do
it 'returns registries for projects that have been recently updated or that have never been synced' do
registries = subject.find_retryable_dirty_registries(batch_size: 10)
describe '#find_registries_needs_sync_again' do
it 'returns registries for dirty projects or that have failed to sync' do
registries = subject.find_registries_needs_sync_again(batch_size: 10)
expect(registries).to match_ids(registry_project_2, registry_project_3, registry_project_4, registry_project_5, registry_project_6)
end
it 'excludes except_ids' do
registries = subject.find_retryable_dirty_registries(batch_size: 10, except_ids: [project_4.id, project_5.id, project_6.id])
registries = subject.find_registries_needs_sync_again(batch_size: 10, except_ids: [project_4.id, project_5.id, project_6.id])
expect(registries).to match_ids(registry_project_2, registry_project_3)
end
......
......@@ -3,23 +3,23 @@
require 'spec_helper'
RSpec.describe Geo::ContainerRepositoryRegistry, :geo do
include ::EE::GeoHelpers
it_behaves_like 'a BulkInsertSafe model', Geo::ContainerRepositoryRegistry do
let(:valid_items_for_bulk_insertion) { build_list(:container_repository_registry, 10, created_at: Time.zone.now) }
let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined
end
let_it_be(:registry) { create(:container_repository_registry) }
it_behaves_like 'a Geo registry' do
let(:registry) { create(:container_repository_registry) }
end
describe 'relationships' do
it { is_expected.to belong_to(:container_repository) }
end
it_behaves_like 'a Geo registry' do
let(:registry) { create(:container_repository_registry) }
end
describe '#finish_sync!' do
let(:registry) { create(:container_repository_registry, :sync_started) }
let_it_be(:registry) { create(:container_repository_registry, :sync_started) }
it 'finishes registry record' do
registry.finish_sync!
......@@ -58,6 +58,158 @@ RSpec.describe Geo::ContainerRepositoryRegistry, :geo do
end
end
# Specs for Geo::ContainerRepositoryRegistry.find_registry_differences. The
# examples destructure the result as [untracked IDs, unused tracked IDs] for
# the ID range passed in.
describe '.find_registry_differences' do
  # Plain :geo_node by default; nested contexts redefine `secondary` with
  # selective-sync settings.
  let_it_be(:secondary) { create(:geo_node) }
  let_it_be(:synced_group) { create(:group) }
  let_it_be(:nested_group) { create(:group, parent: synced_group) }
  let_it_be(:project_synced_group) { create(:project, group: synced_group) }
  let_it_be(:project_nested_group) { create(:project, group: nested_group) }
  let_it_be(:project_broken_storage) { create(:project, :broken_storage) }

  # Repositories 1-2 live under the synced group, 3-4 get factory-default
  # projects, 5-6 live in the :broken_storage project.
  let_it_be(:container_repository_1) { create(:container_repository, project: project_synced_group) }
  let_it_be(:container_repository_2) { create(:container_repository, project: project_nested_group) }
  let_it_be(:container_repository_3) { create(:container_repository) }
  let_it_be(:container_repository_4) { create(:container_repository) }
  let_it_be(:container_repository_5) { create(:container_repository, project: project_broken_storage) }
  let_it_be(:container_repository_6) { create(:container_repository, project: project_broken_storage) }

  before do
    stub_current_geo_node(secondary)
    stub_registry_replication_config(enabled: true)
  end

  context 'untracked IDs' do
    # Spans every ContainerRepository row present when the example runs.
    let(:full_id_range) { ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id) }

    # Only repositories 1, 3 and 5 have a registry entry.
    before do
      create(:container_repository_registry, container_repository_id: container_repository_1.id)
      create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_3.id)
      create(:container_repository_registry, container_repository_id: container_repository_5.id)
    end

    it 'includes container registries IDs without an entry on the tracking database' do
      untracked_ids, _ = described_class.find_registry_differences(full_id_range)

      expected_ids = [container_repository_2, container_repository_4, container_repository_6].map(&:id)
      expect(untracked_ids).to match_array(expected_ids)
    end

    it 'excludes container registries outside the ID range' do
      partial_range = container_repository_4.id..container_repository_6.id

      untracked_ids, _ = described_class.find_registry_differences(partial_range)

      expected_ids = [container_repository_4, container_repository_6].map(&:id)
      expect(untracked_ids).to match_array(expected_ids)
    end

    context 'with selective sync by namespace' do
      let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }

      it 'excludes container_registry IDs that projects are not in the selected namespaces' do
        untracked_ids, _ = described_class.find_registry_differences(full_id_range)

        expect(untracked_ids).to match_array([container_repository_2.id])
      end
    end

    context 'with selective sync by shard' do
      let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }

      it 'excludes container_registry IDs that projects are not in the selected shards' do
        untracked_ids, _ = described_class.find_registry_differences(full_id_range)

        expect(untracked_ids).to match_array([container_repository_6.id])
      end
    end
  end

  context 'unused tracked IDs' do
    context 'with an orphaned registry' do
      let!(:orphaned) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }

      # Remove the model row so the registry entry above points at nothing.
      before do
        container_repository_1.delete
      end

      it 'includes tracked IDs that do not exist in the model table' do
        orphaned_id = container_repository_1.id

        _, unused_tracked_ids = described_class.find_registry_differences(orphaned_id..orphaned_id)

        expect(unused_tracked_ids).to match_array([orphaned_id])
      end

      it 'excludes IDs outside the ID range' do
        range_after_orphan = (container_repository_1.id + 1)..ContainerRepository.maximum(:id)

        _, unused_tracked_ids = described_class.find_registry_differences(range_after_orphan)

        expect(unused_tracked_ids).to be_empty
      end
    end

    context 'with selective sync by namespace' do
      let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }

      context 'with a tracked container_registry' do
        context 'excluded from selective sync' do
          let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_3.id) }
          let(:range) { container_repository_3.id..container_repository_3.id }

          it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
            _, unused_tracked_ids = described_class.find_registry_differences(range)

            expect(unused_tracked_ids).to match_array([container_repository_3.id])
          end
        end

        context 'included in selective sync' do
          let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
          let(:range) { container_repository_1.id..container_repository_1.id }

          it 'excludes tracked container_registry IDs that are in selectively synced projects' do
            _, unused_tracked_ids = described_class.find_registry_differences(range)

            expect(unused_tracked_ids).to be_empty
          end
        end
      end
    end

    context 'with selective sync by shard' do
      let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }

      context 'with a tracked container_registry' do
        # Default entry tracks repository 1; the "included" context below
        # replaces it with one for repository 5.
        let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }

        context 'excluded from selective sync' do
          let(:range) { container_repository_1.id..container_repository_1.id }

          it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
            _, unused_tracked_ids = described_class.find_registry_differences(range)

            expect(unused_tracked_ids).to match_array([container_repository_1.id])
          end
        end

        context 'included in selective sync' do
          let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_5.id) }
          let(:range) { container_repository_5.id..container_repository_5.id }

          it 'excludes tracked container_registry IDs that are in selectively synced projects' do
            _, unused_tracked_ids = described_class.find_registry_differences(range)

            expect(unused_tracked_ids).to be_empty
          end
        end
      end
    end
  end
end
describe '.replication_enabled?' do
it 'returns true when registry replication is enabled' do
stub_geo_setting(registry_replication: { enabled: true })
......
......@@ -3,6 +3,8 @@
require 'spec_helper'
RSpec.describe Geo::DesignRegistry, :geo do
include ::EE::GeoHelpers
it_behaves_like 'a BulkInsertSafe model', Geo::DesignRegistry do
let(:valid_items_for_bulk_insertion) { build_list(:geo_design_registry, 10, created_at: Time.zone.now) }
let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined
......@@ -16,6 +18,172 @@ RSpec.describe Geo::DesignRegistry, :geo do
let(:registry) { create(:geo_design_registry) }
end
# Specs for Geo::DesignRegistry.find_registry_differences. The registry is
# keyed by project ID; the examples destructure the result as
# [untracked project IDs, unused tracked project IDs] for the ID range given.
describe '.find_registry_differences' do
  # Plain :geo_node by default; nested contexts redefine `secondary` with
  # selective-sync settings.
  let_it_be(:secondary) { create(:geo_node) }
  let_it_be(:synced_group) { create(:group) }
  let_it_be(:nested_group) { create(:group, parent: synced_group) }
  let_it_be(:project_1) { create(:project, group: synced_group) }
  let_it_be(:project_2) { create(:project, group: nested_group) }
  let_it_be(:project_3) { create(:project) }
  let_it_be(:project_4) { create(:project) }
  let_it_be(:project_5) { create(:project, :broken_storage) }
  let_it_be(:project_6) { create(:project, :broken_storage) }
  # project_7 deliberately gets no design in `before_all` below.
  let_it_be(:project_7) { create(:project) }

  before do
    stub_current_geo_node(secondary)
  end

  # One design per project for projects 1-6.
  before_all do
    create(:design, project: project_1)
    create(:design, project: project_2)
    create(:design, project: project_3)
    create(:design, project: project_4)
    create(:design, project: project_5)
    create(:design, project: project_6)
  end

  context 'untracked IDs' do
    # Only projects 1, 3 and 5 have a registry entry.
    before do
      create(:geo_design_registry, project_id: project_1.id)
      create(:geo_design_registry, :sync_failed, project_id: project_3.id)
      create(:geo_design_registry, project_id: project_5.id)
    end

    it 'includes project IDs without an entry on the tracking database' do
      range = Project.minimum(:id)..Project.maximum(:id)

      untracked_ids, _ = described_class.find_registry_differences(range)

      expect(untracked_ids).to match_array([project_2.id, project_4.id, project_6.id])
    end

    it 'excludes projects outside the ID range' do
      untracked_ids, _ = described_class.find_registry_differences(project_4.id..project_6.id)

      expect(untracked_ids).to match_array([project_4.id, project_6.id])
    end

    it 'excludes projects without designs' do
      range = Project.minimum(:id)..Project.maximum(:id)

      untracked_ids, _ = described_class.find_registry_differences(range)

      # Compare against the integer ID: `untracked_ids` holds IDs, so matching
      # on an Array or a Project record could never fail.
      expect(untracked_ids).not_to include(project_7.id)
    end

    context 'with selective sync by namespace' do
      let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }

      it 'excludes project IDs that are not in selectively synced projects' do
        range = Project.minimum(:id)..Project.maximum(:id)

        untracked_ids, _ = described_class.find_registry_differences(range)

        expect(untracked_ids).to match_array([project_2.id])
      end
    end

    context 'with selective sync by shard' do
      let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }

      it 'excludes project IDs that are not in selectively synced projects' do
        range = Project.minimum(:id)..Project.maximum(:id)

        untracked_ids, _ = described_class.find_registry_differences(range)

        expect(untracked_ids).to match_array([project_6.id])
      end
    end
  end

  context 'unused tracked IDs' do
    context 'with an orphaned registry' do
      let!(:orphaned) { create(:geo_design_registry, project_id: project_1.id) }

      # Remove the project row so the registry entry above points at nothing.
      before do
        project_1.delete
      end

      it 'includes tracked IDs that do not exist in the model table' do
        range = project_1.id..project_1.id

        _, unused_tracked_ids = described_class.find_registry_differences(range)

        expect(unused_tracked_ids).to match_array([project_1.id])
      end

      it 'excludes IDs outside the ID range' do
        range = (project_1.id + 1)..Project.maximum(:id)

        _, unused_tracked_ids = described_class.find_registry_differences(range)

        expect(unused_tracked_ids).to be_empty
      end
    end

    context 'with selective sync by namespace' do
      let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }

      context 'with a tracked project' do
        context 'excluded from selective sync' do
          let!(:registry_entry) { create(:geo_design_registry, project_id: project_3.id) }

          it 'includes tracked project IDs that exist but are not in a selectively synced project' do
            range = project_3.id..project_3.id

            _, unused_tracked_ids = described_class.find_registry_differences(range)

            expect(unused_tracked_ids).to match_array([project_3.id])
          end
        end

        context 'included in selective sync' do
          let!(:registry_entry) { create(:geo_design_registry, project_id: project_1.id) }

          it 'excludes tracked project IDs that are in selectively synced projects' do
            range = project_1.id..project_1.id

            _, unused_tracked_ids = described_class.find_registry_differences(range)

            expect(unused_tracked_ids).to be_empty
          end
        end
      end
    end

    context 'with selective sync by shard' do
      let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }

      context 'with a tracked project' do
        # Default entry tracks project_1; the "included" context below
        # replaces it with one for project_5.
        let!(:registry_entry) { create(:geo_design_registry, project_id: project_1.id) }

        context 'excluded from selective sync' do
          it 'includes tracked project IDs that exist but are not in a selectively synced project' do
            range = project_1.id..project_1.id

            _, unused_tracked_ids = described_class.find_registry_differences(range)

            expect(unused_tracked_ids).to match_array([project_1.id])
          end
        end

        context 'included in selective sync' do
          let!(:registry_entry) { create(:geo_design_registry, project_id: project_5.id) }

          it 'excludes tracked project IDs that are in selectively synced projects' do
            range = project_5.id..project_5.id

            _, unused_tracked_ids = described_class.find_registry_differences(range)

            expect(unused_tracked_ids).to be_empty
          end
        end
      end
    end
  end
end
describe '#search' do
let!(:design_registry) { create(:geo_design_registry) }
let!(:failed_registry) { create(:geo_design_registry, :sync_failed) }
......
......@@ -23,4 +23,243 @@ RSpec.describe Geo::JobArtifactRegistry, :geo do
expect(described_class.where(id: ids).pluck(:success)).to eq([false])
end
end
describe '.find_registry_differences' do
let_it_be(:secondary) { create(:geo_node) }
before do
stub_current_geo_node(secondary)
stub_artifacts_object_storage
end
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group_1) { create(:group, parent: synced_group) }
let_it_be(:synced_project) { create(:project, group: synced_group) }
let_it_be(:synced_project_in_nested_group) { create(:project, group: nested_group_1) }
let_it_be(:unsynced_project) { create(:project) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
let_it_be(:ci_job_artifact_1) { create(:ci_job_artifact, project: synced_project) }
let_it_be(:ci_job_artifact_2) { create(:ci_job_artifact, project: synced_project_in_nested_group) }
let_it_be(:ci_job_artifact_3) { create(:ci_job_artifact, project: synced_project_in_nested_group) }
let_it_be(:ci_job_artifact_4) { create(:ci_job_artifact, project: unsynced_project) }
let_it_be(:ci_job_artifact_5) { create(:ci_job_artifact, project: project_broken_storage) }
let!(:ci_job_artifact_remote_1) { create(:ci_job_artifact, :remote_store) }
let!(:ci_job_artifact_remote_2) { create(:ci_job_artifact, :remote_store) }
let!(:ci_job_artifact_remote_3) { create(:ci_job_artifact, :remote_store) }
# Untracked IDs should not contain any of these expired job artifacts.
let_it_be(:ci_job_artifact_6) { create(:ci_job_artifact, :expired, project: synced_project) }
let_it_be(:ci_job_artifact_7) { create(:ci_job_artifact, :expired, project: unsynced_project) }
let_it_be(:ci_job_artifact_8) { create(:ci_job_artifact, :expired, project: project_broken_storage) }
let!(:ci_job_artifact_remote_4) { create(:ci_job_artifact, :expired, :remote_store) }
context 'untracked IDs' do
  # Only artifacts 1, 3 (failed sync) and 4 receive a registry row; every
  # other artifact is left without a tracking entry.
  before do
    create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
    create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_3.id)
    create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id)
  end

  it 'includes job artifact IDs without an entry on the tracking database' do
    every_id = Ci::JobArtifact.first.id..Ci::JobArtifact.last.id
    untracked, _unused = described_class.find_registry_differences(every_id)

    expect(untracked).to contain_exactly(
      ci_job_artifact_2.id,
      ci_job_artifact_5.id,
      ci_job_artifact_remote_1.id,
      ci_job_artifact_remote_2.id,
      ci_job_artifact_remote_3.id
    )
  end

  it 'excludes job artifacts outside the ID range' do
    partial_range = ci_job_artifact_3.id..ci_job_artifact_remote_2.id
    untracked, _unused = described_class.find_registry_differences(partial_range)

    expect(untracked).to contain_exactly(
      ci_job_artifact_5.id,
      ci_job_artifact_remote_1.id,
      ci_job_artifact_remote_2.id
    )
  end

  context 'with selective sync by namespace' do
    let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }

    it 'excludes job artifact IDs that are not in selectively synced projects' do
      every_id = Ci::JobArtifact.first.id..Ci::JobArtifact.last.id
      untracked, _unused = described_class.find_registry_differences(every_id)

      expect(untracked).to contain_exactly(ci_job_artifact_2.id)
    end
  end

  context 'with selective sync by shard' do
    let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }

    it 'excludes job artifact IDs that are not in selectively synced projects' do
      every_id = Ci::JobArtifact.first.id..Ci::JobArtifact.last.id
      untracked, _unused = described_class.find_registry_differences(every_id)

      expect(untracked).to contain_exactly(ci_job_artifact_5.id)
    end
  end

  context 'with object storage sync disabled' do
    let(:secondary) { create(:geo_node, :local_storage_only) }

    it 'excludes job artifacts in object storage' do
      every_id = Ci::JobArtifact.first.id..Ci::JobArtifact.last.id
      untracked, _unused = described_class.find_registry_differences(every_id)

      expect(untracked).to contain_exactly(ci_job_artifact_2.id, ci_job_artifact_5.id)
    end
  end
end
context 'unused tracked IDs' do
  # Registry row whose artifact_id points at no existing job artifact.
  context 'with an orphaned registry' do
    let!(:orphaned) { create(:geo_job_artifact_registry, artifact_id: non_existing_record_id) }

    it 'includes tracked IDs that do not exist in the model table' do
      orphan_only = non_existing_record_id..non_existing_record_id
      _untracked, unused = described_class.find_registry_differences(orphan_only)

      expect(unused).to contain_exactly(non_existing_record_id)
    end

    it 'excludes IDs outside the ID range' do
      _untracked, unused = described_class.find_registry_differences(1..1000)

      expect(unused).to be_empty
    end
  end

  # Registry row for ci_job_artifact_6, which is created with the :expired trait.
  context 'with an expired registry' do
    let!(:expired) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_6.id) }

    it 'includes expired tracked IDs that exists in the model table' do
      expired_only = ci_job_artifact_6.id..ci_job_artifact_6.id
      _untracked, unused = described_class.find_registry_differences(expired_only)

      expect(unused).to contain_exactly(ci_job_artifact_6.id)
    end

    it 'excludes IDs outside the ID range' do
      beyond_expired = (ci_job_artifact_6.id + 1)..(ci_job_artifact_6.id + 10)
      _untracked, unused = described_class.find_registry_differences(beyond_expired)

      expect(unused).to be_empty
    end
  end

  context 'with selective sync by namespace' do
    let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }

    context 'with a tracked job artifact' do
      let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id) }
      let(:range) { ci_job_artifact_1.id..ci_job_artifact_4.id }

      context 'excluded from selective sync' do
        it 'includes tracked job artifact IDs that exist but are not in a selectively synced project' do
          create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id)

          _untracked, unused = described_class.find_registry_differences(range)

          expect(unused).to contain_exactly(ci_job_artifact_4.id)
        end
      end

      context 'included in selective sync' do
        it 'excludes tracked job artifact IDs that are in selectively synced projects' do
          _untracked, unused = described_class.find_registry_differences(range)

          expect(unused).to be_empty
        end

        it 'includes expired tracked IDs that are in selectively synced projects' do
          create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_6.id)

          # Narrower than the shared `range`: only the expired artifact.
          expired_only = ci_job_artifact_6.id..ci_job_artifact_6.id
          _untracked, unused = described_class.find_registry_differences(expired_only)

          expect(unused).to contain_exactly(ci_job_artifact_6.id)
        end
      end
    end
  end

  context 'with selective sync by shard' do
    let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }

    context 'with a tracked job artifact' do
      let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id) }
      let(:range) { ci_job_artifact_1.id..ci_job_artifact_5.id }

      context 'excluded from selective sync' do
        it 'includes tracked job artifact IDs that exist but are not in a selectively synced shard' do
          create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)

          _untracked, unused = described_class.find_registry_differences(range)

          expect(unused).to contain_exactly(ci_job_artifact_1.id)
        end
      end

      context 'included in selective sync' do
        it 'excludes tracked job artifact IDs that are in selectively synced shards' do
          _untracked, unused = described_class.find_registry_differences(range)

          expect(unused).to be_empty
        end

        it 'includes expired tracked IDs that are in selectively synced shards' do
          create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_8.id)

          # Narrower than the shared `range`: only the expired artifact.
          expired_only = ci_job_artifact_8.id..ci_job_artifact_8.id
          _untracked, unused = described_class.find_registry_differences(expired_only)

          expect(unused).to contain_exactly(ci_job_artifact_8.id)
        end
      end
    end
  end

  context 'with object storage sync disabled' do
    let(:secondary) { create(:geo_node, :local_storage_only) }

    context 'with a tracked job artifact' do
      context 'in object storage' do
        it 'includes tracked job artifact IDs that are in object storage' do
          create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_1.id)

          remote_only = ci_job_artifact_remote_1.id..ci_job_artifact_remote_1.id
          _untracked, unused = described_class.find_registry_differences(remote_only)

          expect(unused).to contain_exactly(ci_job_artifact_remote_1.id)
        end

        it 'includes expired tracked IDs that are in object storage' do
          create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_4.id)

          remote_only = ci_job_artifact_remote_4.id..ci_job_artifact_remote_4.id
          _untracked, unused = described_class.find_registry_differences(remote_only)

          expect(unused).to contain_exactly(ci_job_artifact_remote_4.id)
        end
      end

      context 'not in object storage' do
        it 'excludes tracked job artifact IDs that are not in object storage' do
          create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)

          local_only = ci_job_artifact_1.id..ci_job_artifact_1.id
          _untracked, unused = described_class.find_registry_differences(local_only)

          expect(unused).to be_empty
        end
      end
    end
  end
end
end
end
......@@ -3,12 +3,203 @@
require 'spec_helper'
RSpec.describe Geo::LfsObjectRegistry, :geo do
# NOTE(review): an identical `describe 'relationships'` block appears again a
# few lines below; this one looks like the pre-change copy carried over from
# the diff - confirm before keeping both.
describe 'relationships' do
it { is_expected.to belong_to(:lfs_object).class_name('LfsObject') }
end
# Mixes in the Geo spec helpers; `stub_current_geo_node` used further down
# presumably comes from here - confirm.
include EE::GeoHelpers
# Runs the shared bulk-insert examples against ten built (unsaved) registries.
it_behaves_like 'a BulkInsertSafe model', Geo::LfsObjectRegistry do
let(:valid_items_for_bulk_insertion) { build_list(:geo_lfs_object_registry, 10) }
let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined
end
# The registry is expected to belong to an LfsObject through `lfs_object`.
describe 'relationships' do
it { is_expected.to belong_to(:lfs_object).class_name('LfsObject') }
end
describe '.find_registry_differences' do
# Default secondary node; nested contexts override `secondary` with `let`
# to switch on selective sync or local-storage-only mode.
let_it_be(:secondary) { create(:geo_node) }
# Group hierarchy and projects referenced by the selective-sync contexts.
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group_1) { create(:group, parent: synced_group) }
let_it_be(:synced_project) { create(:project, group: synced_group) }
let_it_be(:synced_project_in_nested_group) { create(:project, group: nested_group_1) }
let_it_be(:unsynced_project) { create(:project) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
# Make `secondary` the current Geo node and stub LFS object storage for
# every example.
before do
stub_current_geo_node(secondary)
stub_lfs_object_storage
end
# LFS objects created without the :object_storage trait.
# NOTE(review): examples build ID ranges from these records (e.g.
# lfs_object_3.id..lfs_object_remote_2.id), so the creation order
# presumably matters - confirm before reordering.
let_it_be(:lfs_object_1) { create(:lfs_object) }
let_it_be(:lfs_object_2) { create(:lfs_object) }
let_it_be(:lfs_object_3) { create(:lfs_object) }
let_it_be(:lfs_object_4) { create(:lfs_object) }
let_it_be(:lfs_object_5) { create(:lfs_object) }
# LFS objects created with the :object_storage trait; `let!` builds them
# eagerly for every example.
let!(:lfs_object_remote_1) { create(:lfs_object, :object_storage) }
let!(:lfs_object_remote_2) { create(:lfs_object, :object_storage) }
let!(:lfs_object_remote_3) { create(:lfs_object, :object_storage) }
context 'untracked IDs' do
  before do
    # Registry rows exist for LFS objects 1, 3 (failed sync) and 4 only.
    create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id)
    create(:geo_lfs_object_registry, :failed, lfs_object_id: lfs_object_3.id)
    create(:geo_lfs_object_registry, lfs_object_id: lfs_object_4.id)

    # Link LFS objects 1-5 to the projects used by the selective-sync contexts.
    create(:lfs_objects_project, project: synced_project, lfs_object: lfs_object_1)
    create(:lfs_objects_project, project: synced_project_in_nested_group, lfs_object: lfs_object_2)
    create(:lfs_objects_project, project: synced_project_in_nested_group, lfs_object: lfs_object_3)
    create(:lfs_objects_project, project: unsynced_project, lfs_object: lfs_object_4)
    create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_5)
  end

  it 'includes LFS object IDs without an entry on the tracking database' do
    every_id = LfsObject.first.id..LfsObject.last.id
    untracked, _unused = described_class.find_registry_differences(every_id)

    expect(untracked).to contain_exactly(
      lfs_object_2.id,
      lfs_object_5.id,
      lfs_object_remote_1.id,
      lfs_object_remote_2.id,
      lfs_object_remote_3.id
    )
  end

  it 'excludes LFS objects outside the ID range' do
    partial_range = lfs_object_3.id..lfs_object_remote_2.id
    untracked, _unused = described_class.find_registry_differences(partial_range)

    expect(untracked).to contain_exactly(
      lfs_object_5.id,
      lfs_object_remote_1.id,
      lfs_object_remote_2.id
    )
  end

  context 'with selective sync by namespace' do
    let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }

    it 'excludes LFS object IDs that are not in selectively synced projects' do
      every_id = LfsObject.first.id..LfsObject.last.id
      untracked, _unused = described_class.find_registry_differences(every_id)

      expect(untracked).to contain_exactly(lfs_object_2.id)
    end
  end

  context 'with selective sync by shard' do
    let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }

    it 'excludes LFS object IDs that are not in selectively synced projects' do
      every_id = LfsObject.first.id..LfsObject.last.id
      untracked, _unused = described_class.find_registry_differences(every_id)

      expect(untracked).to contain_exactly(lfs_object_5.id)
    end
  end

  context 'with object storage sync disabled' do
    let(:secondary) { create(:geo_node, :local_storage_only) }

    it 'excludes LFS objects in object storage' do
      every_id = LfsObject.first.id..LfsObject.last.id
      untracked, _unused = described_class.find_registry_differences(every_id)

      expect(untracked).to contain_exactly(lfs_object_2.id, lfs_object_5.id)
    end
  end
end
context 'unused tracked IDs' do
  # Registry row whose lfs_object_id points at no existing LFS object.
  context 'with an orphaned registry' do
    let!(:orphaned) { create(:geo_lfs_object_registry, lfs_object_id: non_existing_record_id) }

    it 'includes tracked IDs that do not exist in the model table' do
      orphan_only = non_existing_record_id..non_existing_record_id
      _untracked, unused = described_class.find_registry_differences(orphan_only)

      expect(unused).to contain_exactly(non_existing_record_id)
    end

    it 'excludes IDs outside the ID range' do
      _untracked, unused = described_class.find_registry_differences(1..1000)

      expect(unused).to be_empty
    end
  end

  context 'with selective sync by namespace' do
    let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }

    context 'with a tracked LFS object' do
      let!(:registry_entry) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id) }
      let(:range) { lfs_object_1.id..lfs_object_1.id }

      # No lfs_objects_project row is created here for lfs_object_1.
      context 'excluded from selective sync' do
        it 'includes tracked LFS object IDs that exist but are not in a selectively synced project' do
          _untracked, unused = described_class.find_registry_differences(range)

          expect(unused).to contain_exactly(lfs_object_1.id)
        end
      end

      # lfs_object_1 is linked to a project inside the synced group.
      context 'included in selective sync' do
        let!(:join_record) { create(:lfs_objects_project, project: synced_project, lfs_object: lfs_object_1) }

        it 'excludes tracked LFS object IDs that are in selectively synced projects' do
          _untracked, unused = described_class.find_registry_differences(range)

          expect(unused).to be_empty
        end
      end
    end
  end

  context 'with selective sync by shard' do
    let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }

    context 'with a tracked LFS object' do
      let!(:registry_entry) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id) }
      let(:range) { lfs_object_1.id..lfs_object_1.id }

      # No lfs_objects_project row is created here for lfs_object_1.
      context 'excluded from selective sync' do
        it 'includes tracked LFS object IDs that exist but are not in a selectively synced project' do
          _untracked, unused = described_class.find_registry_differences(range)

          expect(unused).to contain_exactly(lfs_object_1.id)
        end
      end

      # lfs_object_1 is linked to the project created with :broken_storage.
      context 'included in selective sync' do
        let!(:join_record) { create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_1) }

        it 'excludes tracked LFS object IDs that are in selectively synced projects' do
          _untracked, unused = described_class.find_registry_differences(range)

          expect(unused).to be_empty
        end
      end
    end
  end

  context 'with object storage sync disabled' do
    let(:secondary) { create(:geo_node, :local_storage_only) }

    context 'with a tracked LFS object' do
      context 'in object storage' do
        it 'includes tracked LFS object IDs that are in object storage' do
          create(:geo_lfs_object_registry, lfs_object_id: lfs_object_remote_1.id)

          remote_only = lfs_object_remote_1.id..lfs_object_remote_1.id
          _untracked, unused = described_class.find_registry_differences(remote_only)

          expect(unused).to contain_exactly(lfs_object_remote_1.id)
        end
      end

      context 'not in object storage' do
        it 'excludes tracked LFS object IDs that are not in object storage' do
          create(:geo_lfs_object_registry, lfs_object_id: lfs_object_1.id)

          local_only = lfs_object_1.id..lfs_object_1.id
          _untracked, unused = described_class.find_registry_differences(local_only)

          expect(unused).to be_empty
        end
      end
    end
  end
end
end
end
......@@ -5,9 +5,6 @@ require 'spec_helper'
RSpec.describe Geo::UploadRegistry, :geo do
include EE::GeoHelpers
let!(:failed) { create(:geo_upload_registry, :failed) }
let!(:synced) { create(:geo_upload_registry) }
it_behaves_like 'a BulkInsertSafe model', Geo::UploadRegistry do
let(:valid_items_for_bulk_insertion) { build_list(:geo_upload_registry, 10, created_at: Time.zone.now) }
let(:invalid_items_for_bulk_insertion) { [] } # class does not have any validations defined
......@@ -19,20 +16,75 @@ RSpec.describe Geo::UploadRegistry, :geo do
expect(described_class.find(registry.id).upload).to be_an_instance_of(Upload)
end
describe '.find_registry_differences' do
  let_it_be(:secondary) { create(:geo_node) }
  let_it_be(:project) { create(:project) }

  # Nine uploads: 1-5 belong to the project (3 uses :issuable_upload), 6 is
  # a personal snippet upload, and 7-9 use the :object_storage trait.
  let_it_be(:upload_1) { create(:upload, model: project) }
  let_it_be(:upload_2) { create(:upload, model: project) }
  let_it_be(:upload_3) { create(:upload, :issuable_upload, model: project) }
  let_it_be(:upload_4) { create(:upload, model: project) }
  let_it_be(:upload_5) { create(:upload, model: project) }
  let_it_be(:upload_6) { create(:upload, :personal_snippet_upload) }
  let_it_be(:upload_7) { create(:upload, :object_storage, model: project) }
  let_it_be(:upload_8) { create(:upload, :object_storage, model: project) }
  let_it_be(:upload_9) { create(:upload, :object_storage, model: project) }

  before do
    stub_current_geo_node(secondary)
  end

  it 'returns untracked IDs as well as tracked IDs that are unused', :aggregate_failures do
    highest_id = Upload.maximum(:id)

    # Registries for uploads that exist: 1, 3, 5, 6 and 7.
    create(:geo_upload_registry, :avatar, file_id: upload_1.id)
    create(:geo_upload_registry, :file, file_id: upload_3.id)
    create(:geo_upload_registry, :avatar, file_id: upload_5.id)
    create(:geo_upload_registry, :personal_file, file_id: upload_6.id)
    create(:geo_upload_registry, :avatar, file_id: upload_7.id)

    # Registries whose file_id lies past the highest existing upload ID.
    orphan_attachment = create(:geo_upload_registry, :attachment, file_id: highest_id + 1)
    orphan_personal_file = create(:geo_upload_registry, :personal_file, file_id: highest_id + 2)

    untracked, unused = described_class.find_registry_differences(1..(highest_id + 2))

    uploads_without_registry = [upload_2, upload_4, upload_8, upload_9]
    expected_untracked = uploads_without_registry.map { |upload| [upload.id, 'avatar'] }
    expected_unused = [
      [orphan_attachment.file_id, 'attachment'],
      [orphan_personal_file.file_id, 'personal_file']
    ]

    expect(untracked).to match_array(expected_untracked)
    expect(unused).to match_array(expected_unused)
  end
end
describe '.failed' do
  it 'returns registries in the failed state' do
    failed_registry = create(:geo_upload_registry, :failed)
    # A registry built without the :failed trait must be left out of the scope.
    create(:geo_upload_registry)

    expect(described_class.failed).to match_ids(failed_registry)
  end
end
describe '.synced' do
  it 'returns registries in the synced state' do
    # A registry built with the :failed trait must be left out of the scope.
    create(:geo_upload_registry, :failed)
    synced_registry = create(:geo_upload_registry)

    expect(described_class.synced).to match_ids(synced_registry)
  end
end
describe '.retry_due' do
it 'returns registries in the synced state' do
failed = create(:geo_upload_registry, :failed)
synced = create(:geo_upload_registry)
retry_yesterday = create(:geo_upload_registry, retry_at: Date.yesterday)
create(:geo_upload_registry, retry_at: Date.tomorrow)
......@@ -40,11 +92,13 @@ RSpec.describe Geo::UploadRegistry, :geo do
end
end
describe '.never' do
describe '.never_attempted_sync' do
it 'returns registries that are never synced' do
never = create(:geo_upload_registry, retry_count: nil, success: false)
create(:geo_upload_registry, :failed)
create(:geo_upload_registry)
pending = create(:geo_upload_registry, retry_count: nil, success: false)
expect(described_class.never).to match_ids([never])
expect(described_class.never_attempted_sync).to match_ids([pending])
end
end
......@@ -55,12 +109,12 @@ RSpec.describe Geo::UploadRegistry, :geo do
described_class.with_status('synced')
end
# Explained via: https://gitlab.com/gitlab-org/gitlab/-/issues/216049
it 'finds the registries with status "never" when filter is set to "pending"' do
expect(described_class).to receive(:never)
it 'finds the registries with status "never_attempted_sync" when filter is set to "pending"' do
expect(described_class).to receive(:never_attempted_sync)
described_class.with_status('pending')
end
it 'finds the registries with status "failed"' do
expect(described_class).to receive(:failed)
......@@ -93,6 +147,9 @@ RSpec.describe Geo::UploadRegistry, :geo do
end
describe '#synchronization_state' do
let_it_be(:failed) { create(:geo_upload_registry, :failed) }
let_it_be(:synced) { create(:geo_upload_registry) }
it 'returns :synced for a successful synced registry' do
expect(synced.synchronization_state).to eq(:synced)
end
......
# frozen_string_literal: true
# Shared examples for finders that work on file registries.
#
# The examples reference `subject`, `registry_2`, `registry_5` and
# `replicable_5`, none of which are defined here, so the including spec is
# presumably expected to provide them.
RSpec.shared_examples 'a file registry finder' do
  include_examples 'a registry finder'

  it 'responds to file registry finder methods' do
    %i[
      synced_missing_on_primary_count
      find_retryable_synced_missing_on_primary_registries
    ].each do |finder_method|
      expect(subject).to respond_to(finder_method)
    end
  end

  describe '#synced_missing_on_primary_count' do
    it 'counts registries that have been synced and are missing on the primary, excluding not synced ones' do
      expect(subject.synced_missing_on_primary_count).to eq(2)
    end
  end

  describe '#find_retryable_synced_missing_on_primary_registries' do
    it 'returns registries that have been synced and are missing on the primary' do
      found = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10)

      expect(found).to match_ids(registry_2, registry_5)
    end

    it 'excludes except_ids' do
      found = subject.find_retryable_synced_missing_on_primary_registries(
        batch_size: 10,
        except_ids: [replicable_5.id]
      )

      expect(found).to match_ids(registry_2)
    end
  end
end
# frozen_string_literal: true
# Shared example checking that `subject` exposes the file registry finder
# interface listed below.
RSpec.shared_examples 'a file registry finder' do
  it 'responds to file registry finder methods' do
    %i[
      registry_class
      registry_count
      synced_count
      failed_count
      count_synced_missing_on_primary
      find_retryable_failed_registries
      find_retryable_synced_missing_on_primary_registries
    ].each do |finder_method|
      expect(subject).to respond_to(finder_method)
    end
  end
end
# frozen_string_literal: true
# Shared examples for the common registry finder interface.
#
# The examples reference `subject` plus numbered `registry_N` /
# `replicable_N` records that are not defined here, so the including spec is
# presumably expected to provide them.
RSpec.shared_examples 'a registry finder' do
  it 'responds to registry finder methods' do
    %i[
      failed_count
      find_registries_never_attempted_sync
      find_registries_needs_sync_again
      registry_class
      registry_count
      synced_count
    ].each do |finder_method|
      expect(subject).to respond_to(finder_method)
    end
  end

  describe '#registry_count' do
    it 'counts registries' do
      expect(subject.registry_count).to eq(8)
    end
  end

  describe '#synced_count' do
    it 'counts registries that has been synced' do
      expect(subject.synced_count).to eq(2)
    end
  end

  describe '#failed_count' do
    it 'counts registries that sync has failed' do
      expect(subject.failed_count).to eq(4)
    end
  end

  describe '#find_registries_never_attempted_sync' do
    it 'returns registries that have never been synced' do
      found = subject.find_registries_never_attempted_sync(batch_size: 10)

      expect(found).to match_ids(registry_3, registry_8)
    end

    it 'excludes except_ids' do
      found = subject.find_registries_never_attempted_sync(batch_size: 10, except_ids: [replicable_3.id])

      expect(found).to match_ids(registry_8)
    end
  end

  describe '#find_registries_needs_sync_again' do
    it 'returns registries for that have failed to sync' do
      found = subject.find_registries_needs_sync_again(batch_size: 10)

      expect(found).to match_ids(registry_1, registry_4, registry_6, registry_7)
    end

    it 'excludes except_ids' do
      skipped_ids = [replicable_4.id, replicable_7.id]
      found = subject.find_registries_needs_sync_again(batch_size: 10, except_ids: skipped_ids)

      expect(found).to match_ids(registry_1, registry_6)
    end
  end
end
......@@ -9,26 +9,26 @@ RSpec.shared_examples 'a Geo framework registry' do
let!(:unsynced_item1) { create(registry_class_factory) }
let!(:unsynced_item2) { create(registry_class_factory) }
describe '.find_unsynced_registries' do
describe '.find_registries_never_attempted_sync' do
it 'returns unsynced items' do
result = described_class.find_unsynced_registries(batch_size: 10)
result = described_class.find_registries_never_attempted_sync(batch_size: 10)
expect(result).to include(unsynced_item1, unsynced_item2)
end
it 'returns unsynced items except some specific item ID' do
it 'returns items that never have an attempt to sync except some specific item ID' do
except_id = unsynced_item1.model_record_id
result = described_class.find_unsynced_registries(batch_size: 10, except_ids: [except_id])
result = described_class.find_registries_never_attempted_sync(batch_size: 10, except_ids: [except_id])
expect(result).to include(unsynced_item2)
expect(result).not_to include(unsynced_item1)
end
end
describe '.find_failed_registries' do
describe '.find_registries_needs_sync_again' do
it 'returns failed items' do
result = described_class.find_failed_registries(batch_size: 10)
result = described_class.find_registries_needs_sync_again(batch_size: 10)
expect(result).to include(failed_item1, failed_item2)
end
......@@ -36,7 +36,7 @@ RSpec.shared_examples 'a Geo framework registry' do
it 'returns failed items except some specific item ID' do
except_id = failed_item1.model_record_id
result = described_class.find_failed_registries(batch_size: 10, except_ids: [except_id])
result = described_class.find_registries_needs_sync_again(batch_size: 10, except_ids: [except_id])
expect(result).to include(failed_item2)
expect(result).not_to include(failed_item1)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment