Commit daa1639e authored by Michael Kozono

Merge branch...

Merge branch '217477-remove-feature-flags-to-make-registry-table-ssot-for-job-artifacts' into 'master'

Remove feature flag to make registry table SSOT for Job Artifacts

See merge request gitlab-org/gitlab!34590
parents 62f9013b be130b7b
...@@ -2,10 +2,8 @@ ...@@ -2,10 +2,8 @@
module Geo module Geo
class JobArtifactRegistryFinder < FileRegistryFinder class JobArtifactRegistryFinder < FileRegistryFinder
# Counts all existing registries independent
# of any change on filters / selective sync
def count_registry def count_registry
Geo::JobArtifactRegistry.count syncable.count
end end
def count_syncable def count_syncable
...@@ -13,22 +11,19 @@ module Geo ...@@ -13,22 +11,19 @@ module Geo
end end
def count_synced def count_synced
registries_for_job_artifacts.merge(Geo::JobArtifactRegistry.synced).count syncable.synced.count
end end
def count_failed def count_failed
registries_for_job_artifacts.merge(Geo::JobArtifactRegistry.failed).count syncable.failed.count
end end
def count_synced_missing_on_primary def count_synced_missing_on_primary
registries_for_job_artifacts.merge(Geo::JobArtifactRegistry.synced.missing_on_primary).count syncable.synced.missing_on_primary.count
end end
def syncable def syncable
return job_artifacts.not_expired if selective_sync? Geo::JobArtifactRegistry
return Ci::JobArtifact.not_expired.with_files_stored_locally if local_storage_only?
Ci::JobArtifact.not_expired
end end
# Returns untracked IDs as well as tracked IDs that are unused. # Returns untracked IDs as well as tracked IDs that are unused.
...@@ -49,16 +44,8 @@ module Geo ...@@ -49,16 +44,8 @@ module Geo
# #
# @return [Array] the first element is an Array of untracked IDs, and the second element is an Array of tracked IDs that are unused # @return [Array] the first element is an Array of untracked IDs, and the second element is an Array of tracked IDs that are unused
def find_registry_differences(range) def find_registry_differences(range)
# rubocop:disable CodeReuse/ActiveRecord source_ids = job_artifacts.id_in(range).pluck(::Ci::JobArtifact.arel_table[:id]) # rubocop:disable CodeReuse/ActiveRecord
source_ids = tracked_ids = syncable.pluck_model_ids_in_range(range)
job_artifacts(fdw: false)
.id_in(range)
.pluck(::Ci::JobArtifact.arel_table[:id])
# rubocop:enable CodeReuse/ActiveRecord
tracked_ids =
Geo::JobArtifactRegistry
.pluck_model_ids_in_range(range)
untracked_ids = source_ids - tracked_ids untracked_ids = source_ids - tracked_ids
unused_tracked_ids = tracked_ids - source_ids unused_tracked_ids = tracked_ids - source_ids
...@@ -84,49 +71,17 @@ module Geo ...@@ -84,49 +71,17 @@ module Geo
# @param [Array<Integer>] except_ids ids that will be ignored from the query # @param [Array<Integer>] except_ids ids that will be ignored from the query
# rubocop:disable CodeReuse/ActiveRecord # rubocop:disable CodeReuse/ActiveRecord
def find_never_synced_registries(batch_size:, except_ids: []) def find_never_synced_registries(batch_size:, except_ids: [])
Geo::JobArtifactRegistry syncable
.never .never
.model_id_not_in(except_ids) .model_id_not_in(except_ids)
.limit(batch_size) .limit(batch_size)
end end
alias_method :find_unsynced, :find_never_synced_registries
# rubocop:enable CodeReuse/ActiveRecord # rubocop:enable CodeReuse/ActiveRecord
# Deprecated in favor of the process using
# #find_registry_differences and #find_never_synced_registries
#
# Find limited amount of non replicated job artifacts.
#
# You can pass a list with `except_ids:` so you can exclude items you
# already scheduled but haven't finished and aren't persisted to the database yet
#
# TODO: Alternative here is to use some sort of window function with a cursor instead
# of simply limiting the query and passing a list of items we don't want
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_ids ids that will be ignored from the query
# rubocop: disable CodeReuse/ActiveRecord
def find_unsynced(batch_size:, except_ids: [])
job_artifacts
.not_expired
.missing_job_artifact_registry
.id_not_in(except_ids)
.limit(batch_size)
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def find_migrated_local(batch_size:, except_ids: [])
all_job_artifacts
.inner_join_job_artifact_registry
.with_files_stored_remotely
.id_not_in(except_ids)
.limit(batch_size)
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def find_retryable_failed_registries(batch_size:, except_ids: []) def find_retryable_failed_registries(batch_size:, except_ids: [])
Geo::JobArtifactRegistry syncable
.failed .failed
.retry_due .retry_due
.model_id_not_in(except_ids) .model_id_not_in(except_ids)
...@@ -136,7 +91,7 @@ module Geo ...@@ -136,7 +91,7 @@ module Geo
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
def find_retryable_synced_missing_on_primary_registries(batch_size:, except_ids: []) def find_retryable_synced_missing_on_primary_registries(batch_size:, except_ids: [])
Geo::JobArtifactRegistry syncable
.synced .synced
.missing_on_primary .missing_on_primary
.retry_due .retry_due
...@@ -147,18 +102,12 @@ module Geo ...@@ -147,18 +102,12 @@ module Geo
private private
def job_artifacts(fdw: true) def job_artifacts
local_storage_only?(fdw: fdw) ? all_job_artifacts(fdw: fdw).with_files_stored_locally : all_job_artifacts(fdw: fdw) local_storage_only?(fdw: false) ? all_job_artifacts.with_files_stored_locally : all_job_artifacts
end
def all_job_artifacts(fdw: true)
current_node(fdw: fdw).job_artifacts
end end
def registries_for_job_artifacts def all_job_artifacts
job_artifacts current_node(fdw: false).job_artifacts
.inner_join_job_artifact_registry
.not_expired
end end
end end
end end
...@@ -9,16 +9,7 @@ class Geo::JobArtifactRegistry < Geo::BaseRegistry ...@@ -9,16 +9,7 @@ class Geo::JobArtifactRegistry < Geo::BaseRegistry
scope :never, -> { where(success: false, retry_count: nil) } scope :never, -> { where(success: false, retry_count: nil) }
def self.failed def self.failed
if registry_consistency_worker_enabled? where(success: false).where.not(retry_count: nil)
where(success: false).where.not(retry_count: nil)
else
# Would do `super` except it doesn't work with an included scope
where(success: false)
end
end
def self.registry_consistency_worker_enabled?
Feature.enabled?(:geo_job_artifact_registry_ssot_sync, default_enabled: true)
end end
def self.finder_class def self.finder_class
......
...@@ -12,13 +12,9 @@ module Geo ...@@ -12,13 +12,9 @@ module Geo
end end
def find_unsynced_jobs(batch_size:) def find_unsynced_jobs(batch_size:)
if Geo::JobArtifactRegistry.registry_consistency_worker_enabled? convert_registry_relation_to_job_args(
convert_registry_relation_to_job_args( registry_finder.find_never_synced_registries(find_batch_params(batch_size))
registry_finder.find_never_synced_registries(find_batch_params(batch_size)) )
)
else
super
end
end end
end end
end end
......
...@@ -37,10 +37,7 @@ module Geo ...@@ -37,10 +37,7 @@ module Geo
end end
def find_migrated_local_objects(batch_size:) def find_migrated_local_objects(batch_size:)
attachment_ids = find_migrated_local_attachments_ids(batch_size: batch_size) find_migrated_local_attachments_ids(batch_size: batch_size)
job_artifact_ids = find_migrated_local_job_artifacts_ids(batch_size: batch_size)
take_batch(attachment_ids, job_artifact_ids)
end end
# rubocop: disable CodeReuse/ActiveRecord # rubocop: disable CodeReuse/ActiveRecord
...@@ -53,16 +50,6 @@ module Geo ...@@ -53,16 +50,6 @@ module Geo
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def find_migrated_local_job_artifacts_ids(batch_size:)
return [] unless job_artifacts_object_store_enabled?
job_artifacts_finder.find_migrated_local(batch_size: batch_size, except_ids: scheduled_file_ids(:job_artifact))
.pluck(Geo::Fdw::Ci::JobArtifact.arel_table[:id])
.map { |id| ['job_artifact', id] }
end
# rubocop: enable CodeReuse/ActiveRecord
def scheduled_file_ids(file_types) def scheduled_file_ids(file_types)
file_types = Array(file_types) file_types = Array(file_types)
file_types = file_types.map(&:to_s) file_types = file_types.map(&:to_s)
...@@ -74,13 +61,8 @@ module Geo ...@@ -74,13 +61,8 @@ module Geo
FileUploader.object_store_enabled? FileUploader.object_store_enabled?
end end
def job_artifacts_object_store_enabled?
JobArtifactUploader.object_store_enabled?
end
def object_store_enabled? def object_store_enabled?
attachments_object_store_enabled? || attachments_object_store_enabled?
job_artifacts_object_store_enabled?
end end
def sync_object_storage_enabled? def sync_object_storage_enabled?
...@@ -90,9 +72,5 @@ module Geo ...@@ -90,9 +72,5 @@ module Geo
def attachments_finder def attachments_finder
@attachments_finder ||= AttachmentRegistryFinder.new(current_node_id: current_node.id) @attachments_finder ||= AttachmentRegistryFinder.new(current_node_id: current_node.id)
end end
def job_artifacts_finder
@job_artifacts_finder ||= JobArtifactRegistryFinder.new(current_node_id: current_node.id)
end
end end
end end
---
title: 'Geo: Make registry table SSOT for job artifacts'
merge_request: 34590
author:
type: performance
...@@ -2,495 +2,394 @@ ...@@ -2,495 +2,394 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::JobArtifactRegistryFinder, :geo_fdw do RSpec.describe Geo::JobArtifactRegistryFinder, :geo do
include ::EE::GeoHelpers include ::EE::GeoHelpers
# Using let() instead of set() because set() does not work properly let_it_be(:secondary) { create(:geo_node) }
# when using the :delete DatabaseCleaner strategy, which is required for FDW
# tests because a foreign table can't see changes inside a transaction of a
# different connection.
let(:secondary) { create(:geo_node) }
let(:synced_group) { create(:group) }
let(:synced_project) { create(:project, group: synced_group) }
let(:unsynced_project) { create(:project) }
let(:project_broken_storage) { create(:project, :broken_storage) }
subject { described_class.new(current_node_id: secondary.id) }
before do before do
stub_current_geo_node(secondary) stub_current_geo_node(secondary)
stub_artifacts_object_storage stub_artifacts_object_storage
end end
let!(:job_artifact_synced_project) { create(:ci_job_artifact, project: synced_project) } let_it_be(:synced_group) { create(:group) }
let!(:job_artifact_unsynced_project) { create(:ci_job_artifact, project: unsynced_project) } let_it_be(:nested_group_1) { create(:group, parent: synced_group) }
let!(:job_artifact_broken_storage_1) { create(:ci_job_artifact, project: project_broken_storage) } let_it_be(:synced_project) { create(:project, group: synced_group) }
let!(:job_artifact_broken_storage_2) { create(:ci_job_artifact, project: project_broken_storage) } let_it_be(:synced_project_in_nested_group) { create(:project, group: nested_group_1) }
let!(:job_artifact_expired_synced_project) { create(:ci_job_artifact, :expired, project: synced_project) } let_it_be(:unsynced_project) { create(:project) }
let!(:job_artifact_expired_broken_storage) { create(:ci_job_artifact, :expired, project: project_broken_storage) } let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
let!(:job_artifact_remote_synced_project) { create(:ci_job_artifact, :remote_store, project: synced_project) }
let!(:job_artifact_remote_unsynced_project) { create(:ci_job_artifact, :remote_store, project: unsynced_project) } let!(:ci_job_artifact_1) { create(:ci_job_artifact, project: synced_project) }
let!(:job_artifact_remote_broken_storage) { create(:ci_job_artifact, :expired, :remote_store, project: project_broken_storage) } let!(:ci_job_artifact_2) { create(:ci_job_artifact, project: synced_project_in_nested_group) }
let!(:ci_job_artifact_3) { create(:ci_job_artifact, project: synced_project_in_nested_group) }
context 'counts all the things' do let!(:ci_job_artifact_4) { create(:ci_job_artifact, project: unsynced_project) }
describe '#count_syncable' do let!(:ci_job_artifact_5) { create(:ci_job_artifact, project: project_broken_storage) }
it 'counts non-expired job artifacts' do let!(:ci_job_artifact_remote_1) { create(:ci_job_artifact, :remote_store) }
expect(subject.count_syncable).to eq 6 let!(:ci_job_artifact_remote_2) { create(:ci_job_artifact, :remote_store) }
end let!(:ci_job_artifact_remote_3) { create(:ci_job_artifact, :remote_store) }
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'counts non-expired job artifacts' do
expect(subject.count_syncable).to eq 2
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'counts non-expired job artifacts' do subject { described_class.new(current_node_id: secondary.id) }
expect(subject.count_syncable).to eq 2
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'counts non-expired job artifacts' do describe '#count_syncable' do
expect(subject.count_syncable).to eq 4 it 'counts registries for job artifacts' do
end create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
end create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
expect(subject.count_syncable).to eq 8
end end
end
describe '#count_synced' do describe '#count_registry' do
before do it 'counts registries for job artifacts' do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id, success: false) create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_unsynced_project.id) create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id) create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_2.id, success: false) create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_synced_project.id) create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_broken_storage.id) create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id) create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
end create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
context 'without selective sync' do expect(subject.count_registry).to eq 8
it 'counts job artifacts that have been synced ignoring expired job artifacts' do
expect(subject.count_synced).to eq 3
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'counts job artifacts that has been synced ignoring expired job artifacts' do
expect(subject.count_synced).to eq 1
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'counts job artifacts that has been synced ignoring expired job artifacts' do
expect(subject.count_synced).to eq 1
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'counts job artifacts that has been synced ignoring expired job artifacts' do
expect(subject.count_synced).to eq 2
end
end
end end
end
describe '#count_failed' do describe '#count_synced' do
before do it 'counts registries that has been synced' do
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_synced_project.id) create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_unsynced_project.id) create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_broken_storage_1.id) create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_expired_synced_project.id) create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_expired_broken_storage.id) create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_remote_synced_project.id) create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_remote_broken_storage.id) create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
end create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
context 'without selective sync' do expect(subject.count_synced).to eq 3
it 'counts job artifacts that sync has failed ignoring expired ones' do end
expect(subject.count_failed).to eq 3 end
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'counts job artifacts that sync has failed ignoring expired ones' do
expect(subject.count_failed).to eq 2
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'counts job artifacts that sync has failed ignoring expired ones' do
expect(subject.count_failed).to eq 1
end
end
context 'with object storage sync disabled' do describe '#count_failed' do
let(:secondary) { create(:geo_node, :local_storage_only) } it 'counts registries that sync has failed' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
expect(subject.count_failed).to eq 3
end
end
it 'counts job artifacts that sync has failed ignoring expired ones' do describe '#count_synced_missing_on_primary' do
expect(subject.count_failed).to eq 2 it 'counts registries that have been synced and are missing on the primary, excluding not synced ones' do
end create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
end create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
expect(subject.count_synced_missing_on_primary).to eq 3
end end
end
describe '#count_synced_missing_on_primary' do describe '#find_registry_differences' do
context 'untracked IDs' do
before do before do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id, success: false, missing_on_primary: false) create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_unsynced_project.id) create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id, missing_on_primary: true) create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_2.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_synced_project.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_broken_storage.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_unsynced_project.id, missing_on_primary: false)
end
context 'without selective sync' do
it 'counts job artifacts that have been synced and are missing on the primary, ignoring expired ones' do
expect(subject.count_synced_missing_on_primary).to eq 2
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'counts job artifacts that have been synced and are missing on the primary, ignoring expired ones' do
expect(subject.count_synced_missing_on_primary).to eq 1
end
end end
context 'with selective sync by shard' do it 'includes job artifact IDs without an entry on the tracking database' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) } untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
it 'counts job artifacts that have been synced and are missing on the primary, ignoring expired ones' do expect(untracked_ids).to match_array(
expect(subject.count_synced_missing_on_primary).to eq 1 [ci_job_artifact_2.id, ci_job_artifact_5.id, ci_job_artifact_remote_1.id,
end ci_job_artifact_remote_2.id, ci_job_artifact_remote_3.id])
end end
context 'with object storage sync disabled' do it 'excludes job artifacts outside the ID range' do
let(:secondary) { create(:geo_node, :local_storage_only) } untracked_ids, _ = subject.find_registry_differences(ci_job_artifact_3.id..ci_job_artifact_remote_2.id)
it 'counts job artifacts that have been synced and are missing on the primary, ignoring expired ones' do expect(untracked_ids).to match_array(
expect(subject.count_synced_missing_on_primary).to eq 1 [ci_job_artifact_5.id, ci_job_artifact_remote_1.id,
end ci_job_artifact_remote_2.id])
end
end
describe '#count_registry' do
before do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_2.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_unsynced_project.id)
end
it 'counts file registries for job artifacts' do
expect(subject.count_registry).to eq 4
end end
context 'with selective sync by namespace' do context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) } let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'does not apply the selective sync restriction' do it 'excludes job artifact IDs that are not in selectively synced projects' do
expect(subject.count_registry).to eq 4 untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([ci_job_artifact_2.id])
end end
end end
context 'with selective sync by shard' do context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) } let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'does not apply the selective sync restriction' do it 'excludes job artifact IDs that are not in selectively synced projects' do
expect(subject.count_registry).to eq 4 untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([ci_job_artifact_5.id])
end end
end end
context 'with object storage sync disabled' do context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) } let(:secondary) { create(:geo_node, :local_storage_only) }
it 'counts file registries for job artifacts ignoring remote artifacts' do it 'excludes job artifacts in object storage' do
expect(subject.count_registry).to eq 4
end
end
end
end
context 'finds all the things' do
describe '#find_registry_differences' do
context 'untracked IDs' do
before do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_broken_storage_1.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_unsynced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_broken_storage.id)
end
it 'includes Job Artifact IDs without an entry on the tracking database' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id) untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array( expect(untracked_ids).to match_array([ci_job_artifact_2.id, ci_job_artifact_5.id])
[job_artifact_unsynced_project.id, job_artifact_remote_synced_project.id,
job_artifact_broken_storage_2.id, job_artifact_expired_synced_project.id,
job_artifact_remote_broken_storage.id])
end end
end
end
it 'excludes Job Artifacts outside the ID range' do context 'unused tracked IDs' do
untracked_ids, _ = subject.find_registry_differences(job_artifact_unsynced_project.id..job_artifact_broken_storage_2.id) context 'with an orphaned registry' do
let!(:orphaned) { create(:geo_job_artifact_registry, artifact_id: non_existing_record_id) }
expect(untracked_ids).to match_array(
[job_artifact_unsynced_project.id, job_artifact_broken_storage_2.id])
end
context 'with selective sync by namespace' do it 'includes tracked IDs that do not exist in the model table' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) } range = non_existing_record_id..non_existing_record_id
it 'excludes Job Artifacts that are not in selectively synced projects' do _, unused_tracked_ids = subject.find_registry_differences(range)
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([job_artifact_expired_synced_project.id, job_artifact_remote_synced_project.id]) expect(unused_tracked_ids).to match_array([non_existing_record_id])
end
end end
context 'with selective sync by shard' do it 'excludes IDs outside the ID range' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) } range = 1..1000
it 'excludes Job Artifacts that are not in selectively synced projects' do _, unused_tracked_ids = subject.find_registry_differences(range)
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([job_artifact_broken_storage_2.id, job_artifact_remote_broken_storage.id]) expect(unused_tracked_ids).to be_empty
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'excludes Job Artifacts in object storage' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array(
[job_artifact_unsynced_project.id, job_artifact_broken_storage_2.id,
job_artifact_expired_synced_project.id])
end
end end
end end
context 'unused tracked IDs' do context 'with selective sync by namespace' do
context 'with an orphaned registry' do let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
let!(:orphaned) { create(:geo_job_artifact_registry, artifact_id: non_existing_record_id) }
it 'includes tracked IDs that do not exist in the model table' do
_, unused_tracked_ids = subject.find_registry_differences(non_existing_record_id..non_existing_record_id)
expect(unused_tracked_ids).to match_array([non_existing_record_id])
end
it 'excludes IDs outside the ID range' do
_, unused_tracked_ids = subject.find_registry_differences(1..1000)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do context 'with a tracked job artifact' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) } let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id) }
let(:range) { ci_job_artifact_1.id..ci_job_artifact_4.id }
context 'with a tracked Job Artifact' do context 'excluded from selective sync' do
it 'includes tracked Job Artifact IDs that exist but are not in a selectively synced project' do it 'includes tracked job artifact IDs that exist but are not in a selectively synced project' do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id) create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_unsynced_project.id)
_, unused_tracked_ids = subject.find_registry_differences(job_artifact_synced_project.id..job_artifact_unsynced_project.id) _, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([job_artifact_unsynced_project.id]) expect(unused_tracked_ids).to match_array([ci_job_artifact_4.id])
end end
end end
context 'without a tracked Job Artifact' do context 'included in selective sync' do
it 'returns empty' do it 'excludes tracked job artifact IDs that are in selectively synced projects' do
_, unused_tracked_ids = subject.find_registry_differences(job_artifact_synced_project.id..job_artifact_unsynced_project.id) _, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty expect(unused_tracked_ids).to be_empty
end end
end end
end end
end
context 'with selective sync by shard' do context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) } let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked job artifact' do
let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id) }
let(:range) { ci_job_artifact_1.id..ci_job_artifact_5.id }
context 'with a tracked Job Artifact' do context 'excluded from selective sync' do
it 'includes tracked Job Artifact IDs that exist but are not in a selectively synced project' do it 'includes tracked job artifact IDs that exist but are not in a selectively synced shard' do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id) create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id)
_, unused_tracked_ids = subject.find_registry_differences(job_artifact_synced_project.id..job_artifact_broken_storage_1.id) _, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([job_artifact_synced_project.id]) expect(unused_tracked_ids).to match_array([ci_job_artifact_1.id])
end end
end end
context 'without a tracked Job Artifact' do context 'included in selective sync' do
it 'returns empty' do it 'excludes tracked job artifact IDs that are in selectively synced shards' do
_, unused_tracked_ids = subject.find_registry_differences(job_artifact_synced_project.id..job_artifact_broken_storage_1.id) _, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty expect(unused_tracked_ids).to be_empty
end end
end end
end end
end
context 'with object storage sync disabled' do context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) } let(:secondary) { create(:geo_node, :local_storage_only) }
context 'with a tracked Job Artifact' do context 'with a tracked job artifact' do
context 'in object storage' do context 'in object storage' do
it 'includes tracked Job Artifact IDs that are in object storage' do it 'includes tracked job artifact IDs that are in object storage' do
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id) create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_1.id)
range = job_artifact_remote_synced_project.id..job_artifact_remote_synced_project.id range = ci_job_artifact_remote_1.id..ci_job_artifact_remote_1.id
_, unused_tracked_ids = subject.find_registry_differences(range) _, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([job_artifact_remote_synced_project.id]) expect(unused_tracked_ids).to match_array([ci_job_artifact_remote_1.id])
end
end end
end
context 'not in object storage' do context 'not in object storage' do
it 'excludes tracked Job Artifact IDs that are not in object storage' do it 'excludes tracked job artifact IDs that are not in object storage' do
create(:geo_lfs_object_registry, lfs_object_id: job_artifact_synced_project.id) create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
range = job_artifact_synced_project.id..job_artifact_synced_project.id range = ci_job_artifact_1.id..ci_job_artifact_1.id
_, unused_tracked_ids = subject.find_registry_differences(range) _, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty expect(unused_tracked_ids).to be_empty
end
end end
end end
end end
end end
end end
end
describe '#find_never_synced_registries' do describe '#find_never_synced_registries' do
let!(:registry_job_artifact_1) { create(:geo_job_artifact_registry, :never_synced, artifact_id: job_artifact_synced_project.id) } it 'returns registries for job artifacts that have never been synced' do
let!(:registry_job_artifact_2) { create(:geo_job_artifact_registry, :never_synced, artifact_id: job_artifact_unsynced_project.id) } create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
let!(:registry_job_artifact_3) { create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id) } create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
let!(:registry_job_artifact_4) { create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_broken_storage_2.id) } registry_ci_job_artifact_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
let!(:registry_job_artifact_remote_1) { create(:geo_job_artifact_registry, :never_synced, artifact_id: job_artifact_remote_synced_project.id) } create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns registries for Job Artifacts that have never been synced' do registries = subject.find_never_synced_registries(batch_size: 10)
registries = subject.find_never_synced_registries(batch_size: 10)
expect(registries).to match_ids(registry_job_artifact_1, registry_job_artifact_2, registry_job_artifact_remote_1) expect(registries).to match_ids(registry_ci_job_artifact_3, registry_ci_job_artifact_remote_3)
end
end end
describe '#find_unsynced' do it 'excludes except_ids' do
before do create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id, success: false) create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id, success: true) create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_broken_storage.id, success: true) create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
end create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
context 'without selective sync' do create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
it 'returns job artifacts without an entry on the tracking database, ignoring expired ones' do registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
job_artifacts = subject.find_unsynced(batch_size: 10, except_ids: [job_artifact_unsynced_project.id])
expect(job_artifacts).to match_ids(job_artifact_remote_synced_project, job_artifact_remote_unsynced_project, registries = subject.find_never_synced_registries(batch_size: 10, except_ids: [ci_job_artifact_3.id])
job_artifact_broken_storage_2)
end
end
context 'with selective sync by namespace' do expect(registries).to match_ids(registry_ci_job_artifact_remote_3)
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) } end
end
it 'returns job artifacts without an entry on the tracking database, ignoring expired ones' do
job_artifacts = subject.find_unsynced(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_remote_synced_project)
end
end
context 'with selective sync by shard' do describe '#find_unsynced' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) } it 'returns registries for job artifacts that have never been synced' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
registry_ci_job_artifact_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts without an entry on the tracking database, ignoring expired ones' do registries = subject.find_unsynced(batch_size: 10)
job_artifacts = subject.find_unsynced(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_broken_storage_2) expect(registries).to match_ids(registry_ci_job_artifact_3, registry_ci_job_artifact_remote_3)
end end
end
context 'with object storage sync disabled' do it 'excludes except_ids' do
let(:secondary) { create(:geo_node, :local_storage_only) } create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts without an entry on the tracking database, ignoring expired ones and remotes' do registries = subject.find_unsynced(batch_size: 10, except_ids: [ci_job_artifact_3.id])
job_artifacts = subject.find_unsynced(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_unsynced_project, job_artifact_broken_storage_2) expect(registries).to match_ids(registry_ci_job_artifact_remote_3)
end
end
end end
end
describe '#find_migrated_local' do describe '#find_retryable_failed_registries' do
before do it 'returns registries for job artifacts that have failed to sync' do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id) registry_ci_job_artifact_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id) create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_unsynced_project.id) create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_broken_storage.id) registry_ci_job_artifact_4 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
end create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
registry_ci_job_artifact_remote_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
it 'returns job artifacts excluding ones from the exception list' do create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
job_artifacts = subject.find_migrated_local(batch_size: 10, except_ids: [job_artifact_remote_synced_project.id]) create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
expect(job_artifacts).to match_ids(job_artifact_remote_unsynced_project, job_artifact_remote_broken_storage)
end
it 'includes synced job artifacts that are expired, exclude stored locally' do registries = subject.find_retryable_failed_registries(batch_size: 10)
job_artifacts = subject.find_migrated_local(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_remote_synced_project, job_artifact_remote_unsynced_project, expect(registries).to match_ids(registry_ci_job_artifact_1, registry_ci_job_artifact_4, registry_ci_job_artifact_remote_1)
job_artifact_remote_broken_storage) end
end
context 'with selective sync by namespace' do it 'excludes except_ids' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) } registry_ci_job_artifact_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
registry_ci_job_artifact_remote_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts remotely and successfully synced locally' do registries = subject.find_retryable_failed_registries(batch_size: 10, except_ids: [ci_job_artifact_4.id])
job_artifacts = subject.find_migrated_local(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_remote_synced_project) expect(registries).to match_ids(registry_ci_job_artifact_1, registry_ci_job_artifact_remote_1)
end end
end end
context 'with selective sync by shard' do describe '#find_retryable_synced_missing_on_primary_registries' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) } it 'returns registries for job artifacts that have been synced and are missing on the primary' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
registry_ci_job_artifact_2 = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
registry_ci_job_artifact_5 = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts remotely and successfully synced locally' do registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10)
job_artifacts = subject.find_migrated_local(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_remote_broken_storage) expect(registries).to match_ids(registry_ci_job_artifact_2, registry_ci_job_artifact_5)
end end
end
context 'with object storage sync disabled' do it 'excludes except_ids' do
let(:secondary) { create(:geo_node, :local_storage_only) } create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
registry_ci_job_artifact_2 = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts excluding ones from the exception list' do registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10, except_ids: [ci_job_artifact_5.id])
job_artifacts = subject.find_migrated_local(batch_size: 10, except_ids: [job_artifact_remote_synced_project.id])
expect(job_artifacts).to match_ids(job_artifact_remote_unsynced_project, job_artifact_remote_broken_storage) expect(registries).to match_ids(registry_ci_job_artifact_2)
end
end
end end
end end
......
...@@ -321,40 +321,16 @@ RSpec.describe GeoNodeStatus, :geo, :geo_fdw do ...@@ -321,40 +321,16 @@ RSpec.describe GeoNodeStatus, :geo, :geo_fdw do
end end
describe '#job_artifacts_failed_count' do describe '#job_artifacts_failed_count' do
context 'when geo_job_artifact_registry_ssot_sync is disabled' do it 'counts failed job artifacts' do
before do # These should be ignored
stub_feature_flags(geo_job_artifact_registry_ssot_sync: false) create(:geo_upload_registry, :failed)
end create(:geo_upload_registry, :avatar, :failed)
create(:geo_upload_registry, :attachment, :failed)
it 'counts failed job artifacts' do create(:geo_job_artifact_registry, :with_artifact, success: true)
# These should be ignored
create(:geo_upload_registry, :failed)
create(:geo_upload_registry, :avatar, :failed)
create(:geo_upload_registry, :attachment, :failed)
create(:geo_job_artifact_registry, :with_artifact, success: true)
create(:geo_job_artifact_registry, :with_artifact, success: false)
expect(subject.job_artifacts_failed_count).to eq(1)
end
end
context 'when geo_job_artifact_registry_ssot_sync is enabled' do
before do
stub_feature_flags(geo_job_artifact_registry_ssot_sync: true)
end
it 'counts failed job artifacts' do
# These should be ignored
create(:geo_upload_registry, :failed)
create(:geo_upload_registry, :avatar, :failed)
create(:geo_upload_registry, :attachment, :failed)
create(:geo_job_artifact_registry, :with_artifact, success: true)
create(:geo_job_artifact_registry, :with_artifact, :failed) create(:geo_job_artifact_registry, :with_artifact, :failed)
expect(subject.job_artifacts_failed_count).to eq(1) expect(subject.job_artifacts_failed_count).to eq(1)
end
end end
end end
......
...@@ -361,223 +361,107 @@ RSpec.describe Geo::FileDownloadDispatchWorker, :geo, :geo_fdw, :use_sql_query_c ...@@ -361,223 +361,107 @@ RSpec.describe Geo::FileDownloadDispatchWorker, :geo, :geo_fdw, :use_sql_query_c
end end
context 'with job artifacts' do context 'with job artifacts' do
context 'with geo_job_artifact_registry_ssot_sync feature enabled' do it 'performs Geo::FileDownloadWorker for unsynced job artifacts' do
before do registry = create(:geo_job_artifact_registry, :with_artifact, :never_synced)
stub_feature_flags(geo_job_artifact_registry_ssot_sync: true)
end
# A registry row created with the :never_synced trait must be dispatched
# to the download worker exactly once, keyed by its artifact ID.
it 'performs Geo::FileDownloadWorker for unsynced job artifacts' do
  never_synced_registry = create(:geo_job_artifact_registry, :with_artifact, :never_synced)
  artifact_id = never_synced_registry.artifact_id

  expect(Geo::FileDownloadWorker)
    .to receive(:perform_async).with('job_artifact', artifact_id).once.and_return(spy)

  subject.perform
end
# A registry row created with the :failed trait must be dispatched
# to the download worker exactly once.
it 'performs Geo::FileDownloadWorker for failed-sync job artifacts' do
  failed_registry = create(:geo_job_artifact_registry, :with_artifact, :failed)
  artifact_id = failed_registry.artifact_id

  expect(Geo::FileDownloadWorker)
    .to receive(:perform_async).with('job_artifact', artifact_id).once.and_return(spy)

  subject.perform
end
# A registry row flagged success: true must not be dispatched for its artifact.
it 'does not perform Geo::FileDownloadWorker for synced job artifacts' do
  synced_registry = create(:geo_job_artifact_registry, :with_artifact, bytes: 1234, success: true)
  artifact_id = synced_registry.artifact_id

  expect(Geo::FileDownloadWorker).not_to receive(:perform_async).with('job_artifact', artifact_id)

  subject.perform
end
# success: true is what counts: a zero-byte registry row flagged as
# successful must not be dispatched either.
it 'does not perform Geo::FileDownloadWorker for synced job artifacts even with 0 bytes downloaded' do
  empty_synced_registry = create(:geo_job_artifact_registry, :with_artifact, bytes: 0, success: true)
  artifact_id = empty_synced_registry.artifact_id

  expect(Geo::FileDownloadWorker).not_to receive(:perform_async).with('job_artifact', artifact_id)

  subject.perform
end
# A failed registry row whose retry_at is set to tomorrow must not be dispatched.
it 'does not retry failed artifacts when retry_at is tomorrow' do
  postponed_registry = create(:geo_job_artifact_registry, :with_artifact, :failed, retry_at: Date.tomorrow)
  artifact_id = postponed_registry.artifact_id

  expect(Geo::FileDownloadWorker).not_to receive(:perform_async).with('job_artifact', artifact_id)

  subject.perform
end
# A failed registry row whose retry_at is set to yesterday must be
# dispatched exactly once.
it 'retries failed artifacts when retry_at is in the past' do
  due_registry = create(:geo_job_artifact_registry, :with_artifact, :failed, retry_at: Date.yesterday)
  artifact_id = due_registry.artifact_id

  expect(Geo::FileDownloadWorker)
    .to receive(:perform_async).with('job_artifact', artifact_id).once.and_return(spy)

  subject.perform
end
context 'with files missing on the primary that are marked as synced' do expect(Geo::FileDownloadWorker).to receive(:perform_async)
let!(:artifact_file_missing_on_primary) { create(:ci_job_artifact) } .with('job_artifact', registry.artifact_id).once.and_return(spy)
let!(:artifact_registry) { create(:geo_job_artifact_registry, artifact_id: artifact_file_missing_on_primary.id, bytes: 1234, success: true, missing_on_primary: true) }
it 'retries the files if there is spare capacity' do subject.perform
registry = create(:geo_job_artifact_registry, :with_artifact, :never_synced) end
expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', registry.artifact_id)
expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', artifact_file_missing_on_primary.id)
subject.perform
end
it 'retries failed files with retry_at in the past' do it 'performs Geo::FileDownloadWorker for failed-sync job artifacts' do
artifact_registry.update!(retry_at: Date.yesterday) registry = create(:geo_job_artifact_registry, :with_artifact, :failed)
expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', artifact_file_missing_on_primary.id) expect(Geo::FileDownloadWorker).to receive(:perform_async)
.with('job_artifact', registry.artifact_id).once.and_return(spy)
subject.perform subject.perform
end end
it 'does not retry files with later retry_at' do it 'does not perform Geo::FileDownloadWorker for synced job artifacts' do
artifact_registry.update!(retry_at: Date.tomorrow) registry = create(:geo_job_artifact_registry, :with_artifact, bytes: 1234, success: true)
expect(Geo::FileDownloadWorker).not_to receive(:perform_async).with('job_artifact', artifact_file_missing_on_primary.id) expect(Geo::FileDownloadWorker).not_to receive(:perform_async)
.with('job_artifact', registry.artifact_id)
subject.perform subject.perform
end end
it 'does not retry those files if there is no spare capacity' do it 'does not perform Geo::FileDownloadWorker for synced job artifacts even with 0 bytes downloaded' do
registry = create(:geo_job_artifact_registry, :with_artifact, :never_synced) registry = create(:geo_job_artifact_registry, :with_artifact, bytes: 0, success: true)
expect(subject).to receive(:db_retrieve_batch_size).and_return(1).twice expect(Geo::FileDownloadWorker).not_to receive(:perform_async)
expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', registry.artifact_id) .with('job_artifact', registry.artifact_id)
subject.perform subject.perform
end end
it 'does not retry those files if they are already scheduled' do it 'does not retry failed artifacts when retry_at is tomorrow' do
registry = create(:geo_job_artifact_registry, :with_artifact, :never_synced) registry = create(:geo_job_artifact_registry, :with_artifact, :failed, retry_at: Date.tomorrow)
scheduled_jobs = [{ type: 'job_artifact', id: artifact_file_missing_on_primary.id, job_id: 'foo' }] expect(Geo::FileDownloadWorker).not_to receive(:perform_async)
expect(subject).to receive(:scheduled_jobs).and_return(scheduled_jobs).at_least(1) .with('job_artifact', registry.artifact_id)
expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', registry.artifact_id)
subject.perform subject.perform
end
end
end end
context 'with geo_job_artifact_registry_ssot_sync feature disabled' do it 'retries failed artifacts when retry_at is in the past' do
before do registry = create(:geo_job_artifact_registry, :with_artifact, :failed, retry_at: Date.yesterday)
stub_feature_flags(geo_job_artifact_registry_ssot_sync: false)
end
it 'performs Geo::FileDownloadWorker for unsynced job artifacts' do expect(Geo::FileDownloadWorker).to receive(:perform_async)
artifact = create(:ci_job_artifact) .with('job_artifact', registry.artifact_id).once.and_return(spy)
expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', artifact.id) subject.perform
end
subject.perform
end
it 'performs Geo::FileDownloadWorker for failed-sync job artifacts' do context 'with files missing on the primary that are marked as synced' do
artifact = create(:ci_job_artifact) let!(:artifact_file_missing_on_primary) { create(:ci_job_artifact) }
let!(:artifact_registry) { create(:geo_job_artifact_registry, artifact_id: artifact_file_missing_on_primary.id, bytes: 1234, success: true, missing_on_primary: true) }
create(:geo_job_artifact_registry, artifact_id: artifact.id, bytes: 0, success: false) it 'retries the files if there is spare capacity' do
registry = create(:geo_job_artifact_registry, :with_artifact, :never_synced)
expect(Geo::FileDownloadWorker).to receive(:perform_async) expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', registry.artifact_id)
.with('job_artifact', artifact.id).once.and_return(spy) expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', artifact_file_missing_on_primary.id)
subject.perform subject.perform
end end
it 'does not perform Geo::FileDownloadWorker for synced job artifacts' do it 'retries failed files with retry_at in the past' do
artifact = create(:ci_job_artifact) artifact_registry.update!(retry_at: Date.yesterday)
create(:geo_job_artifact_registry, artifact_id: artifact.id, bytes: 1234, success: true)
expect(Geo::FileDownloadWorker).not_to receive(:perform_async) expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', artifact_file_missing_on_primary.id)
subject.perform subject.perform
end end
it 'does not perform Geo::FileDownloadWorker for synced job artifacts even with 0 bytes downloaded' do it 'does not retry files with later retry_at' do
artifact = create(:ci_job_artifact) artifact_registry.update!(retry_at: Date.tomorrow)
create(:geo_job_artifact_registry, artifact_id: artifact.id, bytes: 0, success: true) expect(Geo::FileDownloadWorker).not_to receive(:perform_async).with('job_artifact', artifact_file_missing_on_primary.id)
expect(Geo::FileDownloadWorker).not_to receive(:perform_async)
subject.perform subject.perform
end end
it 'does not retry failed artifacts when retry_at is tomorrow' do it 'does not retry those files if there is no spare capacity' do
failed_registry = create(:geo_job_artifact_registry, :with_artifact, bytes: 0, success: false, retry_at: Date.tomorrow) registry = create(:geo_job_artifact_registry, :with_artifact, :never_synced)
expect(Geo::FileDownloadWorker).not_to receive(:perform_async).with('job_artifact', failed_registry.artifact_id) expect(subject).to receive(:db_retrieve_batch_size).and_return(1).twice
expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', registry.artifact_id)
subject.perform subject.perform
end end
it 'retries failed artifacts when retry_at is in the past' do it 'does not retry those files if they are already scheduled' do
failed_registry = create(:geo_job_artifact_registry, :with_artifact, success: false, retry_at: Date.yesterday) registry = create(:geo_job_artifact_registry, :with_artifact, :never_synced)
expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', failed_registry.artifact_id) scheduled_jobs = [{ type: 'job_artifact', id: artifact_file_missing_on_primary.id, job_id: 'foo' }]
expect(subject).to receive(:scheduled_jobs).and_return(scheduled_jobs).at_least(1)
expect(Geo::FileDownloadWorker).to receive(:perform_async).with('job_artifact', registry.artifact_id)
subject.perform subject.perform
end end
# Behaviour for an artifact whose registry row is marked both successful
# and missing_on_primary: it is dispatched again only under the conditions
# each example sets up (batch size, retry_at, already-scheduled jobs).
context 'with files missing on the primary that are marked as synced' do
  let!(:artifact_file_missing_on_primary) { create(:ci_job_artifact) }

  let!(:artifact_registry) do
    create(
      :geo_job_artifact_registry,
      artifact_id: artifact_file_missing_on_primary.id,
      bytes: 1234,
      success: true,
      missing_on_primary: true
    )
  end

  it 'retries the files if there is spare capacity' do
    untracked_artifact = create(:ci_job_artifact)

    expect(Geo::FileDownloadWorker)
      .to receive(:perform_async).with('job_artifact', untracked_artifact.id)
    expect(Geo::FileDownloadWorker)
      .to receive(:perform_async).with('job_artifact', artifact_file_missing_on_primary.id)

    subject.perform
  end

  it 'retries failed files with retry_at in the past' do
    artifact_registry.update!(retry_at: Date.yesterday)

    expect(Geo::FileDownloadWorker)
      .to receive(:perform_async).with('job_artifact', artifact_file_missing_on_primary.id)

    subject.perform
  end

  it 'does not retry files with later retry_at' do
    artifact_registry.update!(retry_at: Date.tomorrow)

    expect(Geo::FileDownloadWorker)
      .not_to receive(:perform_async).with('job_artifact', artifact_file_missing_on_primary.id)

    subject.perform
  end

  it 'does not retry those files if there is no spare capacity' do
    untracked_artifact = create(:ci_job_artifact)

    # The batch size is stubbed to 1 and only the untracked artifact is
    # expected to be dispatched.
    expect(subject).to receive(:db_retrieve_batch_size).and_return(1).twice
    expect(Geo::FileDownloadWorker)
      .to receive(:perform_async).with('job_artifact', untracked_artifact.id)

    subject.perform
  end

  it 'does not retry those files if they are already scheduled' do
    untracked_artifact = create(:ci_job_artifact)
    already_scheduled = [
      { type: 'job_artifact', id: artifact_file_missing_on_primary.id, job_id: 'foo' }
    ]

    expect(subject).to receive(:scheduled_jobs).and_return(already_scheduled).at_least(1)
    expect(Geo::FileDownloadWorker)
      .to receive(:perform_async).with('job_artifact', untracked_artifact.id)

    subject.perform
  end
end
end end
end end
...@@ -637,22 +521,6 @@ RSpec.describe Geo::FileDownloadDispatchWorker, :geo, :geo_fdw, :use_sql_query_c ...@@ -637,22 +521,6 @@ RSpec.describe Geo::FileDownloadDispatchWorker, :geo, :geo_fdw, :use_sql_query_c
allow(::GeoNode).to receive(:current_node).and_return(secondary) allow(::GeoNode).to receive(:current_node).and_return(secondary)
end end
# With the geo_job_artifact_registry_ssot_sync flag stubbed off, only the
# artifact belonging to the synced group's project is expected to be dispatched.
context 'when geo_job_artifact_registry_ssot_sync feature is disabled' do
  before { stub_feature_flags(geo_job_artifact_registry_ssot_sync: false) }

  it 'does not perform Geo::FileDownloadWorker for job artifact that does not belong to selected namespaces to replicate' do
    create(:ci_job_artifact, project: unsynced_project)
    replicated_artifact = create(:ci_job_artifact, project: project_in_synced_group)

    expect(Geo::FileDownloadWorker)
      .to receive(:perform_async).with('job_artifact', replicated_artifact.id).once.and_return(spy)

    subject.perform
  end
end
context 'with geo_file_registry_ssot_sync feature disabled' do context 'with geo_file_registry_ssot_sync feature disabled' do
before do before do
stub_feature_flags(geo_file_registry_ssot_sync: false) stub_feature_flags(geo_file_registry_ssot_sync: false)
......
...@@ -141,61 +141,6 @@ RSpec.describe Geo::MigratedLocalFilesCleanUpWorker, :geo, :geo_fdw, :use_sql_qu ...@@ -141,61 +141,6 @@ RSpec.describe Geo::MigratedLocalFilesCleanUpWorker, :geo, :geo_fdw, :use_sql_qu
end end
end end
# Clean-up scheduling for job artifacts that have a registry row.
# The examples expect a removal worker to be enqueued only for artifacts
# created with the :remote_store trait, never for the locally stored one.
context 'with job artifacts' do
  let(:job_artifact_local) { create(:ci_job_artifact) }
  let(:job_artifact_remote_1) { create(:ci_job_artifact, :remote_store, project: synced_project) }

  before do
    stub_artifacts_object_storage

    create(:geo_job_artifact_registry, artifact_id: job_artifact_local.id)
    create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_1.id)
  end

  it 'schedules worker for artifact stored remotely and synced locally' do
    expect(Geo::FileRegistryRemovalWorker).to receive(:perform_async).with('job_artifact', job_artifact_remote_1.id)
    expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, job_artifact_local.id)

    subject.perform
  end

  # Description corrected: this context configures the secondary with
  # selective_sync_type: 'shards', so it exercises shard-based selective sync.
  context 'with selective sync by shard' do
    let(:job_artifact_remote_2) { create(:ci_job_artifact, :remote_store, project: project_broken_storage) }
    let(:secondary) { create(:geo_node, :local_storage_only, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }

    before do
      create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_2.id)
    end

    it 'schedules worker for artifact stored remotely and synced locally' do
      expect(Geo::FileRegistryRemovalWorker).to receive(:perform_async).with('job_artifact', job_artifact_remote_2.id)
      expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, job_artifact_remote_1.id)
      expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, job_artifact_local.id)

      subject.perform
    end
  end

  # Description corrected: this context configures the secondary with
  # selective_sync_type: 'namespaces', so it exercises namespace-based selective sync.
  context 'with selective sync by namespace' do
    let(:job_artifact_remote_2) { create(:ci_job_artifact, :remote_store, project: unsynced_project) }
    let(:secondary) { create(:geo_node, :local_storage_only, selective_sync_type: 'namespaces', namespaces: [synced_group]) }

    before do
      create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_2.id)
    end

    it 'schedules worker for artifact stored remotely and synced locally' do
      expect(Geo::FileRegistryRemovalWorker).to receive(:perform_async).with('job_artifact', job_artifact_remote_1.id)
      expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, job_artifact_remote_2.id)
      expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, job_artifact_local.id)

      subject.perform
    end
  end
end
context 'backoff time' do context 'backoff time' do
let(:cache_key) { "#{described_class.name.underscore}:skip" } let(:cache_key) { "#{described_class.name.underscore}:skip" }
......
...@@ -97,27 +97,6 @@ RSpec.describe Geo::Secondary::RegistryConsistencyWorker, :geo, :geo_fdw do ...@@ -97,27 +97,6 @@ RSpec.describe Geo::Secondary::RegistryConsistencyWorker, :geo, :geo_fdw do
expect(Geo::PackageFileRegistry.where(package_file_id: package_file.id).count).to eq(1) expect(Geo::PackageFileRegistry.where(package_file_id: package_file.id).count).to eq(1)
end end
# Exercises the consistency worker with the job artifact registry SSOT
# feature flag switched off.
context 'when geo_job_artifact_registry_ssot_sync is disabled' do
  before { stub_feature_flags(geo_job_artifact_registry_ssot_sync: false) }

  it 'returns false' do
    expect(subject.perform).to be_falsey
  end

  it 'does not execute RegistryConsistencyService for Job Artifacts' do
    # The other registries are still allowed to build their real service.
    [
      Geo::LfsObjectRegistry,
      Geo::ProjectRegistry,
      Geo::UploadRegistry,
      Geo::PackageFileRegistry
    ].each do |registry_class|
      allow(Geo::RegistryConsistencyService)
        .to receive(:new).with(registry_class, batch_size: 1000).and_call_original
    end

    # The job artifact registry must never get a consistency service.
    expect(Geo::RegistryConsistencyService).not_to receive(:new).with(Geo::JobArtifactRegistry, batch_size: 1000)

    subject.perform
  end
end
context 'when geo_file_registry_ssot_sync is disabled' do context 'when geo_file_registry_ssot_sync is disabled' do
before do before do
stub_feature_flags(geo_file_registry_ssot_sync: false) stub_feature_flags(geo_file_registry_ssot_sync: false)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment