Commit daa1639e authored by Michael Kozono's avatar Michael Kozono

Merge branch...

Merge branch '217477-remove-feature-flags-to-make-registry-table-ssot-for-job-artifacts' into 'master'

Remove feature flag to make registry table SSOT for Job Artifacts

See merge request gitlab-org/gitlab!34590
parents 62f9013b be130b7b
......@@ -2,10 +2,8 @@
module Geo
class JobArtifactRegistryFinder < FileRegistryFinder
# Counts all existing registries, regardless
# of any filters or selective sync settings
def count_registry
Geo::JobArtifactRegistry.count
syncable.count
end
def count_syncable
......@@ -13,22 +11,19 @@ module Geo
end
def count_synced
registries_for_job_artifacts.merge(Geo::JobArtifactRegistry.synced).count
syncable.synced.count
end
def count_failed
registries_for_job_artifacts.merge(Geo::JobArtifactRegistry.failed).count
syncable.failed.count
end
def count_synced_missing_on_primary
registries_for_job_artifacts.merge(Geo::JobArtifactRegistry.synced.missing_on_primary).count
syncable.synced.missing_on_primary.count
end
def syncable
return job_artifacts.not_expired if selective_sync?
return Ci::JobArtifact.not_expired.with_files_stored_locally if local_storage_only?
Ci::JobArtifact.not_expired
Geo::JobArtifactRegistry
end
# Returns untracked IDs as well as tracked IDs that are unused.
......@@ -49,16 +44,8 @@ module Geo
#
# @return [Array] the first element is an Array of untracked IDs, and the second element is an Array of tracked IDs that are unused
def find_registry_differences(range)
# rubocop:disable CodeReuse/ActiveRecord
source_ids =
job_artifacts(fdw: false)
.id_in(range)
.pluck(::Ci::JobArtifact.arel_table[:id])
# rubocop:enable CodeReuse/ActiveRecord
tracked_ids =
Geo::JobArtifactRegistry
.pluck_model_ids_in_range(range)
source_ids = job_artifacts.id_in(range).pluck(::Ci::JobArtifact.arel_table[:id]) # rubocop:disable CodeReuse/ActiveRecord
tracked_ids = syncable.pluck_model_ids_in_range(range)
untracked_ids = source_ids - tracked_ids
unused_tracked_ids = tracked_ids - source_ids
......@@ -84,49 +71,17 @@ module Geo
# @param [Array<Integer>] except_ids ids that will be ignored from the query
# rubocop:disable CodeReuse/ActiveRecord
def find_never_synced_registries(batch_size:, except_ids: [])
Geo::JobArtifactRegistry
syncable
.never
.model_id_not_in(except_ids)
.limit(batch_size)
end
alias_method :find_unsynced, :find_never_synced_registries
# rubocop:enable CodeReuse/ActiveRecord
# Deprecated in favor of the process using
# #find_registry_differences and #find_never_synced_registries
#
# Finds a limited number of non-replicated job artifacts.
#
# Pass a list with `except_ids:` to exclude items that are already
# scheduled but have not finished and are not persisted to the database yet.
#
# TODO: An alternative is to use some sort of window function with a cursor
# instead of simply limiting the query and passing a list of items we don't want
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_ids ids that will be ignored from the query
# rubocop: disable CodeReuse/ActiveRecord
def find_unsynced(batch_size:, except_ids: [])
job_artifacts
.not_expired
.missing_job_artifact_registry
.id_not_in(except_ids)
.limit(batch_size)
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
# Chains scopes on all_job_artifacts: inner-joins the job artifact registry,
# applies with_files_stored_remotely, skips except_ids, and limits the
# result to batch_size.
#
# NOTE(review): presumably this selects artifacts that were migrated to
# object storage but still have a registry entry — confirm against the
# scope definitions, which are not visible here.
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_ids ids that will be ignored from the query
def find_migrated_local(batch_size:, except_ids: [])
all_job_artifacts
.inner_join_job_artifact_registry
.with_files_stored_remotely
.id_not_in(except_ids)
.limit(batch_size)
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
def find_retryable_failed_registries(batch_size:, except_ids: [])
Geo::JobArtifactRegistry
syncable
.failed
.retry_due
.model_id_not_in(except_ids)
......@@ -136,7 +91,7 @@ module Geo
# rubocop: disable CodeReuse/ActiveRecord
def find_retryable_synced_missing_on_primary_registries(batch_size:, except_ids: [])
Geo::JobArtifactRegistry
syncable
.synced
.missing_on_primary
.retry_due
......@@ -147,18 +102,12 @@ module Geo
private
def job_artifacts(fdw: true)
local_storage_only?(fdw: fdw) ? all_job_artifacts(fdw: fdw).with_files_stored_locally : all_job_artifacts(fdw: fdw)
end
def all_job_artifacts(fdw: true)
current_node(fdw: fdw).job_artifacts
def job_artifacts
local_storage_only?(fdw: false) ? all_job_artifacts.with_files_stored_locally : all_job_artifacts
end
def registries_for_job_artifacts
job_artifacts
.inner_join_job_artifact_registry
.not_expired
def all_job_artifacts
current_node(fdw: false).job_artifacts
end
end
end
......@@ -9,16 +9,7 @@ class Geo::JobArtifactRegistry < Geo::BaseRegistry
scope :never, -> { where(success: false, retry_count: nil) }
# Selects registries with success: false.
#
# When registry_consistency_worker_enabled? is true, records whose
# retry_count is nil are left out, so the result does not overlap the
# `never` scope (success: false, retry_count: nil). Otherwise every record
# with success: false is selected.
def self.failed
if registry_consistency_worker_enabled?
where(success: false).where.not(retry_count: nil)
else
# Would do `super` except it doesn't work with an included scope
where(success: false)
end
end
# Whether the :geo_job_artifact_registry_ssot_sync feature flag is enabled.
# The flag is looked up with default_enabled: true.
def self.registry_consistency_worker_enabled?
Feature.enabled?(:geo_job_artifact_registry_ssot_sync, default_enabled: true)
end
def self.finder_class
......
......@@ -12,13 +12,9 @@ module Geo
end
def find_unsynced_jobs(batch_size:)
if Geo::JobArtifactRegistry.registry_consistency_worker_enabled?
convert_registry_relation_to_job_args(
registry_finder.find_never_synced_registries(find_batch_params(batch_size))
)
else
super
end
end
end
end
......
......@@ -37,10 +37,7 @@ module Geo
end
def find_migrated_local_objects(batch_size:)
attachment_ids = find_migrated_local_attachments_ids(batch_size: batch_size)
job_artifact_ids = find_migrated_local_job_artifacts_ids(batch_size: batch_size)
take_batch(attachment_ids, job_artifact_ids)
find_migrated_local_attachments_ids(batch_size: batch_size)
end
# rubocop: disable CodeReuse/ActiveRecord
......@@ -53,16 +50,6 @@ module Geo
end
# rubocop: enable CodeReuse/ActiveRecord
# rubocop: disable CodeReuse/ActiveRecord
# Collects the job artifact IDs returned by
# job_artifacts_finder.find_migrated_local, excluding the IDs reported by
# scheduled_file_ids(:job_artifact).
#
# @param [Integer] batch_size passed to the finder to limit the results
# @return [Array] ['job_artifact', id] pairs; an empty Array when
#   job_artifacts_object_store_enabled? is false
def find_migrated_local_job_artifacts_ids(batch_size:)
return [] unless job_artifacts_object_store_enabled?
job_artifacts_finder.find_migrated_local(batch_size: batch_size, except_ids: scheduled_file_ids(:job_artifact))
.pluck(Geo::Fdw::Ci::JobArtifact.arel_table[:id])
.map { |id| ['job_artifact', id] }
end
# rubocop: enable CodeReuse/ActiveRecord
def scheduled_file_ids(file_types)
file_types = Array(file_types)
file_types = file_types.map(&:to_s)
......@@ -74,13 +61,8 @@ module Geo
FileUploader.object_store_enabled?
end
# Delegates to JobArtifactUploader.object_store_enabled?.
def job_artifacts_object_store_enabled?
JobArtifactUploader.object_store_enabled?
end
def object_store_enabled?
attachments_object_store_enabled? ||
job_artifacts_object_store_enabled?
attachments_object_store_enabled?
end
def sync_object_storage_enabled?
......@@ -90,9 +72,5 @@ module Geo
# Memoized AttachmentRegistryFinder built with the current node's ID.
def attachments_finder
@attachments_finder ||= AttachmentRegistryFinder.new(current_node_id: current_node.id)
end
# Memoized JobArtifactRegistryFinder built with the current node's ID.
def job_artifacts_finder
@job_artifacts_finder ||= JobArtifactRegistryFinder.new(current_node_id: current_node.id)
end
end
end
---
title: 'Geo: Make registry table SSOT for job artifacts'
merge_request: 34590
author:
type: performance
......@@ -2,287 +2,160 @@
require 'spec_helper'
RSpec.describe Geo::JobArtifactRegistryFinder, :geo_fdw do
RSpec.describe Geo::JobArtifactRegistryFinder, :geo do
include ::EE::GeoHelpers
# Using let() instead of set() because set() does not work properly
# when using the :delete DatabaseCleaner strategy, which is required for FDW
# tests because a foreign table can't see changes inside a transaction of a
# different connection.
let(:secondary) { create(:geo_node) }
let(:synced_group) { create(:group) }
let(:synced_project) { create(:project, group: synced_group) }
let(:unsynced_project) { create(:project) }
let(:project_broken_storage) { create(:project, :broken_storage) }
subject { described_class.new(current_node_id: secondary.id) }
let_it_be(:secondary) { create(:geo_node) }
before do
stub_current_geo_node(secondary)
stub_artifacts_object_storage
end
let!(:job_artifact_synced_project) { create(:ci_job_artifact, project: synced_project) }
let!(:job_artifact_unsynced_project) { create(:ci_job_artifact, project: unsynced_project) }
let!(:job_artifact_broken_storage_1) { create(:ci_job_artifact, project: project_broken_storage) }
let!(:job_artifact_broken_storage_2) { create(:ci_job_artifact, project: project_broken_storage) }
let!(:job_artifact_expired_synced_project) { create(:ci_job_artifact, :expired, project: synced_project) }
let!(:job_artifact_expired_broken_storage) { create(:ci_job_artifact, :expired, project: project_broken_storage) }
let!(:job_artifact_remote_synced_project) { create(:ci_job_artifact, :remote_store, project: synced_project) }
let!(:job_artifact_remote_unsynced_project) { create(:ci_job_artifact, :remote_store, project: unsynced_project) }
let!(:job_artifact_remote_broken_storage) { create(:ci_job_artifact, :expired, :remote_store, project: project_broken_storage) }
context 'counts all the things' do
describe '#count_syncable' do
it 'counts non-expired job artifacts' do
expect(subject.count_syncable).to eq 6
end
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group_1) { create(:group, parent: synced_group) }
let_it_be(:synced_project) { create(:project, group: synced_group) }
let_it_be(:synced_project_in_nested_group) { create(:project, group: nested_group_1) }
let_it_be(:unsynced_project) { create(:project) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
let!(:ci_job_artifact_1) { create(:ci_job_artifact, project: synced_project) }
let!(:ci_job_artifact_2) { create(:ci_job_artifact, project: synced_project_in_nested_group) }
let!(:ci_job_artifact_3) { create(:ci_job_artifact, project: synced_project_in_nested_group) }
let!(:ci_job_artifact_4) { create(:ci_job_artifact, project: unsynced_project) }
let!(:ci_job_artifact_5) { create(:ci_job_artifact, project: project_broken_storage) }
let!(:ci_job_artifact_remote_1) { create(:ci_job_artifact, :remote_store) }
let!(:ci_job_artifact_remote_2) { create(:ci_job_artifact, :remote_store) }
let!(:ci_job_artifact_remote_3) { create(:ci_job_artifact, :remote_store) }
it 'counts non-expired job artifacts' do
expect(subject.count_syncable).to eq 2
end
end
subject { described_class.new(current_node_id: secondary.id) }
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
describe '#count_syncable' do
it 'counts registries for job artifacts' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'counts non-expired job artifacts' do
expect(subject.count_syncable).to eq 2
expect(subject.count_syncable).to eq 8
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
describe '#count_registry' do
it 'counts registries for job artifacts' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'counts non-expired job artifacts' do
expect(subject.count_syncable).to eq 4
end
expect(subject.count_registry).to eq 8
end
end
describe '#count_synced' do
before do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_unsynced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_2.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_synced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_broken_storage.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id)
end
it 'counts registries that has been synced' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
context 'without selective sync' do
it 'counts job artifacts that have been synced ignoring expired job artifacts' do
expect(subject.count_synced).to eq 3
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'counts job artifacts that has been synced ignoring expired job artifacts' do
expect(subject.count_synced).to eq 1
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'counts job artifacts that has been synced ignoring expired job artifacts' do
expect(subject.count_synced).to eq 1
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'counts job artifacts that has been synced ignoring expired job artifacts' do
expect(subject.count_synced).to eq 2
end
end
end
describe '#count_failed' do
before do
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_synced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_unsynced_project.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_broken_storage_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_expired_synced_project.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_expired_broken_storage.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_remote_synced_project.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_remote_broken_storage.id)
end
it 'counts registries that sync has failed' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
context 'without selective sync' do
it 'counts job artifacts that sync has failed ignoring expired ones' do
expect(subject.count_failed).to eq 3
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'counts job artifacts that sync has failed ignoring expired ones' do
expect(subject.count_failed).to eq 2
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'counts job artifacts that sync has failed ignoring expired ones' do
expect(subject.count_failed).to eq 1
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'counts job artifacts that sync has failed ignoring expired ones' do
expect(subject.count_failed).to eq 2
end
end
end
describe '#count_synced_missing_on_primary' do
before do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id, success: false, missing_on_primary: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_unsynced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_2.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_synced_project.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_broken_storage.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_unsynced_project.id, missing_on_primary: false)
end
context 'without selective sync' do
it 'counts job artifacts that have been synced and are missing on the primary, ignoring expired ones' do
expect(subject.count_synced_missing_on_primary).to eq 2
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'counts registries that have been synced and are missing on the primary, excluding not synced ones' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'counts job artifacts that have been synced and are missing on the primary, ignoring expired ones' do
expect(subject.count_synced_missing_on_primary).to eq 1
expect(subject.count_synced_missing_on_primary).to eq 3
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'counts job artifacts that have been synced and are missing on the primary, ignoring expired ones' do
expect(subject.count_synced_missing_on_primary).to eq 1
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'counts job artifacts that have been synced and are missing on the primary, ignoring expired ones' do
expect(subject.count_synced_missing_on_primary).to eq 1
end
end
end
describe '#count_registry' do
before do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_2.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id, missing_on_primary: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_unsynced_project.id)
end
it 'counts file registries for job artifacts' do
expect(subject.count_registry).to eq 4
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'does not apply the selective sync restriction' do
expect(subject.count_registry).to eq 4
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'does not apply the selective sync restriction' do
expect(subject.count_registry).to eq 4
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'counts file registries for job artifacts ignoring remote artifacts' do
expect(subject.count_registry).to eq 4
end
end
end
end
context 'finds all the things' do
describe '#find_registry_differences' do
context 'untracked IDs' do
before do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id)
create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_broken_storage_1.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_unsynced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_broken_storage.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id)
end
it 'includes Job Artifact IDs without an entry on the tracking database' do
it 'includes job artifact IDs without an entry on the tracking database' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array(
[job_artifact_unsynced_project.id, job_artifact_remote_synced_project.id,
job_artifact_broken_storage_2.id, job_artifact_expired_synced_project.id,
job_artifact_remote_broken_storage.id])
[ci_job_artifact_2.id, ci_job_artifact_5.id, ci_job_artifact_remote_1.id,
ci_job_artifact_remote_2.id, ci_job_artifact_remote_3.id])
end
it 'excludes Job Artifacts outside the ID range' do
untracked_ids, _ = subject.find_registry_differences(job_artifact_unsynced_project.id..job_artifact_broken_storage_2.id)
it 'excludes job artifacts outside the ID range' do
untracked_ids, _ = subject.find_registry_differences(ci_job_artifact_3.id..ci_job_artifact_remote_2.id)
expect(untracked_ids).to match_array(
[job_artifact_unsynced_project.id, job_artifact_broken_storage_2.id])
[ci_job_artifact_5.id, ci_job_artifact_remote_1.id,
ci_job_artifact_remote_2.id])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes Job Artifacts that are not in selectively synced projects' do
it 'excludes job artifact IDs that are not in selectively synced projects' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([job_artifact_expired_synced_project.id, job_artifact_remote_synced_project.id])
expect(untracked_ids).to match_array([ci_job_artifact_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes Job Artifacts that are not in selectively synced projects' do
it 'excludes job artifact IDs that are not in selectively synced projects' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array([job_artifact_broken_storage_2.id, job_artifact_remote_broken_storage.id])
expect(untracked_ids).to match_array([ci_job_artifact_5.id])
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'excludes Job Artifacts in object storage' do
it 'excludes job artifacts in object storage' do
untracked_ids, _ = subject.find_registry_differences(Ci::JobArtifact.first.id..Ci::JobArtifact.last.id)
expect(untracked_ids).to match_array(
[job_artifact_unsynced_project.id, job_artifact_broken_storage_2.id,
job_artifact_expired_synced_project.id])
expect(untracked_ids).to match_array([ci_job_artifact_2.id, ci_job_artifact_5.id])
end
end
end
......@@ -292,13 +165,17 @@ RSpec.describe Geo::JobArtifactRegistryFinder, :geo_fdw do
let!(:orphaned) { create(:geo_job_artifact_registry, artifact_id: non_existing_record_id) }
it 'includes tracked IDs that do not exist in the model table' do
_, unused_tracked_ids = subject.find_registry_differences(non_existing_record_id..non_existing_record_id)
range = non_existing_record_id..non_existing_record_id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([non_existing_record_id])
end
it 'excludes IDs outside the ID range' do
_, unused_tracked_ids = subject.find_registry_differences(1..1000)
range = 1..1000
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
......@@ -307,68 +184,76 @@ RSpec.describe Geo::JobArtifactRegistryFinder, :geo_fdw do
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked Job Artifact' do
it 'includes tracked Job Artifact IDs that exist but are not in a selectively synced project' do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_unsynced_project.id)
context 'with a tracked job artifact' do
let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id) }
let(:range) { ci_job_artifact_1.id..ci_job_artifact_4.id }
context 'excluded from selective sync' do
it 'includes tracked job artifact IDs that exist but are not in a selectively synced project' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_4.id)
_, unused_tracked_ids = subject.find_registry_differences(job_artifact_synced_project.id..job_artifact_unsynced_project.id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([job_artifact_unsynced_project.id])
expect(unused_tracked_ids).to match_array([ci_job_artifact_4.id])
end
end
context 'without a tracked Job Artifact' do
it 'returns empty' do
_, unused_tracked_ids = subject.find_registry_differences(job_artifact_synced_project.id..job_artifact_unsynced_project.id)
context 'included in selective sync' do
it 'excludes tracked job artifact IDs that are in selectively synced projects' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked Job Artifact' do
it 'includes tracked Job Artifact IDs that exist but are not in a selectively synced project' do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id)
context 'with a tracked job artifact' do
let!(:registry_entry) { create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id) }
let(:range) { ci_job_artifact_1.id..ci_job_artifact_5.id }
context 'excluded from selective sync' do
it 'includes tracked job artifact IDs that exist but are not in a selectively synced shard' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
_, unused_tracked_ids = subject.find_registry_differences(job_artifact_synced_project.id..job_artifact_broken_storage_1.id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([job_artifact_synced_project.id])
expect(unused_tracked_ids).to match_array([ci_job_artifact_1.id])
end
end
context 'without a tracked Job Artifact' do
it 'returns empty' do
_, unused_tracked_ids = subject.find_registry_differences(job_artifact_synced_project.id..job_artifact_broken_storage_1.id)
context 'included in selective sync' do
it 'excludes tracked job artifact IDs that are in selectively synced shards' do
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
context 'with a tracked Job Artifact' do
context 'with a tracked job artifact' do
context 'in object storage' do
it 'includes tracked Job Artifact IDs that are in object storage' do
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id)
range = job_artifact_remote_synced_project.id..job_artifact_remote_synced_project.id
it 'includes tracked job artifact IDs that are in object storage' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_1.id)
range = ci_job_artifact_remote_1.id..ci_job_artifact_remote_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([job_artifact_remote_synced_project.id])
expect(unused_tracked_ids).to match_array([ci_job_artifact_remote_1.id])
end
end
context 'not in object storage' do
it 'excludes tracked Job Artifact IDs that are not in object storage' do
create(:geo_lfs_object_registry, lfs_object_id: job_artifact_synced_project.id)
range = job_artifact_synced_project.id..job_artifact_synced_project.id
it 'excludes tracked job artifact IDs that are not in object storage' do
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_1.id)
range = ci_job_artifact_1.id..ci_job_artifact_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
......@@ -381,116 +266,130 @@ RSpec.describe Geo::JobArtifactRegistryFinder, :geo_fdw do
end
describe '#find_never_synced_registries' do
let!(:registry_job_artifact_1) { create(:geo_job_artifact_registry, :never_synced, artifact_id: job_artifact_synced_project.id) }
let!(:registry_job_artifact_2) { create(:geo_job_artifact_registry, :never_synced, artifact_id: job_artifact_unsynced_project.id) }
let!(:registry_job_artifact_3) { create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id) }
let!(:registry_job_artifact_4) { create(:geo_job_artifact_registry, :failed, artifact_id: job_artifact_broken_storage_2.id) }
let!(:registry_job_artifact_remote_1) { create(:geo_job_artifact_registry, :never_synced, artifact_id: job_artifact_remote_synced_project.id) }
it 'returns registries for job artifacts that have never been synced' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
registry_ci_job_artifact_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns registries for Job Artifacts that have never been synced' do
registries = subject.find_never_synced_registries(batch_size: 10)
expect(registries).to match_ids(registry_job_artifact_1, registry_job_artifact_2, registry_job_artifact_remote_1)
end
expect(registries).to match_ids(registry_ci_job_artifact_3, registry_ci_job_artifact_remote_3)
end
describe '#find_unsynced' do
before do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id, success: false)
create(:geo_job_artifact_registry, artifact_id: job_artifact_broken_storage_1.id, success: true)
create(:geo_job_artifact_registry, artifact_id: job_artifact_expired_broken_storage.id, success: true)
end
it 'excludes except_ids' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
context 'without selective sync' do
it 'returns job artifacts without an entry on the tracking database, ignoring expired ones' do
job_artifacts = subject.find_unsynced(batch_size: 10, except_ids: [job_artifact_unsynced_project.id])
registries = subject.find_never_synced_registries(batch_size: 10, except_ids: [ci_job_artifact_3.id])
expect(job_artifacts).to match_ids(job_artifact_remote_synced_project, job_artifact_remote_unsynced_project,
job_artifact_broken_storage_2)
expect(registries).to match_ids(registry_ci_job_artifact_remote_3)
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
describe '#find_unsynced' do
it 'returns registries for job artifacts that have never been synced' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
registry_ci_job_artifact_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts without an entry on the tracking database, ignoring expired ones' do
job_artifacts = subject.find_unsynced(batch_size: 10)
registries = subject.find_unsynced(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_remote_synced_project)
end
expect(registries).to match_ids(registry_ci_job_artifact_3, registry_ci_job_artifact_remote_3)
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes except_ids' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
registry_ci_job_artifact_remote_3 = create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts without an entry on the tracking database, ignoring expired ones' do
job_artifacts = subject.find_unsynced(batch_size: 10)
registries = subject.find_unsynced(batch_size: 10, except_ids: [ci_job_artifact_3.id])
expect(job_artifacts).to match_ids(job_artifact_broken_storage_2)
expect(registries).to match_ids(registry_ci_job_artifact_remote_3)
end
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
describe '#find_retryable_failed_registries' do
it 'returns registries for job artifacts that have failed to sync' do
registry_ci_job_artifact_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
registry_ci_job_artifact_4 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
registry_ci_job_artifact_remote_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts without an entry on the tracking database, ignoring expired ones and remotes' do
job_artifacts = subject.find_unsynced(batch_size: 10)
registries = subject.find_retryable_failed_registries(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_unsynced_project, job_artifact_broken_storage_2)
end
end
expect(registries).to match_ids(registry_ci_job_artifact_1, registry_ci_job_artifact_4, registry_ci_job_artifact_remote_1)
end
describe '#find_migrated_local' do
before do
create(:geo_job_artifact_registry, artifact_id: job_artifact_synced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_synced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_unsynced_project.id)
create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_broken_storage.id)
end
it 'returns job artifacts excluding ones from the exception list' do
job_artifacts = subject.find_migrated_local(batch_size: 10, except_ids: [job_artifact_remote_synced_project.id])
expect(job_artifacts).to match_ids(job_artifact_remote_unsynced_project, job_artifact_remote_broken_storage)
end
it 'includes synced job artifacts that are expired, exclude stored locally' do
job_artifacts = subject.find_migrated_local(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_remote_synced_project, job_artifact_remote_unsynced_project,
job_artifact_remote_broken_storage)
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes except_ids' do
registry_ci_job_artifact_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
registry_ci_job_artifact_remote_1 = create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts remotely and successfully synced locally' do
job_artifacts = subject.find_migrated_local(batch_size: 10)
registries = subject.find_retryable_failed_registries(batch_size: 10, except_ids: [ci_job_artifact_4.id])
expect(job_artifacts).to match_ids(job_artifact_remote_synced_project)
expect(registries).to match_ids(registry_ci_job_artifact_1, registry_ci_job_artifact_remote_1)
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
describe '#find_retryable_synced_missing_on_primary_registries' do
it 'returns registries for job artifacts that have been synced and are missing on the primary' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
registry_ci_job_artifact_2 = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
registry_ci_job_artifact_5 = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts remotely and successfully synced locally' do
job_artifacts = subject.find_migrated_local(batch_size: 10)
registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10)
expect(job_artifacts).to match_ids(job_artifact_remote_broken_storage)
end
expect(registries).to match_ids(registry_ci_job_artifact_2, registry_ci_job_artifact_5)
end
context 'with object storage sync disabled' do
let(:secondary) { create(:geo_node, :local_storage_only) }
it 'excludes except_ids' do
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_1.id)
registry_ci_job_artifact_2 = create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_3.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_4.id)
create(:geo_job_artifact_registry, artifact_id: ci_job_artifact_5.id, missing_on_primary: true, retry_at: 1.day.ago)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_1.id)
create(:geo_job_artifact_registry, :failed, artifact_id: ci_job_artifact_remote_2.id, missing_on_primary: true)
create(:geo_job_artifact_registry, :never_synced, artifact_id: ci_job_artifact_remote_3.id)
it 'returns job artifacts excluding ones from the exception list' do
job_artifacts = subject.find_migrated_local(batch_size: 10, except_ids: [job_artifact_remote_synced_project.id])
registries = subject.find_retryable_synced_missing_on_primary_registries(batch_size: 10, except_ids: [ci_job_artifact_5.id])
expect(job_artifacts).to match_ids(job_artifact_remote_unsynced_project, job_artifact_remote_broken_storage)
end
end
expect(registries).to match_ids(registry_ci_job_artifact_2)
end
end
......
......@@ -321,29 +321,6 @@ RSpec.describe GeoNodeStatus, :geo, :geo_fdw do
end
describe '#job_artifacts_failed_count' do
context 'when geo_job_artifact_registry_ssot_sync is disabled' do
before do
stub_feature_flags(geo_job_artifact_registry_ssot_sync: false)
end
it 'counts failed job artifacts' do
# These should be ignored
create(:geo_upload_registry, :failed)
create(:geo_upload_registry, :avatar, :failed)
create(:geo_upload_registry, :attachment, :failed)
create(:geo_job_artifact_registry, :with_artifact, success: true)
create(:geo_job_artifact_registry, :with_artifact, success: false)
expect(subject.job_artifacts_failed_count).to eq(1)
end
end
context 'when geo_job_artifact_registry_ssot_sync is enabled' do
before do
stub_feature_flags(geo_job_artifact_registry_ssot_sync: true)
end
it 'counts failed job artifacts' do
# These should be ignored
create(:geo_upload_registry, :failed)
......@@ -356,7 +333,6 @@ RSpec.describe GeoNodeStatus, :geo, :geo_fdw do
expect(subject.job_artifacts_failed_count).to eq(1)
end
end
end
describe '#job_artifacts_synced_in_percentage' do
context 'when artifacts are available' do
......
......@@ -361,11 +361,6 @@ RSpec.describe Geo::FileDownloadDispatchWorker, :geo, :geo_fdw, :use_sql_query_c
end
context 'with job artifacts' do
context 'with geo_job_artifact_registry_ssot_sync feature enabled' do
before do
stub_feature_flags(geo_job_artifact_registry_ssot_sync: true)
end
it 'performs Geo::FileDownloadWorker for unsynced job artifacts' do
registry = create(:geo_job_artifact_registry, :with_artifact, :never_synced)
......@@ -470,117 +465,6 @@ RSpec.describe Geo::FileDownloadDispatchWorker, :geo, :geo_fdw, :use_sql_query_c
end
end
# Examples that run with the geo_job_artifact_registry_ssot_sync flag stubbed off.
# Each one sets message expectations on Geo::FileDownloadWorker.perform_async and
# then calls subject.perform.
context 'with geo_job_artifact_registry_ssot_sync feature disabled' do
  before { stub_feature_flags(geo_job_artifact_registry_ssot_sync: false) }

  it 'performs Geo::FileDownloadWorker for unsynced job artifacts' do
    job_artifact = create(:ci_job_artifact)

    expect(Geo::FileDownloadWorker)
      .to receive(:perform_async)
      .with('job_artifact', job_artifact.id)

    subject.perform
  end

  it 'performs Geo::FileDownloadWorker for failed-sync job artifacts' do
    job_artifact = create(:ci_job_artifact)
    create(:geo_job_artifact_registry, artifact_id: job_artifact.id, bytes: 0, success: false)

    expect(Geo::FileDownloadWorker)
      .to receive(:perform_async)
      .with('job_artifact', job_artifact.id)
      .once
      .and_return(spy)

    subject.perform
  end

  it 'does not perform Geo::FileDownloadWorker for synced job artifacts' do
    # The artifact is only needed for its ID, so it is created inline.
    create(:geo_job_artifact_registry, artifact_id: create(:ci_job_artifact).id, bytes: 1234, success: true)

    expect(Geo::FileDownloadWorker).not_to receive(:perform_async)

    subject.perform
  end

  it 'does not perform Geo::FileDownloadWorker for synced job artifacts even with 0 bytes downloaded' do
    create(:geo_job_artifact_registry, artifact_id: create(:ci_job_artifact).id, bytes: 0, success: true)

    expect(Geo::FileDownloadWorker).not_to receive(:perform_async)

    subject.perform
  end

  it 'does not retry failed artifacts when retry_at is tomorrow' do
    registry = create(:geo_job_artifact_registry, :with_artifact, bytes: 0, success: false, retry_at: Date.tomorrow)

    expect(Geo::FileDownloadWorker)
      .not_to receive(:perform_async)
      .with('job_artifact', registry.artifact_id)

    subject.perform
  end

  it 'retries failed artifacts when retry_at is in the past' do
    registry = create(:geo_job_artifact_registry, :with_artifact, success: false, retry_at: Date.yesterday)

    expect(Geo::FileDownloadWorker)
      .to receive(:perform_async)
      .with('job_artifact', registry.artifact_id)

    subject.perform
  end

  # A registry row flagged success: true and missing_on_primary: true exists before every example here.
  context 'with files missing on the primary that are marked as synced' do
    let!(:primary_missing_artifact) { create(:ci_job_artifact) }
    let!(:primary_missing_registry) do
      create(:geo_job_artifact_registry,
             artifact_id: primary_missing_artifact.id,
             bytes: 1234,
             success: true,
             missing_on_primary: true)
    end

    it 'retries the files if there is spare capacity' do
      untracked_artifact = create(:ci_job_artifact)

      expect(Geo::FileDownloadWorker)
        .to receive(:perform_async)
        .with('job_artifact', untracked_artifact.id)
      expect(Geo::FileDownloadWorker)
        .to receive(:perform_async)
        .with('job_artifact', primary_missing_artifact.id)

      subject.perform
    end

    it 'retries failed files with retry_at in the past' do
      primary_missing_registry.update!(retry_at: Date.yesterday)

      expect(Geo::FileDownloadWorker)
        .to receive(:perform_async)
        .with('job_artifact', primary_missing_artifact.id)

      subject.perform
    end

    it 'does not retry files with later retry_at' do
      primary_missing_registry.update!(retry_at: Date.tomorrow)

      expect(Geo::FileDownloadWorker)
        .not_to receive(:perform_async)
        .with('job_artifact', primary_missing_artifact.id)

      subject.perform
    end

    it 'does not retry those files if there is no spare capacity' do
      untracked_artifact = create(:ci_job_artifact)

      # The batch size is stubbed to 1 and must be read exactly twice.
      expect(subject).to receive(:db_retrieve_batch_size).and_return(1).twice
      expect(Geo::FileDownloadWorker)
        .to receive(:perform_async)
        .with('job_artifact', untracked_artifact.id)

      subject.perform
    end

    it 'does not retry those files if they are already scheduled' do
      untracked_artifact = create(:ci_job_artifact)
      already_scheduled = [{ type: 'job_artifact', id: primary_missing_artifact.id, job_id: 'foo' }]

      expect(subject).to receive(:scheduled_jobs).and_return(already_scheduled).at_least(1)
      expect(Geo::FileDownloadWorker)
        .to receive(:perform_async)
        .with('job_artifact', untracked_artifact.id)

      subject.perform
    end
  end
end
end
context 'backoff time' do
let(:cache_key) { "#{described_class.name.underscore}:skip" }
......@@ -637,22 +521,6 @@ RSpec.describe Geo::FileDownloadDispatchWorker, :geo, :geo_fdw, :use_sql_query_c
allow(::GeoNode).to receive(:current_node).and_return(secondary)
end
# With the geo_job_artifact_registry_ssot_sync flag stubbed off, only the artifact created
# for project_in_synced_group is expected to be passed to Geo::FileDownloadWorker.
context 'when geo_job_artifact_registry_ssot_sync feature is disabled' do
  before { stub_feature_flags(geo_job_artifact_registry_ssot_sync: false) }

  it 'does not perform Geo::FileDownloadWorker for job artifact that does not belong to selected namespaces to replicate' do
    # This artifact belongs to unsynced_project; no expectation is set for it.
    create(:ci_job_artifact, project: unsynced_project)
    replicated_artifact = create(:ci_job_artifact, project: project_in_synced_group)

    expect(Geo::FileDownloadWorker)
      .to receive(:perform_async)
      .with('job_artifact', replicated_artifact.id)
      .once
      .and_return(spy)

    subject.perform
  end
end
context 'with geo_file_registry_ssot_sync feature disabled' do
before do
stub_feature_flags(geo_file_registry_ssot_sync: false)
......
......@@ -141,61 +141,6 @@ RSpec.describe Geo::MigratedLocalFilesCleanUpWorker, :geo, :geo_fdw, :use_sql_qu
end
end
# Job artifact examples: registries exist for one artifact created without the
# :remote_store trait and one created with it. Each example checks which artifact IDs
# Geo::FileRegistryRemovalWorker.perform_async receives when subject.perform runs.
context 'with job artifacts' do
  let(:job_artifact_local) { create(:ci_job_artifact) }
  let(:job_artifact_remote_1) { create(:ci_job_artifact, :remote_store, project: synced_project) }

  before do
    stub_artifacts_object_storage

    create(:geo_job_artifact_registry, artifact_id: job_artifact_local.id)
    create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_1.id)
  end

  it 'schedules worker for artifact stored remotely and synced locally' do
    expect(Geo::FileRegistryRemovalWorker).to receive(:perform_async).with('job_artifact', job_artifact_remote_1.id)
    expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, job_artifact_local.id)

    subject.perform
  end

  # The secondary here is configured with selective_sync_type: 'shards', so the
  # description names shard-based selective sync.
  context 'with selective sync by shard' do
    # Presumably project_broken_storage lives on the 'broken' shard selected below; confirm in the outer setup.
    let(:job_artifact_remote_2) { create(:ci_job_artifact, :remote_store, project: project_broken_storage) }
    let(:secondary) { create(:geo_node, :local_storage_only, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }

    before do
      create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_2.id)
    end

    it 'schedules worker for artifact stored remotely and synced locally' do
      expect(Geo::FileRegistryRemovalWorker).to receive(:perform_async).with('job_artifact', job_artifact_remote_2.id)
      expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, job_artifact_remote_1.id)
      expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, job_artifact_local.id)

      subject.perform
    end
  end

  # The secondary here is configured with selective_sync_type: 'namespaces', so the
  # description names namespace-based selective sync.
  context 'with selective sync by namespace' do
    let(:job_artifact_remote_2) { create(:ci_job_artifact, :remote_store, project: unsynced_project) }
    let(:secondary) { create(:geo_node, :local_storage_only, selective_sync_type: 'namespaces', namespaces: [synced_group]) }

    before do
      create(:geo_job_artifact_registry, artifact_id: job_artifact_remote_2.id)
    end

    it 'schedules worker for artifact stored remotely and synced locally' do
      expect(Geo::FileRegistryRemovalWorker).to receive(:perform_async).with('job_artifact', job_artifact_remote_1.id)
      expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, job_artifact_remote_2.id)
      expect(Geo::FileRegistryRemovalWorker).not_to receive(:perform_async).with(anything, job_artifact_local.id)

      subject.perform
    end
  end
end
context 'backoff time' do
let(:cache_key) { "#{described_class.name.underscore}:skip" }
......
......@@ -97,27 +97,6 @@ RSpec.describe Geo::Secondary::RegistryConsistencyWorker, :geo, :geo_fdw do
expect(Geo::PackageFileRegistry.where(package_file_id: package_file.id).count).to eq(1)
end
# With the geo_job_artifact_registry_ssot_sync flag stubbed off, subject.perform is expected
# to be falsey and Geo::RegistryConsistencyService must not be built for Geo::JobArtifactRegistry.
context 'when geo_job_artifact_registry_ssot_sync is disabled' do
  before { stub_feature_flags(geo_job_artifact_registry_ssot_sync: false) }

  it 'returns false' do
    outcome = subject.perform

    expect(outcome).to be_falsey
  end

  it 'does not execute RegistryConsistencyService for Job Artifacts' do
    # The service may still be instantiated for each of these registry classes.
    permitted_registries = [
      Geo::LfsObjectRegistry,
      Geo::ProjectRegistry,
      Geo::UploadRegistry,
      Geo::PackageFileRegistry
    ]

    permitted_registries.each do |registry_class|
      allow(Geo::RegistryConsistencyService)
        .to receive(:new)
        .with(registry_class, batch_size: 1000)
        .and_call_original
    end

    expect(Geo::RegistryConsistencyService)
      .not_to receive(:new)
      .with(Geo::JobArtifactRegistry, batch_size: 1000)

    subject.perform
  end
end
context 'when geo_file_registry_ssot_sync is disabled' do
before do
stub_feature_flags(geo_file_registry_ssot_sync: false)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment