Commit b4d2e1f3 authored by Toon Claes, committed by Nick Thomas

Geo: Ignore remote stored objects when calculating counts

parent ddf6889a
......@@ -12,6 +12,7 @@ class Upload < ActiveRecord::Base
validates :uploader, presence: true
scope :with_files_stored_locally, -> { where(store: [nil, ObjectStorage::Store::LOCAL]) }
scope :with_files_stored_remotely, -> { where(store: ObjectStorage::Store::REMOTE) }
before_save :calculate_checksum!, if: :foreground_checksummable?
after_commit :schedule_checksum, if: :checksummable?
......
module Geo
class AttachmentRegistryFinder < FileRegistryFinder
def attachments
relation =
if selective_sync?
Upload.where(group_uploads.or(project_uploads).or(other_uploads))
else
Upload.all
end
if selective_sync?
Upload.where(group_uploads.or(project_uploads).or(other_uploads))
else
Upload.all
end
end
relation.with_files_stored_locally
# Returns the `attachments` relation narrowed by the
# `with_files_stored_locally` scope (store is nil or LOCAL), so uploads
# kept in remote object storage are left out.
def local_attachments
attachments.with_files_stored_locally
end
def count_attachments
attachments.count
def count_local_attachments
local_attachments.count
end
def count_synced_attachments
......@@ -49,20 +50,20 @@ module Geo
# Find limited amount of non replicated attachments.
#
# You can pass a list with `except_registry_ids:` so you can exclude items you
# already scheduled but haven't finished and persisted to the database yet
# You can pass a list with `except_file_ids:` so you can exclude items you
# already scheduled but haven't finished and aren't persisted to the database yet
#
# TODO: Alternative here is to use some sort of window function with a cursor instead
# of simply limiting the query and passing a list of items we don't want
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_registry_ids ids that will be ignored from the query
def find_unsynced_attachments(batch_size:, except_registry_ids: [])
# @param [Array<Integer>] except_file_ids ids that will be ignored from the query
def find_unsynced_attachments(batch_size:, except_file_ids: [])
relation =
if use_legacy_queries?
legacy_find_unsynced_attachments(except_registry_ids: except_registry_ids)
legacy_find_unsynced_attachments(except_file_ids: except_file_ids)
else
fdw_find_unsynced_attachments(except_registry_ids: except_registry_ids)
fdw_find_unsynced_attachments(except_file_ids: except_file_ids)
end
relation.limit(batch_size)
......@@ -106,31 +107,40 @@ module Geo
#
def fdw_find_synced_attachments
fdw_find_attachments.merge(Geo::FileRegistry.synced)
fdw_find_local_attachments.merge(Geo::FileRegistry.synced)
end
def fdw_find_failed_attachments
fdw_find_attachments.merge(Geo::FileRegistry.failed)
fdw_find_local_attachments.merge(Geo::FileRegistry.failed)
end
def fdw_find_attachments
fdw_table = Geo::Fdw::Upload.table_name
Geo::Fdw::Upload.joins("INNER JOIN file_registry ON file_registry.file_id = #{fdw_table}.id")
# Inner-joins the FDW uploads with their file_registry rows, keeps only the
# locally stored uploads and merges in the registry's `attachments` scope.
def fdw_find_local_attachments
  uploads = fdw_attachments
  registry_join = "INNER JOIN file_registry ON file_registry.file_id = #{fdw_attachments_table}.id"
  local_uploads = uploads.joins(registry_join).with_files_stored_locally

  local_uploads.merge(Geo::FileRegistry.attachments)
end
def fdw_find_unsynced_attachments(except_registry_ids:)
fdw_table = Geo::Fdw::Upload.table_name
def fdw_find_unsynced_attachments(except_file_ids:)
upload_types = Geo::FileService::DEFAULT_OBJECT_TYPES.map { |val| "'#{val}'" }.join(',')
Geo::Fdw::Upload.joins("LEFT OUTER JOIN file_registry
ON file_registry.file_id = #{fdw_table}.id
AND file_registry.file_type IN (#{upload_types})")
fdw_attachments.joins("LEFT OUTER JOIN file_registry
ON file_registry.file_id = #{fdw_attachments_table}.id
AND file_registry.file_type IN (#{upload_types})")
.with_files_stored_locally
.where(file_registry: { id: nil })
.where.not(id: except_registry_ids)
.where.not(id: except_file_ids)
end
# Base relation over the FDW uploads table.
#
# Without selective sync every upload is returned; with selective sync the
# relation is limited to rows matching group_uploads, project_uploads or
# other_uploads.
def fdw_attachments
  return Geo::Fdw::Upload.all unless selective_sync?

  synced_uploads = group_uploads.or(project_uploads).or(other_uploads)
  Geo::Fdw::Upload.where(synced_uploads)
end
# Table name of the Geo::Fdw::Upload model; interpolated into the raw SQL
# join clauses built by the FDW finders.
def fdw_attachments_table
Geo::Fdw::Upload.table_name
end
#
......@@ -139,7 +149,7 @@ module Geo
def legacy_find_synced_attachments
legacy_inner_join_registry_ids(
attachments,
local_attachments,
Geo::FileRegistry.attachments.synced.pluck(:file_id),
Upload
)
......@@ -147,18 +157,18 @@ module Geo
def legacy_find_failed_attachments
legacy_inner_join_registry_ids(
attachments,
local_attachments,
Geo::FileRegistry.attachments.failed.pluck(:file_id),
Upload
)
end
def legacy_find_unsynced_attachments(except_registry_ids:)
registry_ids = legacy_pluck_registry_ids(file_types: Geo::FileService::DEFAULT_OBJECT_TYPES, except_registry_ids: except_registry_ids)
def legacy_find_unsynced_attachments(except_file_ids:)
registry_file_ids = legacy_pluck_registry_file_ids(file_types: Geo::FileService::DEFAULT_OBJECT_TYPES) | except_file_ids
legacy_left_outer_join_registry_ids(
attachments,
registry_ids,
local_attachments,
registry_file_ids,
Upload
)
end
......
......@@ -6,9 +6,8 @@ module Geo
protected
def legacy_pluck_registry_ids(file_types:, except_registry_ids:)
ids = Geo::FileRegistry.where(file_type: file_types).pluck(:file_id)
(ids + except_registry_ids).uniq
# Plucks the `file_id` of every Geo::FileRegistry row whose `file_type`
# matches +file_types+.
#
# @param file_types value (or list of values) used for the `file_type` condition
# @return whatever `pluck(:file_id)` yields for the matching rows
def legacy_pluck_registry_file_ids(file_types:)
  matching_registries = Geo::FileRegistry.where(file_type: file_types)
  matching_registries.pluck(:file_id)
end
end
end
module Geo
class JobArtifactRegistryFinder < FileRegistryFinder
def count_job_artifacts
job_artifacts.count
local_job_artifacts.count
end
def count_synced_job_artifacts
relation =
if selective_sync?
legacy_find_synced_job_artifacts
else
find_synced_job_artifacts_registries
end
relation.count
if aggregate_pushdown_supported?
find_synced_job_artifacts.count
else
legacy_find_synced_job_artifacts.count
end
end
def count_failed_job_artifacts
relation =
if selective_sync?
legacy_find_failed_job_artifacts
else
find_failed_job_artifacts_registries
end
relation.count
if aggregate_pushdown_supported?
find_failed_job_artifacts.count
else
legacy_find_failed_job_artifacts.count
end
end
# Find limited amount of non replicated job artifacts.
#
# You can pass a list with `except_registry_ids:` so you can exclude items you
# already scheduled but haven't finished and persisted to the database yet
# You can pass a list with `except_file_ids:` so you can exclude items you
# already scheduled but haven't finished and aren't persisted to the database yet
#
# TODO: Alternative here is to use some sort of window function with a cursor instead
# of simply limiting the query and passing a list of items we don't want
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_registry_ids ids that will be ignored from the query
def find_unsynced_job_artifacts(batch_size:, except_registry_ids: [])
# @param [Array<Integer>] except_file_ids ids that will be ignored from the query
def find_unsynced_job_artifacts(batch_size:, except_file_ids: [])
relation =
if use_legacy_queries?
legacy_find_unsynced_job_artifacts(except_registry_ids: except_registry_ids)
legacy_find_unsynced_job_artifacts(except_file_ids: except_file_ids)
else
fdw_find_unsynced_job_artifacts(except_registry_ids: except_registry_ids)
fdw_find_unsynced_job_artifacts(except_file_ids: except_file_ids)
end
relation.limit(batch_size)
end
def job_artifacts
relation =
if selective_sync?
Ci::JobArtifact.joins(:project).where(projects: { id: current_node.projects })
else
Ci::JobArtifact.all
end
if selective_sync?
Ci::JobArtifact.joins(:project).where(projects: { id: current_node.projects })
else
Ci::JobArtifact.all
end
end
relation.with_files_stored_locally
# Returns the `job_artifacts` relation narrowed by the
# `with_files_stored_locally` scope.
# NOTE(review): the scope is defined on the job artifact model, outside this
# view — presumably it keeps rows whose file_store is nil or LOCAL; confirm.
def local_job_artifacts
job_artifacts.with_files_stored_locally
end
private
def find_synced_job_artifacts_registries
Geo::FileRegistry.job_artifacts.synced
# Synced job artifacts: the legacy finder when legacy queries are in use,
# otherwise the FDW join merged with the registry's `synced` scope.
def find_synced_job_artifacts
  return legacy_find_synced_job_artifacts if use_legacy_queries?

  fdw_find_job_artifacts.merge(Geo::FileRegistry.synced)
end
def find_failed_job_artifacts_registries
Geo::FileRegistry.job_artifacts.failed
# Failed job artifacts: the legacy finder when legacy queries are in use,
# otherwise the FDW join merged with the registry's `failed` scope.
def find_failed_job_artifacts
  return legacy_find_failed_job_artifacts if use_legacy_queries?

  fdw_find_job_artifacts.merge(Geo::FileRegistry.failed)
end
#
# FDW accessors
#
def fdw_find_unsynced_job_artifacts(except_registry_ids:)
fdw_table = Geo::Fdw::Ci::JobArtifact.table_name
# Inner-joins the FDW job artifacts with their file_registry rows, keeps only
# the locally stored artifacts and merges in the registry's `job_artifacts`
# scope.
#
# The join interpolates `fdw_job_artifacts_table`; the previous spelling
# `fdw_jobs_artifacts_table` named a helper that is not defined and raised
# NameError as soon as this method was called.
def fdw_find_job_artifacts
  fdw_job_artifacts.joins("INNER JOIN file_registry ON file_registry.file_id = #{fdw_job_artifacts_table}.id")
    .with_files_stored_locally
    .merge(Geo::FileRegistry.job_artifacts)
end
Geo::Fdw::Ci::JobArtifact.joins("LEFT OUTER JOIN file_registry
ON file_registry.file_id = #{fdw_table}.id
AND file_registry.file_type = 'job_artifact'")
def fdw_find_unsynced_job_artifacts(except_file_ids:)
fdw_job_artifacts.joins("LEFT OUTER JOIN file_registry
ON file_registry.file_id = #{fdw_job_artifacts_table}.id
AND file_registry.file_type = 'job_artifact'")
.with_files_stored_locally
.where(file_registry: { id: nil })
.where.not(id: except_registry_ids)
.where.not(id: except_file_ids)
end
# Base relation over the FDW job artifacts table.
#
# Without selective sync every artifact is returned; with selective sync the
# relation is joined to `:project` and limited to the current node's projects.
def fdw_job_artifacts
  return Geo::Fdw::Ci::JobArtifact.all unless selective_sync?

  with_project = Geo::Fdw::Ci::JobArtifact.joins(:project)
  with_project.where(projects: { id: current_node.projects })
end
# Table name of the Geo::Fdw::Ci::JobArtifact model; interpolated into the
# raw SQL join clauses built by the FDW finders.
def fdw_job_artifacts_table
Geo::Fdw::Ci::JobArtifact.table_name
end
#
......@@ -89,26 +108,26 @@ module Geo
def legacy_find_synced_job_artifacts
legacy_inner_join_registry_ids(
job_artifacts,
find_synced_job_artifacts_registries.pluck(:file_id),
local_job_artifacts,
Geo::FileRegistry.job_artifacts.synced.pluck(:file_id),
Ci::JobArtifact
)
end
def legacy_find_failed_job_artifacts
legacy_inner_join_registry_ids(
job_artifacts,
find_failed_job_artifacts_registries.pluck(:file_id),
local_job_artifacts,
Geo::FileRegistry.job_artifacts.failed.pluck(:file_id),
Ci::JobArtifact
)
end
def legacy_find_unsynced_job_artifacts(except_registry_ids:)
registry_ids = legacy_pluck_registry_ids(file_types: :job_artifact, except_registry_ids: except_registry_ids)
def legacy_find_unsynced_job_artifacts(except_file_ids:)
registry_file_ids = legacy_pluck_registry_file_ids(file_types: :job_artifact) | except_file_ids
legacy_left_outer_join_registry_ids(
job_artifacts,
registry_ids,
local_job_artifacts,
registry_file_ids,
Ci::JobArtifact
)
end
......
module Geo
class LfsObjectRegistryFinder < FileRegistryFinder
def count_lfs_objects
lfs_objects.count
local_lfs_objects.count
end
def count_synced_lfs_objects
relation =
if selective_sync?
legacy_find_synced_lfs_objects
else
find_synced_lfs_objects_registries
end
relation.count
if aggregate_pushdown_supported?
find_synced_lfs_objects.count
else
legacy_find_synced_lfs_objects.count
end
end
def count_failed_lfs_objects
relation =
if selective_sync?
legacy_find_failed_lfs_objects
else
find_failed_lfs_objects_registries
end
relation.count
if aggregate_pushdown_supported?
find_failed_lfs_objects.count
else
legacy_find_failed_lfs_objects.count
end
end
# Find limited amount of non replicated lfs objects.
#
# You can pass a list with `except_registry_ids:` so you can exclude items you
# already scheduled but haven't finished and persisted to the database yet
# You can pass a list with `except_file_ids:` so you can exclude items you
# already scheduled but haven't finished and aren't persisted to the database yet
#
# TODO: Alternative here is to use some sort of window function with a cursor instead
# of simply limiting the query and passing a list of items we don't want
#
# @param [Integer] batch_size used to limit the results returned
# @param [Array<Integer>] except_registry_ids ids that will be ignored from the query
def find_unsynced_lfs_objects(batch_size:, except_registry_ids: [])
# @param [Array<Integer>] except_file_ids ids that will be ignored from the query
def find_unsynced_lfs_objects(batch_size:, except_file_ids: [])
relation =
if use_legacy_queries?
legacy_find_unsynced_lfs_objects(except_registry_ids: except_registry_ids)
legacy_find_unsynced_lfs_objects(except_file_ids: except_file_ids)
else
fdw_find_unsynced_lfs_objects(except_registry_ids: except_registry_ids)
fdw_find_unsynced_lfs_objects(except_file_ids: except_file_ids)
end
relation.limit(batch_size)
end
def lfs_objects
relation =
if selective_sync?
LfsObject.joins(:projects).where(projects: { id: current_node.projects })
else
LfsObject.all
end
if selective_sync?
LfsObject.joins(:projects).where(projects: { id: current_node.projects })
else
LfsObject.all
end
end
relation.with_files_stored_locally
# Returns the `lfs_objects` relation narrowed by the
# `with_files_stored_locally` scope.
# NOTE(review): presumably this is the LfsObject scope that keeps rows whose
# file_store is nil or LOCAL — confirm against the model.
def local_lfs_objects
lfs_objects.with_files_stored_locally
end
private
def find_synced_lfs_objects_registries
Geo::FileRegistry.lfs_objects.synced
# Synced LFS objects: the legacy finder when legacy queries are in use,
# otherwise the FDW join merged with the registry's `synced` scope.
def find_synced_lfs_objects
  return legacy_find_synced_lfs_objects if use_legacy_queries?

  fdw_find_lfs_objects.merge(Geo::FileRegistry.synced)
end
def find_failed_lfs_objects_registries
Geo::FileRegistry.lfs_objects.failed
# Failed LFS objects: the legacy finder when legacy queries are in use,
# otherwise the FDW join merged with the registry's `failed` scope.
def find_failed_lfs_objects
  return legacy_find_failed_lfs_objects if use_legacy_queries?

  fdw_find_lfs_objects.merge(Geo::FileRegistry.failed)
end
#
# FDW accessors
#
def fdw_find_unsynced_lfs_objects(except_registry_ids:)
fdw_table = Geo::Fdw::LfsObject.table_name
# Inner-joins the FDW LFS objects with their file_registry rows, keeps only
# the locally stored objects and merges in the registry's `lfs_objects` scope.
def fdw_find_lfs_objects
  objects = fdw_lfs_objects
  registry_join = "INNER JOIN file_registry ON file_registry.file_id = #{fdw_lfs_objects_table}.id"
  local_objects = objects.joins(registry_join).with_files_stored_locally

  local_objects.merge(Geo::FileRegistry.lfs_objects)
end
# Filter out objects in object storage (this is done in GeoNode#lfs_objects)
Geo::Fdw::LfsObject.joins("LEFT OUTER JOIN file_registry
ON file_registry.file_id = #{fdw_table}.id
AND file_registry.file_type = 'lfs'")
def fdw_find_unsynced_lfs_objects(except_file_ids:)
fdw_lfs_objects.joins("LEFT OUTER JOIN file_registry
ON file_registry.file_id = #{fdw_lfs_objects_table}.id
AND file_registry.file_type = 'lfs'")
.with_files_stored_locally
.where(file_registry: { id: nil })
.where.not(id: except_registry_ids)
.where.not(id: except_file_ids)
end
# Base relation over the FDW LFS objects table.
#
# Without selective sync every object is returned; with selective sync the
# relation is joined to `:project` and limited to the current node's projects.
#
# NOTE(review): the non-FDW `lfs_objects` finder joins `:projects` (plural)
# while this one joins `:project` — confirm which association
# Geo::Fdw::LfsObject actually defines.
def fdw_lfs_objects
  return Geo::Fdw::LfsObject.all unless selective_sync?

  with_project = Geo::Fdw::LfsObject.joins(:project)
  with_project.where(projects: { id: current_node.projects })
end
# Table name of the Geo::Fdw::LfsObject model; interpolated into the raw SQL
# join clauses built by the FDW finders.
def fdw_lfs_objects_table
Geo::Fdw::LfsObject.table_name
end
#
......@@ -90,26 +108,26 @@ module Geo
def legacy_find_synced_lfs_objects
legacy_inner_join_registry_ids(
lfs_objects,
find_synced_lfs_objects_registries.pluck(:file_id),
local_lfs_objects,
Geo::FileRegistry.lfs_objects.synced.pluck(:file_id),
LfsObject
)
end
def legacy_find_failed_lfs_objects
legacy_inner_join_registry_ids(
lfs_objects,
find_failed_lfs_objects_registries.pluck(:file_id),
local_lfs_objects,
Geo::FileRegistry.lfs_objects.failed.pluck(:file_id),
LfsObject
)
end
def legacy_find_unsynced_lfs_objects(except_registry_ids:)
registry_ids = legacy_pluck_registry_ids(file_types: :lfs, except_registry_ids: except_registry_ids)
def legacy_find_unsynced_lfs_objects(except_file_ids:)
registry_file_ids = legacy_pluck_registry_file_ids(file_types: :lfs) | except_file_ids
legacy_left_outer_join_registry_ids(
lfs_objects,
registry_ids,
local_lfs_objects,
registry_file_ids,
LfsObject
)
end
......
......@@ -12,6 +12,7 @@ module EE
after_destroy :log_geo_event
scope :with_files_stored_locally, -> { where(file_store: [nil, LfsObjectUploader::Store::LOCAL]) }
scope :with_files_stored_remotely, -> { where(file_store: ObjectStorage::Store::REMOTE) }
end
def local_store?
......
......@@ -4,6 +4,7 @@ module Geo
self.table_name = Gitlab::Geo::Fdw.table('lfs_objects')
scope :with_files_stored_locally, -> { where(file_store: [nil, LfsObjectUploader::Store::LOCAL]) }
scope :with_files_stored_remotely, -> { where(file_store: LfsObjectUploader::Store::REMOTE) }
end
end
end
......@@ -4,6 +4,7 @@ module Geo
self.table_name = Gitlab::Geo::Fdw.table('uploads')
scope :with_files_stored_locally, -> { where(store: [nil, ObjectStorage::Store::LOCAL]) }
scope :with_files_stored_remotely, -> { where(store: ObjectStorage::Store::REMOTE) }
end
end
end
......@@ -5,4 +5,5 @@ class Geo::FileRegistry < Geo::BaseRegistry
scope :lfs_objects, -> { where(file_type: :lfs) }
scope :job_artifacts, -> { where(file_type: :job_artifact) }
scope :attachments, -> { where(file_type: Geo::FileService::DEFAULT_OBJECT_TYPES) }
scope :stored_locally, -> { where(store: [nil, ObjectStorage::Store::LOCAL]) }
end
......@@ -105,7 +105,7 @@ class GeoNodeStatus < ActiveRecord::Base
self.wikis_count = projects_finder.count_wikis
self.lfs_objects_count = lfs_objects_finder.count_lfs_objects
self.job_artifacts_count = job_artifacts_finder.count_job_artifacts
self.attachments_count = attachments_finder.count_attachments
self.attachments_count = attachments_finder.count_local_attachments
self.last_successful_status_check_at = Time.now
self.storage_shards = StorageShard.all
......
......@@ -53,19 +53,19 @@ module Geo
end
def find_unsynced_lfs_objects_ids(batch_size:)
lfs_objects_finder.find_unsynced_lfs_objects(batch_size: batch_size, except_registry_ids: scheduled_file_ids(:lfs))
lfs_objects_finder.find_unsynced_lfs_objects(batch_size: batch_size, except_file_ids: scheduled_file_ids(:lfs))
.pluck(:id)
.map { |id| [id, :lfs] }
end
def find_unsynced_attachments_ids(batch_size:)
attachments_finder.find_unsynced_attachments(batch_size: batch_size, except_registry_ids: scheduled_file_ids(Geo::FileService::DEFAULT_OBJECT_TYPES))
attachments_finder.find_unsynced_attachments(batch_size: batch_size, except_file_ids: scheduled_file_ids(Geo::FileService::DEFAULT_OBJECT_TYPES))
.pluck(:id, :uploader)
.map { |id, uploader| [id, uploader.sub(/Uploader\z/, '').underscore] }
end
def find_unsynced_job_artifacts_ids(batch_size:)
job_artifacts_finder.find_unsynced_job_artifacts(batch_size: batch_size, except_registry_ids: scheduled_file_ids(:job_artifact))
job_artifacts_finder.find_unsynced_job_artifacts(batch_size: batch_size, except_file_ids: scheduled_file_ids(:job_artifact))
.pluck(:id)
.map { |id| [id, :job_artifact] }
end
......
---
title: 'Geo: Ignore remote stored objects when calculating counts'
merge_request: 4864
author:
type: fixed
......@@ -28,16 +28,10 @@ describe Geo::AttachmentRegistryFinder, :geo do
stub_current_geo_node(secondary)
end
# Disable transactions via :delete method because a foreign table
# can't see changes inside a transaction of a different connection.
context 'FDW', :delete do
before do
skip('FDW is not configured') if Gitlab::Database.postgresql? && !Gitlab::Geo::Fdw.enabled?
end
shared_examples 'finds all the things' do
describe '#find_synced_attachments' do
it 'delegates to #fdw_find_synced_attachments' do
expect(subject).to receive(:fdw_find_synced_attachments).and_call_original
it 'delegates to the correct method' do
expect(subject).to receive("#{method_prefix}_find_synced_attachments".to_sym).and_call_original
subject.find_synced_attachments
end
......@@ -52,107 +46,17 @@ describe Geo::AttachmentRegistryFinder, :geo do
synced_attachments = subject.find_synced_attachments
expect(synced_attachments.pluck(:id)).to match_array([upload_1.id, upload_2.id, upload_6.id, upload_7.id])
end
context 'with selective sync' do
it 'falls back to legacy queries' do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
expect(subject).to receive(:legacy_find_synced_attachments)
subject.find_synced_attachments
end
end
end
describe '#find_failed_attachments' do
it 'delegates to #fdw_find_failed_attachments' do
expect(subject).to receive(:fdw_find_failed_attachments).and_call_original
subject.find_failed_attachments
expect(synced_attachments).to match_ids(upload_1, upload_2, upload_6, upload_7)
end
it 'returns failed avatars, attachment, personal snippets and files' do
it 'only finds local attachments' do
create(:geo_file_registry, :avatar, file_id: upload_1.id)
create(:geo_file_registry, :avatar, file_id: upload_2.id)
create(:geo_file_registry, :avatar, file_id: upload_3.id, success: false)
create(:geo_file_registry, :avatar, file_id: upload_6.id, success: false)
create(:geo_file_registry, :avatar, file_id: upload_7.id, success: false)
create(:geo_file_registry, :lfs, file_id: lfs_object.id, success: false)
failed_attachments = subject.find_failed_attachments
expect(failed_attachments.pluck(:id)).to match_array([upload_3.id, upload_6.id, upload_7.id])
end
context 'with selective sync' do
it 'falls back to legacy queries' do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
expect(subject).to receive(:legacy_find_failed_attachments)
subject.find_failed_attachments
end
end
end
describe '#find_unsynced_attachments' do
it 'delegates to #fdw_find_unsynced_attachments' do
expect(subject).to receive(:fdw_find_unsynced_attachments).and_call_original
subject.find_unsynced_attachments(batch_size: 10)
end
it 'returns uploads without an entry on the tracking database' do
create(:geo_file_registry, :avatar, file_id: upload_1.id, success: true)
uploads = subject.find_unsynced_attachments(batch_size: 10)
expect(uploads.map(&:id)).to match_array([upload_2.id, upload_3.id, upload_4.id])
end
it 'excludes uploads without an entry on the tracking database' do
create(:geo_file_registry, :avatar, file_id: upload_1.id, success: true)
uploads = subject.find_unsynced_attachments(batch_size: 10, except_registry_ids: [upload_2.id])
expect(uploads.map(&:id)).to match_array([upload_3.id, upload_4.id])
end
it 'excludes remote uploads without an entry on the tracking database' do
create(:geo_file_registry, :avatar, file_id: upload_1.id, success: true)
uploads = subject.find_unsynced_attachments(batch_size: 10)
expect(uploads).not_to include(upload_8, upload_9)
end
end
end
context 'Legacy' do
before do
allow(Gitlab::Geo::Fdw).to receive(:enabled?).and_return(false)
end
describe '#find_synced_attachments' do
it 'delegates to #legacy_find_synced_attachments' do
expect(subject).to receive(:legacy_find_synced_attachments).and_call_original
subject.find_synced_attachments
end
it 'returns synced avatars, attachment, personal snippets and files' do
create(:geo_file_registry, :avatar, file_id: upload_1.id)
create(:geo_file_registry, :avatar, file_id: upload_2.id)
create(:geo_file_registry, :avatar, file_id: upload_3.id, success: false)
create(:geo_file_registry, :avatar, file_id: upload_6.id)
create(:geo_file_registry, :avatar, file_id: upload_7.id)
create(:geo_file_registry, :lfs, file_id: lfs_object.id)
upload_1.update!(store: ObjectStorage::Store::REMOTE)
synced_attachments = subject.find_synced_attachments
expect(synced_attachments).to match_array([upload_1, upload_2, upload_6, upload_7])
expect(synced_attachments).to match_ids(upload_2)
end
context 'with selective sync by namespace' do
......@@ -170,7 +74,7 @@ describe Geo::AttachmentRegistryFinder, :geo do
synced_attachments = subject.find_synced_attachments
expect(synced_attachments).to match_array([upload_1, upload_3, upload_6, upload_7])
expect(synced_attachments).to match_ids(upload_1, upload_3, upload_6, upload_7)
end
end
......@@ -189,14 +93,14 @@ describe Geo::AttachmentRegistryFinder, :geo do
synced_attachments = subject.find_synced_attachments
expect(synced_attachments).to match_array([upload_1, upload_3, upload_6])
expect(synced_attachments).to match_ids(upload_1, upload_3, upload_6)
end
end
end
describe '#find_failed_attachments' do
it 'delegates to #legacy_find_failed_attachments' do
expect(subject).to receive(:legacy_find_failed_attachments).and_call_original
it 'delegates to the correct method' do
expect(subject).to receive("#{method_prefix}_find_failed_attachments".to_sym).and_call_original
subject.find_failed_attachments
end
......@@ -211,7 +115,7 @@ describe Geo::AttachmentRegistryFinder, :geo do
failed_attachments = subject.find_failed_attachments
expect(failed_attachments).to match_array([upload_3, upload_6, upload_7])
expect(failed_attachments).to match_ids(upload_3, upload_6, upload_7)
end
context 'with selective sync by namespace' do
......@@ -229,7 +133,7 @@ describe Geo::AttachmentRegistryFinder, :geo do
failed_attachments = subject.find_failed_attachments
expect(failed_attachments).to match_array([upload_1, upload_3, upload_6, upload_7])
expect(failed_attachments).to match_ids(upload_1, upload_3, upload_6, upload_7)
end
end
......@@ -248,14 +152,14 @@ describe Geo::AttachmentRegistryFinder, :geo do
failed_attachments = subject.find_failed_attachments
expect(failed_attachments).to match_array([upload_1, upload_3, upload_6])
expect(failed_attachments).to match_ids(upload_1, upload_3, upload_6)
end
end
end
describe '#find_unsynced_attachments' do
it 'delegates to #legacy_find_unsynced_attachments' do
expect(subject).to receive(:legacy_find_unsynced_attachments).and_call_original
it 'delegates to the correct method' do
expect(subject).to receive("#{method_prefix}_find_unsynced_attachments".to_sym).and_call_original
subject.find_unsynced_attachments(batch_size: 10)
end
......@@ -265,15 +169,15 @@ describe Geo::AttachmentRegistryFinder, :geo do
uploads = subject.find_unsynced_attachments(batch_size: 10)
expect(uploads).to match_array([upload_2, upload_3, upload_4])
expect(uploads).to match_ids(upload_2, upload_3, upload_4)
end
it 'excludes uploads without an entry on the tracking database' do
create(:geo_file_registry, :avatar, file_id: upload_1.id, success: true)
uploads = subject.find_unsynced_attachments(batch_size: 10, except_registry_ids: [upload_2.id])
uploads = subject.find_unsynced_attachments(batch_size: 10, except_file_ids: [upload_2.id])
expect(uploads).to match_array([upload_3, upload_4])
expect(uploads).to match_ids(upload_3, upload_4)
end
it 'excludes remote uploads without an entry on the tracking database' do
......@@ -281,8 +185,48 @@ describe Geo::AttachmentRegistryFinder, :geo do
uploads = subject.find_unsynced_attachments(batch_size: 10)
expect(uploads).not_to include(upload_8, upload_9)
expect(uploads).to match_ids(upload_2, upload_3, upload_4)
end
end
end
# Disable transactions via :delete method because a foreign table
# can't see changes inside a transaction of a different connection.
context 'FDW', :delete do
before do
skip('FDW is not configured') if Gitlab::Database.postgresql? && !Gitlab::Geo::Fdw.enabled?
end
include_examples 'finds all the things' do
let(:method_prefix) { 'fdw' }
end
context 'with selective sync' do
before do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
end
it '#find_synced_attachments falls back to legacy queries' do
expect(subject).to receive(:legacy_find_synced_attachments)
subject.find_synced_attachments
end
it '#find_failed_attachments falls back to legacy queries' do
expect(subject).to receive(:legacy_find_failed_attachments)
subject.find_failed_attachments
end
end
end
context 'Legacy' do
before do
allow(Gitlab::Geo::Fdw).to receive(:enabled?).and_return(false)
end
include_examples 'finds all the things' do
let(:method_prefix) { 'legacy' }
end
end
end
......@@ -20,8 +20,8 @@ describe Geo::JobArtifactRegistryFinder, :geo do
end
describe '#count_synced_job_artifacts' do
it 'delegates to #find_synced_job_artifacts_registries' do
expect(subject).to receive(:find_synced_job_artifacts_registries).and_call_original
it 'delegates to #legacy_find_synced_job_artifacts' do
expect(subject).to receive(:legacy_find_synced_job_artifacts).and_call_original
subject.count_synced_job_artifacts
end
......@@ -34,6 +34,15 @@ describe Geo::JobArtifactRegistryFinder, :geo do
expect(subject.count_synced_job_artifacts).to eq 2
end
it 'ignores remote job artifacts' do
create(:geo_file_registry, :job_artifact, file_id: job_artifact_1.id)
create(:geo_file_registry, :job_artifact, file_id: job_artifact_2.id)
create(:geo_file_registry, :job_artifact, file_id: job_artifact_3.id)
job_artifact_1.update!(file_store: ObjectStorage::Store::REMOTE)
expect(subject.count_synced_job_artifacts).to eq 2
end
context 'with selective sync' do
before do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
......@@ -52,12 +61,21 @@ describe Geo::JobArtifactRegistryFinder, :geo do
expect(subject.count_synced_job_artifacts).to eq 1
end
it 'ignores remote job artifacts' do
create(:geo_file_registry, :job_artifact, file_id: job_artifact_1.id)
create(:geo_file_registry, :job_artifact, file_id: job_artifact_2.id)
create(:geo_file_registry, :job_artifact, file_id: job_artifact_3.id)
job_artifact_1.update!(file_store: ObjectStorage::Store::REMOTE)
expect(subject.count_synced_job_artifacts).to eq 1
end
end
end
describe '#count_failed_job_artifacts' do
it 'delegates to #find_failed_job_artifacts_registries' do
expect(subject).to receive(:find_failed_job_artifacts_registries).and_call_original
it 'delegates to #legacy_find_failed_job_artifacts' do
expect(subject).to receive(:legacy_find_failed_job_artifacts).and_call_original
subject.count_failed_job_artifacts
end
......@@ -70,6 +88,15 @@ describe Geo::JobArtifactRegistryFinder, :geo do
expect(subject.count_failed_job_artifacts).to eq 2
end
it 'ignores remote job artifacts' do
create(:geo_file_registry, :job_artifact, file_id: job_artifact_1.id, success: false)
create(:geo_file_registry, :job_artifact, file_id: job_artifact_2.id, success: false)
create(:geo_file_registry, :job_artifact, file_id: job_artifact_3.id, success: false)
job_artifact_1.update!(file_store: ObjectStorage::Store::REMOTE)
expect(subject.count_failed_job_artifacts).to eq 2
end
context 'with selective sync' do
before do
secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
......@@ -93,19 +120,22 @@ describe Geo::JobArtifactRegistryFinder, :geo do
expect(subject.count_failed_job_artifacts).to eq 0
end
end
end
# Disable transactions via :delete method because a foreign table
# can't see changes inside a transaction of a different connection.
context 'FDW', :delete do
before do
skip('FDW is not configured') if Gitlab::Database.postgresql? && !Gitlab::Geo::Fdw.enabled?
it 'ignores remote job artifacts' do
create(:geo_file_registry, :job_artifact, file_id: job_artifact_1.id, success: false)
create(:geo_file_registry, :job_artifact, file_id: job_artifact_2.id, success: false)
create(:geo_file_registry, :job_artifact, file_id: job_artifact_3.id, success: false)
job_artifact_1.update!(file_store: ObjectStorage::Store::REMOTE)
expect(subject.count_failed_job_artifacts).to eq 1
end
end
end
shared_examples 'finds all the things' do
describe '#find_unsynced_job_artifacts' do
it 'delegates to #fdw_find_unsynced_job_artifacts' do
expect(subject).to receive(:fdw_find_unsynced_job_artifacts).and_call_original
it 'delegates to the correct method' do
expect(subject).to receive("#{method_prefix}_find_unsynced_job_artifacts".to_sym).and_call_original
subject.find_unsynced_job_artifacts(batch_size: 10)
end
......@@ -116,49 +146,39 @@ describe Geo::JobArtifactRegistryFinder, :geo do
job_artifacts = subject.find_unsynced_job_artifacts(batch_size: 10)
expect(job_artifacts.map(&:id)).to match_array([job_artifact_2.id, job_artifact_4.id])
expect(job_artifacts).to match_ids(job_artifact_2, job_artifact_4)
end
it 'excludes job artifacts without an entry on the tracking database' do
create(:geo_file_registry, :job_artifact, file_id: job_artifact_1.id, success: true)
create(:geo_file_registry, :job_artifact, file_id: job_artifact_3.id, success: false)
job_artifacts = subject.find_unsynced_job_artifacts(batch_size: 10, except_registry_ids: [job_artifact_2.id])
job_artifacts = subject.find_unsynced_job_artifacts(batch_size: 10, except_file_ids: [job_artifact_2.id])
expect(job_artifacts.map(&:id)).to match_array([job_artifact_4.id])
expect(job_artifacts).to match_ids(job_artifact_4)
end
end
end
context 'Legacy' do
# Disable transactions via :delete method because a foreign table
# can't see changes inside a transaction of a different connection.
context 'FDW', :delete do
before do
allow(Gitlab::Geo::Fdw).to receive(:enabled?).and_return(false)
skip('FDW is not configured') if Gitlab::Database.postgresql? && !Gitlab::Geo::Fdw.enabled?
end
describe '#find_unsynced_job_artifacts' do
it 'delegates to #legacy_find_unsynced_job_artifacts' do
expect(subject).to receive(:legacy_find_unsynced_job_artifacts).and_call_original
subject.find_unsynced_job_artifacts(batch_size: 10)
end
# Artifacts 1 and 3 get a registry row (synced / failed); 2 and 4 get none.
it 'returns job artifacts without an entry on the tracking database' do
  [[job_artifact_1, true], [job_artifact_3, false]].each do |artifact, synced|
    create(:geo_file_registry, :job_artifact, file_id: artifact.id, success: synced)
  end

  expect(subject.find_unsynced_job_artifacts(batch_size: 10))
    .to match_array([job_artifact_2, job_artifact_4])
end
it 'excludes job artifacts without an entry on the tracking database' do
create(:geo_file_registry, :job_artifact, file_id: job_artifact_1.id, success: true)
create(:geo_file_registry, :job_artifact, file_id: job_artifact_3.id, success: false)
include_examples('finds all the things') do
  # Prefix of the implementation method the shared examples expect to be called.
  let(:method_prefix) do
    'fdw'
  end
end
end
job_artifacts = subject.find_unsynced_job_artifacts(batch_size: 10, except_registry_ids: [job_artifact_2.id])
context 'Legacy' do
before do
  # Stub FDW as disabled for every example in this context.
  fdw = Gitlab::Geo::Fdw
  allow(fdw).to receive(:enabled?).and_return(false)
end
expect(job_artifacts).to match_array([job_artifact_4])
end
include_examples('finds all the things') do
  # Prefix of the implementation method the shared examples expect to be called.
  let(:method_prefix) do
    'legacy'
  end
end
end
end
......@@ -20,8 +20,8 @@ describe Geo::LfsObjectRegistryFinder, :geo do
end
describe '#count_synced_lfs_objects' do
it 'delegates to #find_synced_lfs_objects_registries' do
expect(subject).to receive(:find_synced_lfs_objects_registries).and_call_original
# Counting synced LFS objects must go through the legacy finder.
it 'delegates to #legacy_find_synced_lfs_objects' do
  finder = subject

  expect(finder).to receive(:legacy_find_synced_lfs_objects).and_call_original

  finder.count_synced_lfs_objects
end
......@@ -34,8 +34,23 @@ describe Geo::LfsObjectRegistryFinder, :geo do
expect(subject.count_synced_lfs_objects).to eq 2
end
# Three registry rows exist, but the object moved to remote storage must not count.
it 'ignores remote LFS objects' do
  [lfs_object_1, lfs_object_2, lfs_object_3].each do |lfs_object|
    create(:geo_file_registry, :lfs, file_id: lfs_object.id)
  end
  lfs_object_1.update!(file_store: ObjectStorage::Store::REMOTE)

  expect(subject.count_synced_lfs_objects).to eq 2
end
context 'with selective sync' do
before do
  allow_any_instance_of(LfsObjectsProject).to receive(:update_project_statistics).and_return(nil)

  # Two objects belong to the synced project, one to the unsynced project.
  [
    [synced_project, lfs_object_1],
    [synced_project, lfs_object_2],
    [unsynced_project, lfs_object_3]
  ].each do |project, lfs_object|
    create(:lfs_objects_project, project: project, lfs_object: lfs_object)
  end

  secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
end
......@@ -46,15 +61,18 @@ describe Geo::LfsObjectRegistryFinder, :geo do
end
# The project links and the `update_project_statistics` stub come from the
# enclosing `before` hook; repeating them here would create the same
# project/object links twice. Only the registry rows are specific to this example.
it 'counts LFS objects that has been synced' do
  create(:geo_file_registry, :lfs, file_id: lfs_object_1.id, success: false)
  create(:geo_file_registry, :lfs, file_id: lfs_object_2.id)
  create(:geo_file_registry, :lfs, file_id: lfs_object_3.id)

  expect(subject.count_synced_lfs_objects).to eq 1
end
create(:geo_file_registry, :lfs, file_id: lfs_object_1.id, success: false)
# Three registry rows exist, but the object moved to remote storage must not count.
it 'ignores remote LFS objects' do
  [lfs_object_1, lfs_object_2, lfs_object_3].each do |lfs_object|
    create(:geo_file_registry, :lfs, file_id: lfs_object.id)
  end
  lfs_object_1.update!(file_store: ObjectStorage::Store::REMOTE)

  expect(subject.count_synced_lfs_objects).to eq 1
end
......@@ -62,8 +80,8 @@ describe Geo::LfsObjectRegistryFinder, :geo do
end
describe '#count_failed_lfs_objects' do
it 'delegates to #find_failed_lfs_objects_registries' do
expect(subject).to receive(:find_failed_lfs_objects_registries).and_call_original
# Counting failed LFS objects must go through the legacy finder.
it 'delegates to #legacy_find_failed_lfs_objects' do
  finder = subject

  expect(finder).to receive(:legacy_find_failed_lfs_objects).and_call_original

  finder.count_failed_lfs_objects
end
......@@ -76,8 +94,23 @@ describe Geo::LfsObjectRegistryFinder, :geo do
expect(subject.count_failed_lfs_objects).to eq 2
end
# All three registry rows are failures, but the remotely stored object must not count.
it 'ignores remote LFS objects' do
  [lfs_object_1, lfs_object_2, lfs_object_3].each do |lfs_object|
    create(:geo_file_registry, :lfs, file_id: lfs_object.id, success: false)
  end
  lfs_object_1.update!(file_store: ObjectStorage::Store::REMOTE)

  expect(subject.count_failed_lfs_objects).to eq 2
end
context 'with selective sync' do
before do
  allow_any_instance_of(LfsObjectsProject).to receive(:update_project_statistics).and_return(nil)

  # Two objects belong to the synced project, one to the unsynced project.
  [
    [synced_project, lfs_object_1],
    [synced_project, lfs_object_2],
    [unsynced_project, lfs_object_3]
  ].each do |project, lfs_object|
    create(:lfs_objects_project, project: project, lfs_object: lfs_object)
  end

  secondary.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
end
......@@ -88,31 +121,28 @@ describe Geo::LfsObjectRegistryFinder, :geo do
end
# The project links and the `update_project_statistics` stub come from the
# enclosing `before` hook; repeating them here would create the same
# project/object links twice. Only the registry rows are specific to this example.
it 'counts LFS objects that sync has failed' do
  create(:geo_file_registry, :lfs, file_id: lfs_object_1.id, success: false)
  create(:geo_file_registry, :lfs, file_id: lfs_object_2.id)
  create(:geo_file_registry, :lfs, file_id: lfs_object_3.id, success: false)

  expect(subject.count_failed_lfs_objects).to eq 1
end
# Every object has exactly one failed registry row; the object moved to
# remote storage must not be counted.
it 'ignores remote LFS objects' do
  # One row per object: a second, successful row for lfs_object_2 would
  # contradict the failed one and make the expected count ambiguous.
  create(:geo_file_registry, :lfs, file_id: lfs_object_1.id, success: false)
  create(:geo_file_registry, :lfs, file_id: lfs_object_2.id, success: false)
  create(:geo_file_registry, :lfs, file_id: lfs_object_3.id, success: false)
  lfs_object_1.update!(file_store: ObjectStorage::Store::REMOTE)

  expect(subject.count_failed_lfs_objects).to eq 1
end
end
end
# Disable transactions via :delete method because a foreign table
# can't see changes inside a transaction of a different connection.
context 'FDW', :delete do
before do
  # Skip when running on PostgreSQL without FDW enabled.
  fdw_unavailable = Gitlab::Database.postgresql? && !Gitlab::Geo::Fdw.enabled?
  skip('FDW is not configured') if fdw_unavailable
end
shared_examples 'finds all the things' do
describe '#find_unsynced_lfs_objects' do
it 'delegates to #fdw_find_unsynced_lfs_objects' do
expect(subject).to receive(:fdw_find_unsynced_lfs_objects).and_call_original
# `method_prefix` is supplied by the including context ('fdw' or 'legacy').
it 'delegates to the correct method' do
  delegate = :"#{method_prefix}_find_unsynced_lfs_objects"

  expect(subject).to receive(delegate).and_call_original

  subject.find_unsynced_lfs_objects(batch_size: 10)
end
......@@ -123,49 +153,39 @@ describe Geo::LfsObjectRegistryFinder, :geo do
lfs_objects = subject.find_unsynced_lfs_objects(batch_size: 10)
expect(lfs_objects.map(&:id)).to match_array([lfs_object_2.id, lfs_object_4.id])
expect(lfs_objects).to match_ids(lfs_object_2, lfs_object_4)
end
# Ids passed through `except_file_ids:` must be left out of the result even
# though no registry row exists for them.
it 'excludes LFS objects without an entry on the tracking database' do
  create(:geo_file_registry, :lfs, file_id: lfs_object_1.id, success: true)
  create(:geo_file_registry, :lfs, file_id: lfs_object_3.id, success: false)

  # The finder is called once, with the current keyword name; the stale
  # `except_registry_ids:` call and its duplicate assertion are dropped.
  lfs_objects = subject.find_unsynced_lfs_objects(batch_size: 10, except_file_ids: [lfs_object_2.id])

  expect(lfs_objects).to match_ids(lfs_object_4)
end
end
end
context 'Legacy' do
# Disable transactions via :delete method because a foreign table
# can't see changes inside a transaction of a different connection.
context 'FDW', :delete do
before do
  # Do not stub `Gitlab::Geo::Fdw.enabled?` here: forcing it to false would
  # make this guard skip every example on PostgreSQL.
  skip('FDW is not configured') if Gitlab::Database.postgresql? && !Gitlab::Geo::Fdw.enabled?
end
describe '#find_unsynced_lfs_objects' do
# The public finder must forward to the legacy implementation.
it 'delegates to #legacy_find_unsynced_lfs_objects' do
  finder = subject

  expect(finder).to receive(:legacy_find_unsynced_lfs_objects).and_call_original

  finder.find_unsynced_lfs_objects(batch_size: 10)
end
# Objects 1 and 3 get a registry row (synced / failed); 2 and 4 get none.
it 'returns LFS objects without an entry on the tracking database' do
  [[lfs_object_1, true], [lfs_object_3, false]].each do |lfs_object, synced|
    create(:geo_file_registry, :lfs, file_id: lfs_object.id, success: synced)
  end

  expect(subject.find_unsynced_lfs_objects(batch_size: 10))
    .to match_array([lfs_object_2, lfs_object_4])
end
it 'excludes LFS objects without an entry on the tracking database' do
create(:geo_file_registry, :lfs, file_id: lfs_object_1.id, success: true)
create(:geo_file_registry, :lfs, file_id: lfs_object_3.id, success: false)
include_examples('finds all the things') do
  # Prefix of the implementation method the shared examples expect to be called.
  let(:method_prefix) do
    'fdw'
  end
end
end
lfs_objects = subject.find_unsynced_lfs_objects(batch_size: 10, except_registry_ids: [lfs_object_2.id])
context 'Legacy' do
before do
  # Stub FDW as disabled for every example in this context.
  fdw = Gitlab::Geo::Fdw
  allow(fdw).to receive(:enabled?).and_return(false)
end
expect(lfs_objects).to match_array([lfs_object_4])
end
include_examples('finds all the things') do
  # Prefix of the implementation method the shared examples expect to be called.
  let(:method_prefix) do
    'legacy'
  end
end
end
end
# Custom matcher that compares two sets of objects by their `id`, ignoring order.
#
#   expect(records).to match_ids(record_a, record_b)
#   expect(records).to match_ids([record_a, record_b])
RSpec::Matchers.define :match_ids do |*expected|
  # Reduces a single object, a collection, or nested collections to a flat
  # list of ids.
  #
  # Raises ArgumentError when the argument is neither a collection nor an
  # object responding to `id`.
  def map_ids(elements)
    flattened = elements.respond_to?(:flatten) ? elements.flatten : elements

    return flattened.map(&:id) if flattened.respond_to?(:map)
    return [flattened.id] if flattened.respond_to?(:id)

    raise ArgumentError, "could not map elements to ids: #{flattened}"
  end

  match do |actual|
    expect(map_ids(actual)).to match_array(map_ids(expected))
  end

  description { 'matches elements by ids' }
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment