Add method to find registries differences for containers

This method is used to find untracked container
repositories that we need to sync and unused
registries in the tracking database that we
can remove.
parent 3d5d66b0
......@@ -20,6 +20,16 @@ module Geo
Geo::ContainerRepositoryRegistry.count
end
def find_registry_differences(range)
source_ids = Gitlab::Geo.current_node.container_repositories.id_in(range).pluck_primary_key
tracked_ids = Geo::ContainerRepositoryRegistry.pluck_model_ids_in_range(range)
untracked_ids = source_ids - tracked_ids
unused_tracked_ids = tracked_ids - source_ids
[untracked_ids, unused_tracked_ids]
end
# Find limited amount of non replicated container repositories.
#
# You can pass a list with `except_repository_ids:` so you can exclude items you
......@@ -41,7 +51,7 @@ module Geo
Geo::ContainerRepositoryRegistry
.failed
.retry_due
.repository_id_not_in(except_repository_ids)
.model_id_not_in(except_repository_ids)
.limit(batch_size)
.pluck_container_repository_key
end
......
......@@ -5,7 +5,7 @@ module EE
extend ActiveSupport::Concern
prepended do
scope :project_id_in, ->(ids) { joins(:project).merge(Project.id_in(ids)) }
scope :project_id_in, ->(ids) { joins(:project).merge(::Project.id_in(ids)) }
end
def push_blob(digest, file_path)
......
......@@ -3,9 +3,11 @@
class Geo::ContainerRepositoryRegistry < Geo::BaseRegistry
include ::Delay
MODEL_CLASS = ::ContainerRepository
MODEL_FOREIGN_KEY = :container_repository_id
belongs_to :container_repository
scope :repository_id_not_in, -> (ids) { where.not(container_repository_id: ids) }
scope :failed, -> { with_state(:failed) }
scope :synced, -> { with_state(:synced) }
scope :retry_due, -> { where(arel_table[:retry_at].eq(nil).or(arel_table[:retry_at].lt(Time.current))) }
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo, :geo_fdw do
RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo do
include ::EE::GeoHelpers
let!(:secondary) { create(:geo_node) }
let!(:container_repository) { create(:container_repository) }
let!(:failed_registry) { create(:container_repository_registry, :sync_failed) }
let!(:synced_registry) { create(:container_repository_registry, :synced) }
context 'count all the things', :geo_fdw do
let!(:secondary) { create(:geo_node) }
let!(:container_repository) { create(:container_repository) }
let!(:failed_registry) { create(:container_repository_registry, :sync_failed) }
let!(:synced_registry) { create(:container_repository_registry, :synced) }
let(:synced_group) { create(:group) }
let(:unsynced_group) { create(:group) }
let(:synced_project) { create(:project, group: synced_group) }
let(:unsynced_project) { create(:project, :broken_storage, group: unsynced_group) }
let(:synced_group) { create(:group) }
let(:unsynced_group) { create(:group) }
let(:synced_project) { create(:project, group: synced_group) }
let(:unsynced_project) { create(:project, :broken_storage, group: unsynced_group) }
subject { described_class.new(current_node_id: secondary.id) }
subject { described_class.new(current_node_id: secondary.id) }
before do
stub_current_geo_node(secondary)
end
before do
stub_current_geo_node(secondary)
end
context 'count all the things' do
describe '#count_syncable' do
it 'returns number of container repositories' do
result = subject.count_syncable
......@@ -54,7 +54,23 @@ RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo, :geo_fdw do
end
end
context 'find all the things' do
context 'find all the things', :geo_fdw do
let!(:secondary) { create(:geo_node) }
let!(:container_repository) { create(:container_repository) }
let!(:failed_registry) { create(:container_repository_registry, :sync_failed) }
let!(:synced_registry) { create(:container_repository_registry, :synced) }
let(:synced_group) { create(:group) }
let(:unsynced_group) { create(:group) }
let(:synced_project) { create(:project, group: synced_group) }
let(:unsynced_project) { create(:project, :broken_storage, group: unsynced_group) }
subject { described_class.new(current_node_id: secondary.id) }
before do
stub_current_geo_node(secondary)
end
describe '#find_unsynced' do
it 'returns repositories without an entry in the tracking database' do
repositories = subject.find_unsynced(batch_size: 10)
......@@ -112,4 +128,155 @@ RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo, :geo_fdw do
end
end
end
describe '#find_registry_differences' do
let_it_be(:secondary) { create(:geo_node) }
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group) { create(:group, parent: synced_group) }
let_it_be(:project_synced_group) { create(:project, group: synced_group) }
let_it_be(:project_nested_group) { create(:project, group: nested_group) }
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
let_it_be(:container_repository_1) { create(:container_repository, project: project_synced_group) }
let_it_be(:container_repository_2) { create(:container_repository, project: project_nested_group) }
let_it_be(:container_repository_3) { create(:container_repository) }
let_it_be(:container_repository_4) { create(:container_repository) }
let_it_be(:container_repository_5) { create(:container_repository, project: project_broken_storage) }
let_it_be(:container_repository_6) { create(:container_repository, project: project_broken_storage) }
before do
stub_current_geo_node(secondary)
end
context 'untracked IDs' do
before do
create(:container_repository_registry, container_repository_id: container_repository_1.id)
create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_3.id)
create(:container_repository_registry, container_repository_id: container_repository_5.id)
end
it 'includes container registries IDs without an entry on the tracking database' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_2.id, container_repository_4.id, container_repository_6.id])
end
it 'excludes container registries outside the ID range' do
untracked_ids, _ = subject.find_registry_differences(container_repository_4.id..container_repository_6.id)
expect(untracked_ids).to match_array([container_repository_4.id, container_repository_6.id])
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes container_registry IDs that projects are not in the selected namespaces' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_2.id])
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes container_registry IDs that projects are not in the selected shards' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_6.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
before do
container_repository_1.delete
end
it 'includes tracked IDs that do not exist in the model table' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_1.id])
end
it 'excludes IDs outside the ID range' do
range = (container_repository_1.id + 1)..ContainerRepository.maximum(:id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked container_registry' do
context 'excluded from selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_3.id) }
it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
range = container_repository_3.id..container_repository_3.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_3.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
it 'excludes tracked container_registry IDs that are in selectively synced projects' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked container_registry' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
context 'excluded from selective sync' do
it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_1.id])
end
end
context 'included in selective sync' do
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_5.id) }
it 'excludes tracked container_registry IDs that are in selectively synced projects' do
range = container_repository_5.id..container_repository_5.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
end
......@@ -14,22 +14,6 @@ RSpec.describe Geo::ContainerRepositoryRegistry, :geo do
it { is_expected.to belong_to(:container_repository) }
end
describe 'scopes' do
describe '.repository_id_not_in' do
it 'returns registries scoped by ids' do
registry1 = create(:container_repository_registry)
registry2 = create(:container_repository_registry)
container_repository1_id = registry1.container_repository_id
container_repository2_id = registry2.container_repository_id
result = described_class.repository_id_not_in([container_repository1_id, container_repository2_id])
expect(result).to match_ids([registry])
end
end
end
it_behaves_like 'a Geo registry' do
let(:registry) { create(:container_repository_registry) }
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment