Add method to find registry differences for container repositories

This method is used to find untracked container
repositories that we need to sync, and unused
registry entries in the tracking database that we
can remove.
parent 3d5d66b0
...@@ -20,6 +20,16 @@ module Geo ...@@ -20,6 +20,16 @@ module Geo
Geo::ContainerRepositoryRegistry.count Geo::ContainerRepositoryRegistry.count
end end
# Compares, within the given ID range, the container repository IDs of the
# current Geo node against the model IDs held by
# Geo::ContainerRepositoryRegistry.
#
# @param range [Range] ID range passed to both lookups
# @return [Array(Array, Array)] a pair: first the IDs present on the node
#   but missing from the registry, then the IDs present in the registry but
#   missing from the node
def find_registry_differences(range)
  node_repositories = Gitlab::Geo.current_node.container_repositories
  ids_on_node = node_repositories.id_in(range).pluck_primary_key
  ids_in_registry = Geo::ContainerRepositoryRegistry.pluck_model_ids_in_range(range)

  [ids_on_node - ids_in_registry, ids_in_registry - ids_on_node]
end
# Find limited amount of non replicated container repositories. # Find limited amount of non replicated container repositories.
# #
# You can pass a list with `except_repository_ids:` so you can exclude items you # You can pass a list with `except_repository_ids:` so you can exclude items you
...@@ -41,7 +51,7 @@ module Geo ...@@ -41,7 +51,7 @@ module Geo
Geo::ContainerRepositoryRegistry Geo::ContainerRepositoryRegistry
.failed .failed
.retry_due .retry_due
.repository_id_not_in(except_repository_ids) .model_id_not_in(except_repository_ids)
.limit(batch_size) .limit(batch_size)
.pluck_container_repository_key .pluck_container_repository_key
end end
......
...@@ -5,7 +5,7 @@ module EE ...@@ -5,7 +5,7 @@ module EE
extend ActiveSupport::Concern extend ActiveSupport::Concern
prepended do prepended do
scope :project_id_in, ->(ids) { joins(:project).merge(Project.id_in(ids)) } scope :project_id_in, ->(ids) { joins(:project).merge(::Project.id_in(ids)) }
end end
def push_blob(digest, file_path) def push_blob(digest, file_path)
......
...@@ -3,9 +3,11 @@ ...@@ -3,9 +3,11 @@
class Geo::ContainerRepositoryRegistry < Geo::BaseRegistry class Geo::ContainerRepositoryRegistry < Geo::BaseRegistry
include ::Delay include ::Delay
MODEL_CLASS = ::ContainerRepository
MODEL_FOREIGN_KEY = :container_repository_id
belongs_to :container_repository belongs_to :container_repository
scope :repository_id_not_in, -> (ids) { where.not(container_repository_id: ids) }
scope :failed, -> { with_state(:failed) } scope :failed, -> { with_state(:failed) }
scope :synced, -> { with_state(:synced) } scope :synced, -> { with_state(:synced) }
scope :retry_due, -> { where(arel_table[:retry_at].eq(nil).or(arel_table[:retry_at].lt(Time.current))) } scope :retry_due, -> { where(arel_table[:retry_at].eq(nil).or(arel_table[:retry_at].lt(Time.current))) }
......
# frozen_string_literal: true # frozen_string_literal: true
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo, :geo_fdw do RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo do
include ::EE::GeoHelpers include ::EE::GeoHelpers
context 'count all the things', :geo_fdw do
let!(:secondary) { create(:geo_node) } let!(:secondary) { create(:geo_node) }
let!(:container_repository) { create(:container_repository) } let!(:container_repository) { create(:container_repository) }
let!(:failed_registry) { create(:container_repository_registry, :sync_failed) } let!(:failed_registry) { create(:container_repository_registry, :sync_failed) }
...@@ -20,7 +21,6 @@ RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo, :geo_fdw do ...@@ -20,7 +21,6 @@ RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo, :geo_fdw do
stub_current_geo_node(secondary) stub_current_geo_node(secondary)
end end
context 'count all the things' do
describe '#count_syncable' do describe '#count_syncable' do
it 'returns number of container repositories' do it 'returns number of container repositories' do
result = subject.count_syncable result = subject.count_syncable
...@@ -54,7 +54,23 @@ RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo, :geo_fdw do ...@@ -54,7 +54,23 @@ RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo, :geo_fdw do
end end
end end
context 'find all the things' do context 'find all the things', :geo_fdw do
let!(:secondary) { create(:geo_node) }
let!(:container_repository) { create(:container_repository) }
let!(:failed_registry) { create(:container_repository_registry, :sync_failed) }
let!(:synced_registry) { create(:container_repository_registry, :synced) }
let(:synced_group) { create(:group) }
let(:unsynced_group) { create(:group) }
let(:synced_project) { create(:project, group: synced_group) }
let(:unsynced_project) { create(:project, :broken_storage, group: unsynced_group) }
subject { described_class.new(current_node_id: secondary.id) }
before do
stub_current_geo_node(secondary)
end
describe '#find_unsynced' do describe '#find_unsynced' do
it 'returns repositories without an entry in the tracking database' do it 'returns repositories without an entry in the tracking database' do
repositories = subject.find_unsynced(batch_size: 10) repositories = subject.find_unsynced(batch_size: 10)
...@@ -112,4 +128,155 @@ RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo, :geo_fdw do ...@@ -112,4 +128,155 @@ RSpec.describe Geo::ContainerRepositoryRegistryFinder, :geo, :geo_fdw do
end end
end end
end end
# Specs for #find_registry_differences. Every example destructures the return
# value as a pair: untracked IDs first, unused tracked IDs second.
describe '#find_registry_differences' do
let_it_be(:secondary) { create(:geo_node) }
# nested_group is created as a child of synced_group.
let_it_be(:synced_group) { create(:group) }
let_it_be(:nested_group) { create(:group, parent: synced_group) }
let_it_be(:project_synced_group) { create(:project, group: synced_group) }
let_it_be(:project_nested_group) { create(:project, group: nested_group) }
# presumably the :broken_storage trait puts the project on the 'broken' shard
# used by the shard contexts below — confirm in the project factory
let_it_be(:project_broken_storage) { create(:project, :broken_storage) }
# Repositories 1 and 2 belong to the synced group tree, 3 and 4 use whatever
# project the factory builds by default, 5 and 6 share the :broken_storage project.
# NOTE(review): the range-based examples assume IDs grow in declaration order — confirm.
let_it_be(:container_repository_1) { create(:container_repository, project: project_synced_group) }
let_it_be(:container_repository_2) { create(:container_repository, project: project_nested_group) }
let_it_be(:container_repository_3) { create(:container_repository) }
let_it_be(:container_repository_4) { create(:container_repository) }
let_it_be(:container_repository_5) { create(:container_repository, project: project_broken_storage) }
let_it_be(:container_repository_6) { create(:container_repository, project: project_broken_storage) }
before do
stub_current_geo_node(secondary)
end
context 'untracked IDs' do
# Registry entries exist for repositories 1, 3 and 5 only, so 2, 4 and 6
# have no entry in these examples.
before do
create(:container_repository_registry, container_repository_id: container_repository_1.id)
create(:container_repository_registry, :sync_failed, container_repository_id: container_repository_3.id)
create(:container_repository_registry, container_repository_id: container_repository_5.id)
end
it 'includes container registries IDs without an entry on the tracking database' do
# The range spans every ContainerRepository row (minimum to maximum ID).
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
expect(untracked_ids).to match_array([container_repository_2.id, container_repository_4.id, container_repository_6.id])
end
it 'excludes container registries outside the ID range' do
# Range limited to repositories 4 through 6; repository 5 already has an entry.
untracked_ids, _ = subject.find_registry_differences(container_repository_4.id..container_repository_6.id)
expect(untracked_ids).to match_array([container_repository_4.id, container_repository_6.id])
end
context 'with selective sync by namespace' do
# Overrides the outer `secondary` with a node restricted to synced_group.
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
it 'excludes container_registry IDs that projects are not in the selected namespaces' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
# Only repository 2 (nested group project) is expected; repository 1 already has an entry.
expect(untracked_ids).to match_array([container_repository_2.id])
end
end
context 'with selective sync by shard' do
# Overrides the outer `secondary` with a node restricted to the 'broken' shard.
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
it 'excludes container_registry IDs that projects are not in the selected shards' do
range = ContainerRepository.minimum(:id)..ContainerRepository.maximum(:id)
untracked_ids, _ = subject.find_registry_differences(range)
# Only repository 6 is expected; repository 5 already has an entry.
expect(untracked_ids).to match_array([container_repository_6.id])
end
end
end
context 'unused tracked IDs' do
context 'with an orphaned registry' do
let!(:orphaned) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
# Deletes repository 1 so that the registry entry created for it above
# points at an ID the examples treat as missing from the model table.
before do
container_repository_1.delete
end
it 'includes tracked IDs that do not exist in the model table' do
# Single-element range covering only the deleted repository's ID.
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_1.id])
end
it 'excludes IDs outside the ID range' do
# Range starts just after the deleted repository's ID.
range = (container_repository_1.id + 1)..ContainerRepository.maximum(:id)
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
context 'with selective sync by namespace' do
# Overrides the outer `secondary` with a node restricted to synced_group.
let(:secondary) { create(:geo_node, selective_sync_type: 'namespaces', namespaces: [synced_group]) }
context 'with a tracked container_registry' do
context 'excluded from selective sync' do
# Repository 3 is not created under synced_group.
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_3.id) }
it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
range = container_repository_3.id..container_repository_3.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_3.id])
end
end
context 'included in selective sync' do
# Repository 1 belongs to a project in synced_group.
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
it 'excludes tracked container_registry IDs that are in selectively synced projects' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
context 'with selective sync by shard' do
# Overrides the outer `secondary` with a node restricted to the 'broken' shard.
let(:secondary) { create(:geo_node, selective_sync_type: 'shards', selective_sync_shards: ['broken']) }
context 'with a tracked container_registry' do
# Default entry tracks repository 1, whose project is not the :broken_storage one.
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_1.id) }
context 'excluded from selective sync' do
it 'includes tracked container_registry IDs that exist but are not in a selectively synced project' do
range = container_repository_1.id..container_repository_1.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to match_array([container_repository_1.id])
end
end
context 'included in selective sync' do
# Redefines registry_entry to track repository 5 (the :broken_storage project) instead.
let!(:registry_entry) { create(:container_repository_registry, container_repository_id: container_repository_5.id) }
it 'excludes tracked container_registry IDs that are in selectively synced projects' do
range = container_repository_5.id..container_repository_5.id
_, unused_tracked_ids = subject.find_registry_differences(range)
expect(unused_tracked_ids).to be_empty
end
end
end
end
end
end
end end
...@@ -14,22 +14,6 @@ RSpec.describe Geo::ContainerRepositoryRegistry, :geo do ...@@ -14,22 +14,6 @@ RSpec.describe Geo::ContainerRepositoryRegistry, :geo do
it { is_expected.to belong_to(:container_repository) } it { is_expected.to belong_to(:container_repository) }
end end
describe 'scopes' do
  describe '.repository_id_not_in' do
    # Creates two registries and passes both of their container repository
    # IDs to the scope, so neither of them may appear in the result.
    it 'returns registries scoped by ids' do
      excluded_ids = [
        create(:container_repository_registry),
        create(:container_repository_registry)
      ].map(&:container_repository_id)

      # NOTE(review): `registry` is not defined inside this block; presumably
      # it is provided by an outer `let` — confirm against the full spec file.
      expect(described_class.repository_id_not_in(excluded_ids)).to match_ids([registry])
    end
  end
end
it_behaves_like 'a Geo registry' do it_behaves_like 'a Geo registry' do
let(:registry) { create(:container_repository_registry) } let(:registry) { create(:container_repository_registry) }
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment