Commit 1621b323 authored by Michael Kozono's avatar Michael Kozono

Merge branch '223249-refactor-repositories-clean-up-worker' into 'master'

Geo - Refactor repositories clean up worker

See merge request gitlab-org/gitlab!38217
parents 0f1862d5 d3049d97
...@@ -3,30 +3,6 @@ ...@@ -3,30 +3,6 @@
module Geo::SelectiveSync module Geo::SelectiveSync
extend ActiveSupport::Concern extend ActiveSupport::Concern
def projects_outside_selected_namespaces
return project_model.none unless selective_sync_by_namespaces?
cte_query = selected_namespaces_and_descendants_cte
cte_table = cte_query.table
join_statement =
projects_table
.join(cte_table, Arel::Nodes::OuterJoin)
.on(projects_table[:namespace_id].eq(cte_table[:id]))
project_model
.joins(join_statement.join_sources)
.where(cte_table[:id].eq(nil))
.with
.recursive(cte_query.to_arel)
end
def projects_outside_selected_shards
return project_model.none unless selective_sync_by_shards?
project_model.outside_shards(selective_sync_shards)
end
def selective_sync? def selective_sync?
selective_sync_type.present? selective_sync_type.present?
end end
......
...@@ -115,7 +115,6 @@ module EE ...@@ -115,7 +115,6 @@ module EE
scope :with_wiki_enabled, -> { with_feature_enabled(:wiki) } scope :with_wiki_enabled, -> { with_feature_enabled(:wiki) }
scope :within_shards, -> (shard_names) { where(repository_storage: Array(shard_names)) } scope :within_shards, -> (shard_names) { where(repository_storage: Array(shard_names)) }
scope :outside_shards, -> (shard_names) { where.not(repository_storage: Array(shard_names)) }
scope :verification_failed_repos, -> { joins(:repository_state).merge(ProjectRepositoryState.verification_failed_repos) } scope :verification_failed_repos, -> { joins(:repository_state).merge(ProjectRepositoryState.verification_failed_repos) }
scope :verification_failed_wikis, -> { joins(:repository_state).merge(ProjectRepositoryState.verification_failed_wikis) } scope :verification_failed_wikis, -> { joins(:repository_state).merge(ProjectRepositoryState.verification_failed_wikis) }
scope :for_plan_name, -> (name) { joins(namespace: { gitlab_subscription: :hosted_plan }).where(plans: { name: name }) } scope :for_plan_name, -> (name) { joins(namespace: { gitlab_subscription: :hosted_plan }).where(plans: { name: name }) }
......
...@@ -14,18 +14,6 @@ module Geo ...@@ -14,18 +14,6 @@ module Geo
has_many :geo_node_namespace_links, class_name: 'Geo::Fdw::GeoNodeNamespaceLink' has_many :geo_node_namespace_links, class_name: 'Geo::Fdw::GeoNodeNamespaceLink'
has_many :namespaces, class_name: 'Geo::Fdw::Namespace', through: :geo_node_namespace_links has_many :namespaces, class_name: 'Geo::Fdw::Namespace', through: :geo_node_namespace_links
def projects_outside_selective_sync
projects = if selective_sync_by_namespaces?
projects_outside_selected_namespaces
elsif selective_sync_by_shards?
projects_outside_selected_shards
else
Geo::Fdw::Project.none
end
projects.inner_join_project_registry
end
def projects def projects
return Geo::Fdw::Project.all unless selective_sync? return Geo::Fdw::Project.all unless selective_sync?
......
...@@ -13,8 +13,6 @@ module Geo ...@@ -13,8 +13,6 @@ module Geo
belongs_to :namespace, class_name: 'Geo::Fdw::Namespace' belongs_to :namespace, class_name: 'Geo::Fdw::Namespace'
scope :outside_shards, -> (shard_names) { where.not(repository_storage: Array(shard_names)) }
alias_method :parent, :namespace alias_method :parent, :namespace
delegate :disk_path, to: :storage delegate :disk_path, to: :storage
...@@ -56,15 +54,6 @@ module Geo ...@@ -56,15 +54,6 @@ module Geo
def within_shards(shard_names) def within_shards(shard_names)
where(repository_storage: Array(shard_names)) where(repository_storage: Array(shard_names))
end end
def inner_join_project_registry
join_statement =
arel_table
.join(Geo::ProjectRegistry.arel_table, Arel::Nodes::InnerJoin)
.on(arel_table[:id].eq(Geo::ProjectRegistry.arel_table[:project_id]))
joins(join_statement.join_sources)
end
end end
end end
end end
......
...@@ -14,13 +14,14 @@ module Geo ...@@ -14,13 +14,14 @@ module Geo
# rubocop:disable CodeReuse/ActiveRecord # rubocop:disable CodeReuse/ActiveRecord
def perform(geo_node_id) def perform(geo_node_id)
try_obtain_lease do try_obtain_lease do
node = Geo::Fdw::GeoNode.find(geo_node_id) node = GeoNode.find(geo_node_id)
break unless node.selective_sync? break unless node.selective_sync?
node.projects_outside_selective_sync.find_in_batches(batch_size: BATCH_SIZE) do |batch| Geo::ProjectRegistry.select(:id, :project_id).find_in_batches(batch_size: BATCH_SIZE) do |registries|
batch.each do |project| tracked_project_ids = registries.map(&:project_id)
clean_up_repositories(project) replicable_project_ids = node.projects.id_in(tracked_project_ids).pluck_primary_key
end unused_tracked_project_ids = tracked_project_ids - replicable_project_ids
clean_up_repositories(unused_tracked_project_ids)
end end
end end
rescue ActiveRecord::RecordNotFound => error rescue ActiveRecord::RecordNotFound => error
...@@ -30,7 +31,15 @@ module Geo ...@@ -30,7 +31,15 @@ module Geo
private private
def clean_up_repositories(project) def clean_up_repositories(unused_tracked_project_ids)
unused_projects = Project.id_in(unused_tracked_project_ids)
unused_projects.each do |project|
clean_up_repository(project)
end
end
def clean_up_repository(project)
job_id = ::Geo::RepositoryCleanupWorker.perform_async(project.id, project.name, project.disk_path, project.repository.storage) job_id = ::Geo::RepositoryCleanupWorker.perform_async(project.id, project.name, project.disk_path, project.repository.storage)
if job_id if job_id
......
...@@ -41,62 +41,4 @@ RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do ...@@ -41,62 +41,4 @@ RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do
expect(subject.projects).to be_empty expect(subject.projects).to be_empty
end end
end end
describe '#projects_outside_selective_sync', :geo_fdw do
subject { described_class.find(node.id) }
let(:synced_group) { create(:group) }
let(:synced_subgroup) { create(:group, parent: synced_group) }
let(:unsynced_group) { create(:group) }
let(:project_1) { create(:project, group: synced_group) }
let(:project_2) { create(:project, group: synced_group) }
let!(:project_3) { create(:project, :repository, group: unsynced_group) }
let(:project_4) { create(:project, :repository, group: unsynced_group) }
let(:project_5) { create(:project, group: synced_subgroup) }
let(:project_6) { create(:project, group: synced_subgroup) }
let(:project_7) { create(:project) }
let(:project_8) { create(:project) }
before do
create(:geo_project_registry, project: project_1)
create(:geo_project_registry, project: project_2)
create(:geo_project_registry, project: project_4)
create(:geo_project_registry, project: project_5)
create(:geo_project_registry, project: project_6)
create(:geo_project_registry, project: project_7)
create(:geo_project_registry, project: project_8)
end
def projects_to_fdw(projects)
projects.map { |project| Geo::Fdw::Project.find(project.id) }
end
context 'with selective sync by namespace' do
before do
node.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
end
it 'returns projects that does not belong to the selected namespaces' do
expected_projects = projects_to_fdw([project_4, project_7, project_8])
expect(subject.projects_outside_selective_sync).to eq(expected_projects)
end
end
context 'with selective sync by shard' do
before do
project_7.update_column(:repository_storage, 'broken')
project_8.update_column(:repository_storage, 'broken')
node.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken'])
end
it 'returns synced projects that does not belong to the selected shards' do
expected_projects = projects_to_fdw([project_1, project_2, project_4, project_5, project_6])
expect(subject.projects_outside_selective_sync).to eq(expected_projects)
end
end
end
end end
...@@ -2,30 +2,25 @@ ...@@ -2,30 +2,25 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::RepositoriesCleanUpWorker, :geo, :geo_fdw do RSpec.describe Geo::RepositoriesCleanUpWorker, :geo do
include ::EE::GeoHelpers include ::EE::GeoHelpers
include ExclusiveLeaseHelpers include ExclusiveLeaseHelpers
describe '#perform' do describe '#perform' do
let(:secondary) { create(:geo_node) } let_it_be(:secondary) { create(:geo_node) }
let_it_be(:synced_group) { create(:group) }
let(:synced_group) { create(:group) } let_it_be(:synced_subgroup) { create(:group, parent: synced_group) }
let(:synced_subgroup) { create(:group, parent: synced_group) } let_it_be(:unsynced_group) { create(:group) }
let(:unsynced_group) { create(:group) } let_it_be(:project_1) { create(:project, group: synced_group) }
let_it_be(:project_2) { create(:project, group: synced_group) }
let(:project_1) { create(:project, group: synced_group) } let_it_be(:project_3) { create(:project, :repository, group: unsynced_group) }
let(:project_2) { create(:project, group: synced_group) } let_it_be(:project_4) { create(:project, :repository, group: unsynced_group) }
let!(:project_3) { create(:project, :repository, group: unsynced_group) } let_it_be(:project_5) { create(:project, group: synced_subgroup) }
let(:project_4) { create(:project, :repository, group: unsynced_group) } let_it_be(:project_6) { create(:project, group: synced_subgroup) }
let(:project_5) { create(:project, group: synced_subgroup) } let_it_be(:project_7) { create(:project) }
let(:project_6) { create(:project, group: synced_subgroup) } let_it_be(:project_8) { create(:project) }
let(:project_7) { create(:project) }
let(:project_8) { create(:project) } before_all do
before do
stub_current_geo_node(secondary)
stub_exclusive_lease
create(:geo_project_registry, project: project_1) create(:geo_project_registry, project: project_1)
create(:geo_project_registry, project: project_2) create(:geo_project_registry, project: project_2)
create(:geo_project_registry, project: project_4) create(:geo_project_registry, project: project_4)
...@@ -35,6 +30,11 @@ RSpec.describe Geo::RepositoriesCleanUpWorker, :geo, :geo_fdw do ...@@ -35,6 +30,11 @@ RSpec.describe Geo::RepositoriesCleanUpWorker, :geo, :geo_fdw do
create(:geo_project_registry, project: project_8) create(:geo_project_registry, project: project_8)
end end
before do
stub_current_geo_node(secondary)
stub_exclusive_lease
end
it 'does not perform Geo::RepositoryCleanupWorker when cannnot obtain a lease' do it 'does not perform Geo::RepositoryCleanupWorker when cannnot obtain a lease' do
stub_exclusive_lease_taken stub_exclusive_lease_taken
...@@ -76,10 +76,8 @@ RSpec.describe Geo::RepositoriesCleanUpWorker, :geo, :geo_fdw do ...@@ -76,10 +76,8 @@ RSpec.describe Geo::RepositoriesCleanUpWorker, :geo, :geo_fdw do
subject.perform(secondary.id) subject.perform(secondary.id)
end end
it 'does not leave orphaned entries in the project_registry table' do it 'does not leave orphaned entries in the project_registry table', :sidekiq_inline do
Sidekiq::Testing.inline! do subject.perform(secondary.id)
subject.perform(secondary.id)
end
expect(Geo::ProjectRegistry.where(project_id: [project_3, project_4, project_7, project_8])).to be_empty expect(Geo::ProjectRegistry.where(project_id: [project_3, project_4, project_7, project_8])).to be_empty
end end
...@@ -109,10 +107,8 @@ RSpec.describe Geo::RepositoriesCleanUpWorker, :geo, :geo_fdw do ...@@ -109,10 +107,8 @@ RSpec.describe Geo::RepositoriesCleanUpWorker, :geo, :geo_fdw do
subject.perform(secondary.id) subject.perform(secondary.id)
end end
it 'does not leave orphaned entries in the project_registry table' do it 'does not leave orphaned entries in the project_registry table', :sidekiq_inline do
Sidekiq::Testing.inline! do subject.perform(secondary.id)
subject.perform(secondary.id)
end
expect(Geo::ProjectRegistry.where(project_id: [project_1, project_2, project_3, project_4, project_5, project_6])).to be_empty expect(Geo::ProjectRegistry.where(project_id: [project_1, project_2, project_3, project_4, project_5, project_6])).to be_empty
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment