Commit b83f297f authored by Valery Sizov's avatar Valery Sizov Committed by Douglas Barbosa Alexandre

Geo: Improve performance of selective sync worker

Instead of check the project registry existance for every project
we use intarnal join in the main query to decrease load to
database
parent 1463e91e
......@@ -14,6 +14,18 @@ module Geo
has_many :geo_node_namespace_links, class_name: 'Geo::Fdw::GeoNodeNamespaceLink'
has_many :namespaces, class_name: 'Geo::Fdw::Namespace', through: :geo_node_namespace_links
def projects_outside_selective_sync
projects = if selective_sync_by_namespaces?
projects_outside_selected_namespaces
elsif selective_sync_by_shards?
projects_outside_selected_shards
else
Geo::Fdw::Project.none
end
projects.inner_join_project_registry
end
def job_artifacts
Geo::Fdw::Ci::JobArtifact.all unless selective_sync?
......
......@@ -3,12 +3,15 @@
module Geo
module Fdw
class Namespace < ::Geo::BaseFdw
include Routable
self.primary_key = :id
self.inheritance_column = nil
self.table_name = Gitlab::Geo::Fdw.foreign_table_name('namespaces')
has_many :geo_node_namespace_links, class_name: 'Geo::Fdw::GeoNodeNamespaceLink'
has_many :geo_nodes, class_name: 'Geo::Fdw::GeoNode', through: :geo_node_namespace_links
belongs_to :parent, class_name: "Namespace"
end
end
end
......@@ -4,6 +4,7 @@ module Geo
module Fdw
class Project < ::Geo::BaseFdw
include Gitlab::SQL::Pattern
include Routable
self.primary_key = :id
self.table_name = Gitlab::Geo::Fdw.foreign_table_name('projects')
......@@ -11,6 +12,32 @@ module Geo
has_many :job_artifacts, class_name: 'Geo::Fdw::Ci::JobArtifact'
has_many :lfs_objects_projects, class_name: 'Geo::Fdw::LfsObjectsProject'
has_many :lfs_objects, class_name: 'Geo::Fdw::LfsObject', through: :lfs_objects_projects
belongs_to :namespace, class_name: 'Geo::Fdw::Namespace'
scope :outside_shards, -> (shard_names) { where.not(repository_storage: Array(shard_names)) }
alias_method :parent, :namespace
delegate :disk_path, to: :storage
def hashed_storage?(feature)
raise ArgumentError, _("Invalid feature") unless ::Project::HASHED_STORAGE_FEATURES.include?(feature)
self.storage_version && self.storage_version >= ::Project::HASHED_STORAGE_FEATURES[feature]
end
def repository
@repository ||= Repository.new(full_path, self, disk_path: disk_path)
end
def storage
@storage ||=
if hashed_storage?(:repository)
Storage::HashedProject.new(self)
else
Storage::LegacyProject.new(self)
end
end
class << self
def missing_project_registry
......@@ -42,8 +69,6 @@ module Geo
where(repository_storage: Array(shard_names))
end
private
def inner_join_project_registry
join_statement =
arel_table
......@@ -53,6 +78,8 @@ module Geo
joins(join_statement.join_sources)
end
private
def left_outer_join_project_registry
join_statement =
arel_table
......
......@@ -14,10 +14,10 @@ module Geo
# rubocop:disable CodeReuse/ActiveRecord
def perform(geo_node_id)
try_obtain_lease do
node = GeoNode.find(geo_node_id)
node = Geo::Fdw::GeoNode.find(geo_node_id)
break unless node.selective_sync?
projects_to_clean_up(node).find_in_batches(batch_size: BATCH_SIZE) do |batch|
node.projects_outside_selective_sync.find_in_batches(batch_size: BATCH_SIZE) do |batch|
batch.each do |project|
clean_up_repositories(project)
end
......@@ -30,20 +30,7 @@ module Geo
private
def projects_to_clean_up(node)
if node.selective_sync_by_namespaces?
node.projects_outside_selected_namespaces
elsif node.selective_sync_by_shards?
node.projects_outside_selected_shards
else
Project.none
end
end
# rubocop:disable CodeReuse/ActiveRecord
def clean_up_repositories(project)
return unless Geo::ProjectRegistry.exists?(project_id: project.id)
job_id = ::Geo::RepositoryCleanupWorker.perform_async(project.id, project.name, project.disk_path, project.repository.storage)
if job_id
......@@ -52,7 +39,6 @@ module Geo
log_error('Could not schedule a repository clean up', project_id: project.id, shard: project.repository.storage, disk_path: project.disk_path)
end
end
# rubocop:enable CodeReuse/ActiveRecord
def lease_timeout
LEASE_TIMEOUT
......
---
title: 'Geo: Improve performance of clean up worker for selective sync'
merge_request:
author:
type: performance
......@@ -3,20 +3,20 @@
require 'spec_helper'
RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do
let(:node) { create(:geo_node) }
let(:group_1) { create(:group) }
let(:group_2) { create(:group) }
let(:nested_group_1) { create(:group, parent: group_1) }
let(:project_1) { create(:project, group: group_1) }
let(:project_2) { create(:project, group: nested_group_1) }
let(:project_3) { create(:project, :broken_storage, group: group_2) }
context 'relationships' do
it { is_expected.to have_many(:geo_node_namespace_links).class_name('Geo::Fdw::GeoNodeNamespaceLink') }
it { is_expected.to have_many(:namespaces).class_name('Geo::Fdw::Namespace').through(:geo_node_namespace_links) }
end
describe '#projects', :geo_fdw do
let(:node) { create(:geo_node) }
let(:group_1) { create(:group) }
let(:group_2) { create(:group) }
let(:nested_group_1) { create(:group, parent: group_1) }
let(:project_1) { create(:project, group: group_1) }
let(:project_2) { create(:project, group: nested_group_1) }
let(:project_3) { create(:project, :broken_storage, group: group_2) }
subject { described_class.find(node.id) }
it 'returns all registries without selective sync' do
......@@ -45,13 +45,6 @@ RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do
# Disable transactions via :delete method because a foreign table
# can't see changes inside a transaction of a different connection.
describe '#project_registries', :geo_fdw do
let(:node) { create(:geo_node) }
let(:group_1) { create(:group) }
let(:group_2) { create(:group) }
let(:nested_group_1) { create(:group, parent: group_1) }
let(:project_1) { create(:project, group: group_1) }
let(:project_2) { create(:project, group: nested_group_1) }
let(:project_3) { create(:project, :broken_storage, group: group_2) }
let!(:registry_1) { create(:geo_project_registry, project: project_1) }
let!(:registry_2) { create(:geo_project_registry, project: project_2) }
let!(:registry_3) { create(:geo_project_registry, project: project_3) }
......@@ -80,4 +73,62 @@ RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do
expect(subject.project_registries).to be_empty
end
end
describe '#projects_outside_selective_sync', :geo_fdw do
subject { described_class.find(node.id) }
let(:synced_group) { create(:group) }
let(:synced_subgroup) { create(:group, parent: synced_group) }
let(:unsynced_group) { create(:group) }
let(:project_1) { create(:project, group: synced_group) }
let(:project_2) { create(:project, group: synced_group) }
let!(:project_3) { create(:project, :repository, group: unsynced_group) }
let(:project_4) { create(:project, :repository, group: unsynced_group) }
let(:project_5) { create(:project, group: synced_subgroup) }
let(:project_6) { create(:project, group: synced_subgroup) }
let(:project_7) { create(:project) }
let(:project_8) { create(:project) }
before do
create(:geo_project_registry, project: project_1)
create(:geo_project_registry, project: project_2)
create(:geo_project_registry, project: project_4)
create(:geo_project_registry, project: project_5)
create(:geo_project_registry, project: project_6)
create(:geo_project_registry, project: project_7)
create(:geo_project_registry, project: project_8)
end
def projects_to_fdw(projects)
projects.map { |project| Geo::Fdw::Project.find(project.id) }
end
context 'with selective sync by namespace' do
before do
node.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
end
it 'returns projects that does not belong to the selected namespaces' do
expected_projects = projects_to_fdw([project_4, project_7, project_8])
expect(subject.projects_outside_selective_sync).to eq(expected_projects)
end
end
context 'with selective sync by shard' do
before do
project_7.update_column(:repository_storage, 'broken')
project_8.update_column(:repository_storage, 'broken')
node.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken'])
end
it 'returns synced projects that does not belong to the selected shards' do
expected_projects = projects_to_fdw([project_1, project_2, project_4, project_5, project_6])
expect(subject.projects_outside_selective_sync).to eq(expected_projects)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment