Commit b83f297f authored by Valery Sizov's avatar Valery Sizov Committed by Douglas Barbosa Alexandre

Geo: Improve performance of selective sync worker

Instead of check the project registry existance for every project
we use intarnal join in the main query to decrease load to
database
parent 1463e91e
...@@ -14,6 +14,18 @@ module Geo ...@@ -14,6 +14,18 @@ module Geo
has_many :geo_node_namespace_links, class_name: 'Geo::Fdw::GeoNodeNamespaceLink' has_many :geo_node_namespace_links, class_name: 'Geo::Fdw::GeoNodeNamespaceLink'
has_many :namespaces, class_name: 'Geo::Fdw::Namespace', through: :geo_node_namespace_links has_many :namespaces, class_name: 'Geo::Fdw::Namespace', through: :geo_node_namespace_links
def projects_outside_selective_sync
projects = if selective_sync_by_namespaces?
projects_outside_selected_namespaces
elsif selective_sync_by_shards?
projects_outside_selected_shards
else
Geo::Fdw::Project.none
end
projects.inner_join_project_registry
end
def job_artifacts def job_artifacts
Geo::Fdw::Ci::JobArtifact.all unless selective_sync? Geo::Fdw::Ci::JobArtifact.all unless selective_sync?
......
...@@ -3,12 +3,15 @@ ...@@ -3,12 +3,15 @@
module Geo module Geo
module Fdw module Fdw
class Namespace < ::Geo::BaseFdw class Namespace < ::Geo::BaseFdw
include Routable
self.primary_key = :id self.primary_key = :id
self.inheritance_column = nil self.inheritance_column = nil
self.table_name = Gitlab::Geo::Fdw.foreign_table_name('namespaces') self.table_name = Gitlab::Geo::Fdw.foreign_table_name('namespaces')
has_many :geo_node_namespace_links, class_name: 'Geo::Fdw::GeoNodeNamespaceLink' has_many :geo_node_namespace_links, class_name: 'Geo::Fdw::GeoNodeNamespaceLink'
has_many :geo_nodes, class_name: 'Geo::Fdw::GeoNode', through: :geo_node_namespace_links has_many :geo_nodes, class_name: 'Geo::Fdw::GeoNode', through: :geo_node_namespace_links
belongs_to :parent, class_name: "Namespace"
end end
end end
end end
...@@ -4,6 +4,7 @@ module Geo ...@@ -4,6 +4,7 @@ module Geo
module Fdw module Fdw
class Project < ::Geo::BaseFdw class Project < ::Geo::BaseFdw
include Gitlab::SQL::Pattern include Gitlab::SQL::Pattern
include Routable
self.primary_key = :id self.primary_key = :id
self.table_name = Gitlab::Geo::Fdw.foreign_table_name('projects') self.table_name = Gitlab::Geo::Fdw.foreign_table_name('projects')
...@@ -11,6 +12,32 @@ module Geo ...@@ -11,6 +12,32 @@ module Geo
has_many :job_artifacts, class_name: 'Geo::Fdw::Ci::JobArtifact' has_many :job_artifacts, class_name: 'Geo::Fdw::Ci::JobArtifact'
has_many :lfs_objects_projects, class_name: 'Geo::Fdw::LfsObjectsProject' has_many :lfs_objects_projects, class_name: 'Geo::Fdw::LfsObjectsProject'
has_many :lfs_objects, class_name: 'Geo::Fdw::LfsObject', through: :lfs_objects_projects has_many :lfs_objects, class_name: 'Geo::Fdw::LfsObject', through: :lfs_objects_projects
belongs_to :namespace, class_name: 'Geo::Fdw::Namespace'
scope :outside_shards, -> (shard_names) { where.not(repository_storage: Array(shard_names)) }
alias_method :parent, :namespace
delegate :disk_path, to: :storage
def hashed_storage?(feature)
raise ArgumentError, _("Invalid feature") unless ::Project::HASHED_STORAGE_FEATURES.include?(feature)
self.storage_version && self.storage_version >= ::Project::HASHED_STORAGE_FEATURES[feature]
end
def repository
@repository ||= Repository.new(full_path, self, disk_path: disk_path)
end
def storage
@storage ||=
if hashed_storage?(:repository)
Storage::HashedProject.new(self)
else
Storage::LegacyProject.new(self)
end
end
class << self class << self
def missing_project_registry def missing_project_registry
...@@ -42,8 +69,6 @@ module Geo ...@@ -42,8 +69,6 @@ module Geo
where(repository_storage: Array(shard_names)) where(repository_storage: Array(shard_names))
end end
private
def inner_join_project_registry def inner_join_project_registry
join_statement = join_statement =
arel_table arel_table
...@@ -53,6 +78,8 @@ module Geo ...@@ -53,6 +78,8 @@ module Geo
joins(join_statement.join_sources) joins(join_statement.join_sources)
end end
private
def left_outer_join_project_registry def left_outer_join_project_registry
join_statement = join_statement =
arel_table arel_table
......
...@@ -14,10 +14,10 @@ module Geo ...@@ -14,10 +14,10 @@ module Geo
# rubocop:disable CodeReuse/ActiveRecord # rubocop:disable CodeReuse/ActiveRecord
def perform(geo_node_id) def perform(geo_node_id)
try_obtain_lease do try_obtain_lease do
node = GeoNode.find(geo_node_id) node = Geo::Fdw::GeoNode.find(geo_node_id)
break unless node.selective_sync? break unless node.selective_sync?
projects_to_clean_up(node).find_in_batches(batch_size: BATCH_SIZE) do |batch| node.projects_outside_selective_sync.find_in_batches(batch_size: BATCH_SIZE) do |batch|
batch.each do |project| batch.each do |project|
clean_up_repositories(project) clean_up_repositories(project)
end end
...@@ -30,20 +30,7 @@ module Geo ...@@ -30,20 +30,7 @@ module Geo
private private
def projects_to_clean_up(node)
if node.selective_sync_by_namespaces?
node.projects_outside_selected_namespaces
elsif node.selective_sync_by_shards?
node.projects_outside_selected_shards
else
Project.none
end
end
# rubocop:disable CodeReuse/ActiveRecord
def clean_up_repositories(project) def clean_up_repositories(project)
return unless Geo::ProjectRegistry.exists?(project_id: project.id)
job_id = ::Geo::RepositoryCleanupWorker.perform_async(project.id, project.name, project.disk_path, project.repository.storage) job_id = ::Geo::RepositoryCleanupWorker.perform_async(project.id, project.name, project.disk_path, project.repository.storage)
if job_id if job_id
...@@ -52,7 +39,6 @@ module Geo ...@@ -52,7 +39,6 @@ module Geo
log_error('Could not schedule a repository clean up', project_id: project.id, shard: project.repository.storage, disk_path: project.disk_path) log_error('Could not schedule a repository clean up', project_id: project.id, shard: project.repository.storage, disk_path: project.disk_path)
end end
end end
# rubocop:enable CodeReuse/ActiveRecord
def lease_timeout def lease_timeout
LEASE_TIMEOUT LEASE_TIMEOUT
......
---
title: 'Geo: Improve performance of clean up worker for selective sync'
merge_request:
author:
type: performance
...@@ -3,12 +3,6 @@ ...@@ -3,12 +3,6 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do
context 'relationships' do
it { is_expected.to have_many(:geo_node_namespace_links).class_name('Geo::Fdw::GeoNodeNamespaceLink') }
it { is_expected.to have_many(:namespaces).class_name('Geo::Fdw::Namespace').through(:geo_node_namespace_links) }
end
describe '#projects', :geo_fdw do
let(:node) { create(:geo_node) } let(:node) { create(:geo_node) }
let(:group_1) { create(:group) } let(:group_1) { create(:group) }
let(:group_2) { create(:group) } let(:group_2) { create(:group) }
...@@ -17,6 +11,12 @@ RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do ...@@ -17,6 +11,12 @@ RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do
let(:project_2) { create(:project, group: nested_group_1) } let(:project_2) { create(:project, group: nested_group_1) }
let(:project_3) { create(:project, :broken_storage, group: group_2) } let(:project_3) { create(:project, :broken_storage, group: group_2) }
context 'relationships' do
it { is_expected.to have_many(:geo_node_namespace_links).class_name('Geo::Fdw::GeoNodeNamespaceLink') }
it { is_expected.to have_many(:namespaces).class_name('Geo::Fdw::Namespace').through(:geo_node_namespace_links) }
end
describe '#projects', :geo_fdw do
subject { described_class.find(node.id) } subject { described_class.find(node.id) }
it 'returns all registries without selective sync' do it 'returns all registries without selective sync' do
...@@ -45,13 +45,6 @@ RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do ...@@ -45,13 +45,6 @@ RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do
# Disable transactions via :delete method because a foreign table # Disable transactions via :delete method because a foreign table
# can't see changes inside a transaction of a different connection. # can't see changes inside a transaction of a different connection.
describe '#project_registries', :geo_fdw do describe '#project_registries', :geo_fdw do
let(:node) { create(:geo_node) }
let(:group_1) { create(:group) }
let(:group_2) { create(:group) }
let(:nested_group_1) { create(:group, parent: group_1) }
let(:project_1) { create(:project, group: group_1) }
let(:project_2) { create(:project, group: nested_group_1) }
let(:project_3) { create(:project, :broken_storage, group: group_2) }
let!(:registry_1) { create(:geo_project_registry, project: project_1) } let!(:registry_1) { create(:geo_project_registry, project: project_1) }
let!(:registry_2) { create(:geo_project_registry, project: project_2) } let!(:registry_2) { create(:geo_project_registry, project: project_2) }
let!(:registry_3) { create(:geo_project_registry, project: project_3) } let!(:registry_3) { create(:geo_project_registry, project: project_3) }
...@@ -80,4 +73,62 @@ RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do ...@@ -80,4 +73,62 @@ RSpec.describe Geo::Fdw::GeoNode, :geo, type: :model do
expect(subject.project_registries).to be_empty expect(subject.project_registries).to be_empty
end end
end end
describe '#projects_outside_selective_sync', :geo_fdw do
subject { described_class.find(node.id) }
let(:synced_group) { create(:group) }
let(:synced_subgroup) { create(:group, parent: synced_group) }
let(:unsynced_group) { create(:group) }
let(:project_1) { create(:project, group: synced_group) }
let(:project_2) { create(:project, group: synced_group) }
let!(:project_3) { create(:project, :repository, group: unsynced_group) }
let(:project_4) { create(:project, :repository, group: unsynced_group) }
let(:project_5) { create(:project, group: synced_subgroup) }
let(:project_6) { create(:project, group: synced_subgroup) }
let(:project_7) { create(:project) }
let(:project_8) { create(:project) }
before do
create(:geo_project_registry, project: project_1)
create(:geo_project_registry, project: project_2)
create(:geo_project_registry, project: project_4)
create(:geo_project_registry, project: project_5)
create(:geo_project_registry, project: project_6)
create(:geo_project_registry, project: project_7)
create(:geo_project_registry, project: project_8)
end
def projects_to_fdw(projects)
projects.map { |project| Geo::Fdw::Project.find(project.id) }
end
context 'with selective sync by namespace' do
before do
node.update!(selective_sync_type: 'namespaces', namespaces: [synced_group])
end
it 'returns projects that does not belong to the selected namespaces' do
expected_projects = projects_to_fdw([project_4, project_7, project_8])
expect(subject.projects_outside_selective_sync).to eq(expected_projects)
end
end
context 'with selective sync by shard' do
before do
project_7.update_column(:repository_storage, 'broken')
project_8.update_column(:repository_storage, 'broken')
node.update!(selective_sync_type: 'shards', selective_sync_shards: ['broken'])
end
it 'returns synced projects that does not belong to the selected shards' do
expected_projects = projects_to_fdw([project_1, project_2, project_4, project_5, project_6])
expect(subject.projects_outside_selective_sync).to eq(expected_projects)
end
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment