Commit 3649b65e authored by Mike Kozono's avatar Mike Kozono

Geo: Improve performance of LFS object queries

See discussion of various approaches at:
https://gitlab.com/gitlab-org/gitlab/-/merge_requests/42423#note_418699307
parent 81c3e33c
......@@ -23,7 +23,7 @@ module EE
def replicables_for_geo_node(primary_key_in, node = ::Gitlab::Geo.current_node)
local_storage_only = !node.sync_object_storage
scope = node.lfs_objects.primary_key_in(primary_key_in)
scope = node.lfs_objects(primary_key_in: primary_key_in)
scope = scope.with_files_stored_locally if local_storage_only
scope
end
......
......@@ -265,23 +265,17 @@ class GeoNode < ApplicationRecord
designs.where(id: project_id).exists?
end
def lfs_objects
return LfsObject.all unless selective_sync?
query = LfsObjectsProject.project_id_in(projects).select(:lfs_object_id).distinct
cte = Gitlab::SQL::CTE.new(:restricted_lfs_objects, query)
lfs_object_table = LfsObject.arel_table
inner_join_restricted_lfs_objects =
cte.table
.join(lfs_object_table, Arel::Nodes::InnerJoin)
.on(cte.table[:lfs_object_id].eq(lfs_object_table[:id]))
.join_sources
LfsObject
.with(cte.to_arel)
.from(cte.table)
.joins(inner_join_restricted_lfs_objects)
# @param primary_key_in [Range, LfsObject] arg to pass to primary_key_in scope
# @return [ActiveRecord::Relation<LfsObject>] scope of LfsObject filtered by selective sync settings and primary key arg
def lfs_objects(primary_key_in:)
return LfsObject.primary_key_in(primary_key_in) unless selective_sync?
ids = LfsObjectsProject.project_id_in(projects)
.where(lfs_object_id: primary_key_in)
.select(:lfs_object_id)
.distinct
LfsObject.where(id: ids)
end
def projects
......
---
title: 'Geo: Improve performance of LFS objects queries'
merge_request: 42423
author:
type: performance
......@@ -835,9 +835,11 @@ RSpec.describe GeoNode, :request_store, :geo, type: :model do
create(:lfs_objects_project, project: project_broken_storage, lfs_object: lfs_object_5)
end
subject(:lfs_objects) { node.lfs_objects(primary_key_in: 1..LfsObject.last.id) }
context 'without selective sync' do
it 'returns all projects without selective sync' do
expect(node.lfs_objects).to match_array([lfs_object_1, lfs_object_2, lfs_object_3, lfs_object_4, lfs_object_5])
expect(lfs_objects).to match_array([lfs_object_1, lfs_object_2, lfs_object_3, lfs_object_4, lfs_object_5])
end
end
......@@ -847,14 +849,14 @@ RSpec.describe GeoNode, :request_store, :geo, type: :model do
end
it 'excludes LFS objects that are not in selectively synced projects' do
expect(node.lfs_objects).to match_array([lfs_object_1, lfs_object_2, lfs_object_3])
expect(lfs_objects).to match_array([lfs_object_1, lfs_object_2, lfs_object_3])
end
it 'excludes LFS objects from fork networks' do
forked_project = create(:project, group: synced_group)
create(:lfs_objects_project, project: forked_project, lfs_object: lfs_object_1)
expect(node.lfs_objects).to match_array([lfs_object_1, lfs_object_2, lfs_object_3])
expect(lfs_objects).to match_array([lfs_object_1, lfs_object_2, lfs_object_3])
end
end
......@@ -864,14 +866,14 @@ RSpec.describe GeoNode, :request_store, :geo, type: :model do
end
it 'excludes LFS objects that are not in selectively synced shards' do
expect(node.lfs_objects).to match_array([lfs_object_5])
expect(lfs_objects).to match_array([lfs_object_5])
end
it 'excludes LFS objects from fork networks' do
forked_project = create(:project, :broken_storage)
create(:lfs_objects_project, project: forked_project, lfs_object: lfs_object_5)
expect(node.lfs_objects).to match_array([lfs_object_5])
expect(lfs_objects).to match_array([lfs_object_5])
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment