Commit fdd33901 authored by Nick Thomas

Improve Geo repository sync performance for larger databases

parent dfe00667
...@@ -135,6 +135,27 @@ class GeoNode < ActiveRecord::Base ...@@ -135,6 +135,27 @@ class GeoNode < ActiveRecord::Base
end end
end end
# Projects that meet this node's project restriction but have no project
# registry entry yet, i.e. have not been synced so far.
#
# The data needed lives in two different databases, so the registry's project
# IDs are plucked from one and inlined into the query against the other as a
# VALUES list. The query text therefore grows linearly with the number of
# synchronized projects and will not scale well; this should be replaced with
# postgres_fdw ASAP.
def unsynced_projects
  synced_ids = project_registries.pluck(:project_id)

  if synced_ids.empty?
    # No registry entries means there is nothing to exclude; returning early
    # also avoids building a VALUES clause with no rows.
    projects
  else
    # One "(id, 't')" row per project that already has a registry entry.
    value_rows = synced_ids.map { |project_id| "(#{project_id}, 't')" }.join(',')

    registry_join = <<~SQL
      LEFT OUTER JOIN
      (VALUES #{value_rows})
      project_registry(project_id, registry_present)
      ON projects.id = project_registry.project_id
    SQL

    # Rows without a match in the inline table have a NULL registry_present,
    # which is what identifies a project as unsynced.
    projects
      .joins(registry_join)
      .where(project_registry: { registry_present: [nil, false] })
  end
end
def uploads def uploads
if restricted_project_ids if restricted_project_ids
uploads_table = Upload.arel_table uploads_table = Upload.arel_table
......
...@@ -23,9 +23,8 @@ module Geo ...@@ -23,9 +23,8 @@ module Geo
end end
def find_project_ids_not_synced def find_project_ids_not_synced
current_node.projects current_node.unsynced_projects
.where.not(id: Geo::ProjectRegistry.pluck(:project_id)) .reorder(last_repository_updated_at: :desc)
.order(last_repository_updated_at: :desc)
.limit(db_retrieve_batch_size) .limit(db_retrieve_batch_size)
.pluck(:id) .pluck(:id)
end end
......
---
title: Improve Geo repository sync performance for larger databases
merge_request: 2887
author:
type: fixed
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment