Commit 40fa4c96 authored by Nick Thomas's avatar Nick Thomas

Merge branch 'sh-issue-3645' into 'master'

Limit the huge cross-database pluck for LFS objects and attachments

Closes #3645

See merge request gitlab-org/gitlab-ee!3075
parents c5affe24 2aadbc09
...@@ -18,8 +18,10 @@ module Geo ...@@ -18,8 +18,10 @@ module Geo
def find_object_ids def find_object_ids
downloaded_ids = find_downloaded_ids([:attachment, :avatar, :file]) downloaded_ids = find_downloaded_ids([:attachment, :avatar, :file])
current_node.uploads unsynched_downloads = filter_downloaded_ids(
.where.not(id: downloaded_ids) current_node.uploads, downloaded_ids, Upload.table_name)
unsynched_downloads
.order(created_at: :desc) .order(created_at: :desc)
.limit(db_retrieve_batch_size) .limit(db_retrieve_batch_size)
.pluck(:id, :uploader) .pluck(:id, :uploader)
...@@ -29,14 +31,33 @@ module Geo ...@@ -29,14 +31,33 @@ module Geo
def find_lfs_object_ids def find_lfs_object_ids
downloaded_ids = find_downloaded_ids([:lfs]) downloaded_ids = find_downloaded_ids([:lfs])
current_node.lfs_objects unsynched_downloads = filter_downloaded_ids(
.where.not(id: downloaded_ids) current_node.lfs_objects, downloaded_ids, LfsObject.table_name)
unsynched_downloads
.order(created_at: :desc) .order(created_at: :desc)
.limit(db_retrieve_batch_size) .limit(db_retrieve_batch_size)
.pluck(:id) .pluck(:id)
.map { |id| [id, :lfs] } .map { |id| [id, :lfs] }
end end
# This query requires data from two different databases, and unavoidably
# plucks a list of file IDs from one into the other. This will not scale
# well with the number of synchronized files--the query will increase
# linearly in size--so this should be replaced with postgres_fdw ASAP.
def filter_downloaded_ids(objects, downloaded_ids, table_name)
return objects if downloaded_ids.empty?
joined_relation = objects.joins(<<~SQL)
LEFT OUTER JOIN
(VALUES #{downloaded_ids.map { |id| "(#{id}, 't')" }.join(',')})
file_registry(file_id, registry_present)
ON #{table_name}.id = file_registry.file_id
SQL
joined_relation.where(file_registry: { registry_present: [nil, false] })
end
def find_downloaded_ids(file_types) def find_downloaded_ids(file_types)
downloaded_ids = Geo::FileRegistry.where(file_type: file_types).pluck(:file_id) downloaded_ids = Geo::FileRegistry.where(file_type: file_types).pluck(:file_id)
(downloaded_ids + scheduled_file_ids(file_types)).uniq (downloaded_ids + scheduled_file_ids(file_types)).uniq
......
---
title: 'Geo: Limit the huge cross-database pluck for LFS objects and attachments'
merge_request:
author:
type: fixed
require 'spec_helper' require 'spec_helper'
describe Geo::FileDownloadDispatchWorker do describe Geo::FileDownloadDispatchWorker, :postgresql do
include ::EE::GeoHelpers include ::EE::GeoHelpers
set(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') } set(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment