Resync repositories that have been updated recently

parent 1f8f720b
......@@ -2,15 +2,18 @@ class GeoBackfillWorker
include Sidekiq::Worker
include CronjobQueue
RUN_TIME = 5.minutes.to_i.freeze
BATCH_SIZE = 100.freeze
RUN_TIME = 5.minutes.to_i
BATCH_SIZE = 100
LAST_SYNC_INTERVAL = 24.hours
def perform
return unless Gitlab::Geo.configured?
return unless Gitlab::Geo.primary_node.present?
start_time = Time.now
project_ids = find_project_ids
start_time = Time.now
project_ids_not_synced = find_project_ids_not_synced
project_ids_updated_recently = find_synced_project_ids_updated_recently
project_ids = interleave(project_ids_not_synced, project_ids_updated_recently)
logger.info "Started Geo backfilling for #{project_ids.length} project(s)"
......@@ -38,12 +41,34 @@ class GeoBackfillWorker
private
def find_project_ids
# Returns at most BATCH_SIZE ids of projects that do not appear among the
# project ids of the registry's `synced` scope.
def find_project_ids_not_synced
  # The synced ids are materialised with pluck and passed as a literal list.
  synced_ids = Geo::ProjectRegistry.synced.pluck(:project_id)
  candidates = Project.where.not(id: synced_ids)

  candidates.limit(BATCH_SIZE).pluck(:id)
end
# Returns at most BATCH_SIZE project ids, taken from registry entries of
# recently updated projects whose last sync is at least LAST_SYNC_INTERVAL
# old. Entries with the oldest sync time come first.
def find_synced_project_ids_updated_recently
  recently_updated = Geo::ProjectRegistry.where(project_id: find_project_ids_updated_recently)
  stale_cutoff = LAST_SYNC_INTERVAL.ago
  stale = recently_updated.where('last_repository_synced_at <= ?', stale_cutoff)

  stale.order(last_repository_synced_at: :asc).limit(BATCH_SIZE).pluck(:project_id)
end
# Returns the ids of projects that are in the registry's `synced` scope and
# whose last_repository_updated_at falls within the last LAST_SYNC_INTERVAL.
def find_project_ids_updated_recently
  # NOTE(review): ids are plucked rather than joined; presumably the registry
  # lives in a separate (tracking) database — confirm.
  synced_ids = Geo::ProjectRegistry.synced.pluck(:project_id)
  updated_since = LAST_SYNC_INTERVAL.ago

  Project.where(id: synced_ids)
         .where('last_repository_updated_at >= ?', updated_since)
         .pluck(:id)
end
# Merges two lists by alternating their elements, always starting with an
# element of `first`. Once the shorter list runs out, the remaining elements
# of the longer one follow in order. nil entries are dropped and the result
# is capped at BATCH_SIZE elements.
def interleave(first, second)
  pairs =
    if second.length > first.length
      # zip pads to the receiver's length, so zip from the longer list and
      # swap each pair back into (first, second) order.
      second.zip(first).map { |from_second, from_first| [from_first, from_second] }
    else
      first.zip(second)
    end

  merged = pairs.flatten(1).compact
  merged.take(BATCH_SIZE)
end
# Tells whether RUN_TIME seconds or more have passed since `start_time`.
def over_time?(start_time)
  elapsed_seconds = Time.now - start_time
  elapsed_seconds >= RUN_TIME
end
......
......@@ -3,7 +3,8 @@ require 'spec_helper'
describe Geo::GeoBackfillWorker, services: true do
let!(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') }
let!(:secondary) { create(:geo_node, :current) }
let!(:projects) { create_list(:empty_project, 2) }
let!(:project_1) { create(:empty_project) }
let!(:project_2) { create(:empty_project) }
subject { described_class.new }
......@@ -20,7 +21,7 @@ describe Geo::GeoBackfillWorker, services: true do
it 'performs Geo::RepositoryBackfillService for projects where last attempt to backfill failed' do
Geo::ProjectRegistry.create(
project: Project.first,
project: project_1,
last_repository_synced_at: DateTime.now,
last_repository_successful_sync_at: nil
)
......@@ -30,6 +31,27 @@ describe Geo::GeoBackfillWorker, services: true do
subject.perform
end
# Both projects get a registry entry whose last sync (and last successful
# sync) happened two days ago; only their repository update times differ.
it 'performs Geo::RepositoryBackfillService for backfilled projects updated recently' do
Geo::ProjectRegistry.create(
project: project_1,
last_repository_synced_at: 2.days.ago,
last_repository_successful_sync_at: 2.days.ago
)
Geo::ProjectRegistry.create(
project: project_2,
last_repository_synced_at: 2.days.ago,
last_repository_successful_sync_at: 2.days.ago
)
# project_1 was last updated two days ago, project_2 ten minutes ago.
project_1.update_attribute(:last_repository_updated_at, 2.days.ago)
project_2.update_attribute(:last_repository_updated_at, 10.minutes.ago)
# Exactly one service instance is expected — presumably for project_2, the
# only project updated within the worker's recent-update window (confirm).
expect(Geo::RepositoryBackfillService).to receive(:new).once.and_return(spy)
subject.perform
end
it 'does not perform Geo::RepositoryBackfillService when tracking DB is not available' do
allow(Rails.configuration).to receive(:respond_to?).with(:geo_database) { false }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment