Commit 375f3438 authored by Robert Speicher

Merge branch '2274-improve-backfill' into 'master'

Geo: Resync repositories that have been updated recently

See merge request !1826
parents ff27f4a6 20482608
...@@ -10,18 +10,6 @@ module Geo ...@@ -10,18 +10,6 @@ module Geo
end end
def execute def execute
# When Geo customers upgrade to 9.0, the secondaries nodes that are
# enabled will start the backfilling process automatically. We need
# to populate the tracking database correctly for projects synced
# before the process being started or projects created during the
# backfilling. Otherwise, the query to retrieve the projects will
# always return the same projects because they don't have entries
# in the tracking database
if backfilled?
update_registry(DateTime.now, DateTime.now)
return
end
try_obtain_lease do try_obtain_lease do
log('Started repository sync') log('Started repository sync')
started_at, finished_at = fetch_repositories started_at, finished_at = fetch_repositories
...@@ -76,7 +64,6 @@ module Geo ...@@ -76,7 +64,6 @@ module Geo
def try_obtain_lease def try_obtain_lease
log('Trying to obtain lease to sync repository') log('Trying to obtain lease to sync repository')
repository_lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT).try_obtain repository_lease = Gitlab::ExclusiveLease.new(lease_key, timeout: LEASE_TIMEOUT).try_obtain
if repository_lease.nil? if repository_lease.nil?
...@@ -93,25 +80,7 @@ module Geo ...@@ -93,25 +80,7 @@ module Geo
Gitlab::ExclusiveLease.cancel(lease_key, repository_lease) Gitlab::ExclusiveLease.cancel(lease_key, repository_lease)
end end
def backfilled?
return false unless project.repository.exists?
return false if project.repository.exists? && project.repository.empty?
return false if failed_registry_exists?
true
end
def failed_registry_exists?
Geo::ProjectRegistry.failed.where(project_id: project_id).any?
end
def synced_registry_exists?
Geo::ProjectRegistry.synced.where(project_id: project_id).any?
end
def update_registry(started_at, finished_at) def update_registry(started_at, finished_at)
return if synced_registry_exists?
log('Updating registry information') log('Updating registry information')
registry = Geo::ProjectRegistry.find_or_initialize_by(project_id: project_id) registry = Geo::ProjectRegistry.find_or_initialize_by(project_id: project_id)
registry.last_repository_synced_at = started_at registry.last_repository_synced_at = started_at
......
...@@ -2,15 +2,18 @@ class GeoBackfillWorker ...@@ -2,15 +2,18 @@ class GeoBackfillWorker
include Sidekiq::Worker include Sidekiq::Worker
include CronjobQueue include CronjobQueue
RUN_TIME = 5.minutes.to_i.freeze RUN_TIME = 5.minutes.to_i
BATCH_SIZE = 100.freeze BATCH_SIZE = 100
LAST_SYNC_INTERVAL = 24.hours
def perform def perform
return unless Gitlab::Geo.configured? return unless Gitlab::Geo.configured?
return unless Gitlab::Geo.primary_node.present? return unless Gitlab::Geo.primary_node.present?
start_time = Time.now start_time = Time.now
project_ids = find_project_ids project_ids_not_synced = find_project_ids_not_synced
project_ids_updated_recently = find_synced_project_ids_updated_recently
project_ids = interleave(project_ids_not_synced, project_ids_updated_recently)
logger.info "Started Geo backfilling for #{project_ids.length} project(s)" logger.info "Started Geo backfilling for #{project_ids.length} project(s)"
...@@ -38,12 +41,34 @@ class GeoBackfillWorker ...@@ -38,12 +41,34 @@ class GeoBackfillWorker
private private
def find_project_ids def find_project_ids_not_synced
Project.where.not(id: Geo::ProjectRegistry.synced.pluck(:project_id)) Project.where.not(id: Geo::ProjectRegistry.synced.pluck(:project_id))
.limit(BATCH_SIZE) .limit(BATCH_SIZE)
.pluck(:id) .pluck(:id)
end end
# Picks registry entries that are due for a resync: entries whose project is
# returned by find_project_ids_updated_recently and whose
# last_repository_synced_at is at least LAST_SYNC_INTERVAL in the past.
#
# Entries with the oldest last_repository_synced_at come first, and at most
# BATCH_SIZE project ids are returned.
def find_synced_project_ids_updated_recently
  recently_updated_ids = find_project_ids_updated_recently
  sync_cutoff = LAST_SYNC_INTERVAL.ago

  due_for_resync = Geo::ProjectRegistry
    .where(project_id: recently_updated_ids)
    .where('last_repository_synced_at <= ?', sync_cutoff)

  due_for_resync
    .order(last_repository_synced_at: :asc)
    .limit(BATCH_SIZE)
    .pluck(:project_id)
end
# Returns the ids of projects that appear in Geo::ProjectRegistry.synced and
# whose last_repository_updated_at falls within the last LAST_SYNC_INTERVAL.
#
# The registry ids are plucked into a plain array before being handed to the
# Project query.
# NOTE(review): presumably because the registry lives in a separate tracking
# database, so a SQL subquery would not work. Confirm.
def find_project_ids_updated_recently
  synced_project_ids = Geo::ProjectRegistry.synced.pluck(:project_id)
  synced_projects = Project.where(id: synced_project_ids)
  update_cutoff = LAST_SYNC_INTERVAL.ago

  synced_projects
    .where('last_repository_updated_at >= ?', update_cutoff)
    .pluck(:id)
end
# Merges two lists by alternating their elements, always starting with an
# element of +first+: first[0], second[0], first[1], second[1], ...
# When one list runs out, the remaining elements of the other follow in
# order. nil entries are dropped and the result is capped at BATCH_SIZE.
def interleave(first, second)
  # Array#zip only yields as many pairs as its receiver has elements, so the
  # longer list must be the receiver; the pair is then put back into
  # [first, second] order when the roles were swapped.
  pairs =
    if second.length > first.length
      second.zip(first).map { |from_second, from_first| [from_first, from_second] }
    else
      first.zip(second)
    end

  # Padding added by zip for the shorter list shows up as nil and is
  # removed by compact.
  alternated = pairs.flatten(1).compact
  alternated.take(BATCH_SIZE)
end
def over_time?(start_time) def over_time?(start_time)
Time.now - start_time >= RUN_TIME Time.now - start_time >= RUN_TIME
end end
......
---
title: 'Geo: Resync repositories that have been updated recently'
merge_request:
author:
...@@ -67,35 +67,67 @@ describe Geo::RepositoryBackfillService, services: true do ...@@ -67,35 +67,67 @@ describe Geo::RepositoryBackfillService, services: true do
context 'when repository exists and is not empty' do context 'when repository exists and is not empty' do
let(:project) { create(:project) } let(:project) { create(:project) }
it 'does not fetch the project repositories' do it 'fetches project repositories' do
expect_any_instance_of(Repository).not_to receive(:fetch_geo_mirror) fetch_count = 0
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) do
fetch_count += 1
end
subject.execute subject.execute
expect(fetch_count).to eq 2
end end
context 'tracking database' do context 'tracking database' do
it 'tracks missing repository sync' do it 'tracks repository sync' do
expect { subject.execute }.to change(Geo::ProjectRegistry, :count).by(1) expect { subject.execute }.to change(Geo::ProjectRegistry, :count).by(1)
end end
it 'stores last_repository_successful_sync_at when succeed' do
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { true }
subject.execute
registry = Geo::ProjectRegistry.find_by(project_id: project.id)
expect(registry.last_repository_successful_sync_at).not_to be_nil
end
it 'reset last_repository_successful_sync_at when fail' do
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { raise Gitlab::Shell::Error }
subject.execute
registry = Geo::ProjectRegistry.find_by(project_id: project.id)
expect(registry.last_repository_successful_sync_at).to be_nil
end
end end
end end
context 'when repository was backfilled successfully' do context 'when repository was backfilled successfully' do
let(:project) { create(:project) } let(:project) { create(:project) }
let(:last_repository_successful_sync_at) { 5.days.ago } let(:last_repository_synced_at) { 5.days.ago }
let!(:registry) do let!(:registry) do
Geo::ProjectRegistry.create( Geo::ProjectRegistry.create(
project: project, project: project,
last_repository_synced_at: 5.days.ago, last_repository_synced_at: last_repository_synced_at,
last_repository_successful_sync_at: last_repository_successful_sync_at last_repository_successful_sync_at: last_repository_synced_at
) )
end end
it 'does not fetch the project repositories' do it 'fetches project repositories' do
expect_any_instance_of(Repository).not_to receive(:fetch_geo_mirror) fetch_count = 0
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) do
fetch_count += 1
end
subject.execute subject.execute
expect(fetch_count).to eq 2
end end
context 'tracking database' do context 'tracking database' do
...@@ -103,10 +135,26 @@ describe Geo::RepositoryBackfillService, services: true do ...@@ -103,10 +135,26 @@ describe Geo::RepositoryBackfillService, services: true do
expect { subject.execute }.not_to change(Geo::ProjectRegistry, :count) expect { subject.execute }.not_to change(Geo::ProjectRegistry, :count)
end end
it 'does not update last_repository_successful_sync_at' do it 'updates registry when succeed' do
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { true }
subject.execute subject.execute
expect(registry.reload.last_repository_successful_sync_at).to be_within(1.second).of(last_repository_successful_sync_at) registry.reload
expect(registry.last_repository_synced_at).to be_within(1.minute).of(Time.now)
expect(registry.last_repository_successful_sync_at).to be_within(1.minute).of(Time.now)
end
it 'does not update registry last_repository_successful_sync_at when fail' do
allow_any_instance_of(Repository).to receive(:fetch_geo_mirror) { raise Gitlab::Shell::Error }
subject.execute
registry.reload
expect(registry.last_repository_synced_at).to be_within(1.minute).of(Time.now)
expect(registry.last_repository_successful_sync_at).to be_within(1.minute).of(last_repository_synced_at)
end end
end end
end end
......
...@@ -3,7 +3,8 @@ require 'spec_helper' ...@@ -3,7 +3,8 @@ require 'spec_helper'
describe Geo::GeoBackfillWorker, services: true do describe Geo::GeoBackfillWorker, services: true do
let!(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') } let!(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') }
let!(:secondary) { create(:geo_node, :current) } let!(:secondary) { create(:geo_node, :current) }
let!(:projects) { create_list(:empty_project, 2) } let!(:project_1) { create(:empty_project) }
let!(:project_2) { create(:empty_project) }
subject { described_class.new } subject { described_class.new }
...@@ -20,7 +21,7 @@ describe Geo::GeoBackfillWorker, services: true do ...@@ -20,7 +21,7 @@ describe Geo::GeoBackfillWorker, services: true do
it 'performs Geo::RepositoryBackfillService for projects where last attempt to backfill failed' do it 'performs Geo::RepositoryBackfillService for projects where last attempt to backfill failed' do
Geo::ProjectRegistry.create( Geo::ProjectRegistry.create(
project: Project.first, project: project_1,
last_repository_synced_at: DateTime.now, last_repository_synced_at: DateTime.now,
last_repository_successful_sync_at: nil last_repository_successful_sync_at: nil
) )
...@@ -30,6 +31,27 @@ describe Geo::GeoBackfillWorker, services: true do ...@@ -30,6 +31,27 @@ describe Geo::GeoBackfillWorker, services: true do
subject.perform subject.perform
end end
it 'performs Geo::RepositoryBackfillService for backfilled projects updated recently' do
Geo::ProjectRegistry.create(
project: project_1,
last_repository_synced_at: 2.days.ago,
last_repository_successful_sync_at: 2.days.ago
)
Geo::ProjectRegistry.create(
project: project_2,
last_repository_synced_at: 2.days.ago,
last_repository_successful_sync_at: 2.days.ago
)
project_1.update_attribute(:last_repository_updated_at, 2.days.ago)
project_2.update_attribute(:last_repository_updated_at, 10.minutes.ago)
expect(Geo::RepositoryBackfillService).to receive(:new).once.and_return(spy)
subject.perform
end
it 'does not perform Geo::RepositoryBackfillService when tracking DB is not available' do it 'does not perform Geo::RepositoryBackfillService when tracking DB is not available' do
allow(Rails.configuration).to receive(:respond_to?).with(:geo_database) { false } allow(Rails.configuration).to receive(:respond_to?).with(:geo_database) { false }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment