Commit d702e3e9 authored by Robert Speicher

Merge branch '2274-rename-backfill-services' into 'master'

Geo - Rename backfill service/worker

Closes #2274

See merge request !1883
parents a85912be dc7c04de
module Geo
class RepositoryBackfillService
class RepositorySyncService
attr_reader :project_id
LEASE_TIMEOUT = 8.hours.freeze
LEASE_KEY_PREFIX = 'repository_backfill_service'.freeze
LEASE_KEY_PREFIX = 'repository_sync_service'.freeze
def initialize(project_id)
@project_id = project_id
......@@ -81,7 +81,7 @@ module Geo
end
def update_registry(started_at, finished_at)
log('Updating registry information')
log('Updating repository sync information')
registry = Geo::ProjectRegistry.find_or_initialize_by(project_id: project_id)
registry.last_repository_synced_at = started_at
registry.last_repository_successful_sync_at = finished_at if finished_at
......
class GeoBackfillWorker
class GeoRepositorySyncWorker
include Sidekiq::Worker
include CronjobQueue
......@@ -15,20 +15,20 @@ class GeoBackfillWorker
project_ids_updated_recently = find_synced_project_ids_updated_recently
project_ids = interleave(project_ids_not_synced, project_ids_updated_recently)
logger.info "Started Geo backfilling for #{project_ids.length} project(s)"
logger.info "Started Geo repository syncing for #{project_ids.length} project(s)"
project_ids.each do |project_id|
begin
break if over_time?(start_time)
break unless Gitlab::Geo.current_node_enabled?
# We try to obtain a lease here for the entire backfilling process
# because backfill the repositories continuously at a controlled rate
# instead of hammering the primary node. Initially, we are backfilling
# We try to obtain a lease here for the entire sync process because we
# want to sync the repositories continuously at a controlled rate
# instead of hammering the primary node. Initially, we are syncing
# one repo at a time. If we don't obtain the lease here, every 5
# minutes all of 100 projects will be synced.
try_obtain_lease do |lease|
Geo::RepositoryBackfillService.new(project_id).execute
Geo::RepositorySyncService.new(project_id).execute
end
rescue ActiveRecord::RecordNotFound
logger.error("Couldn't find project with ID=#{project_id}, skipping syncing")
......@@ -36,7 +36,7 @@ class GeoBackfillWorker
end
end
logger.info "Finished Geo backfilling for #{project_ids.length} project(s)"
logger.info "Finished Geo repository syncing for #{project_ids.length} project(s)"
end
private
......@@ -86,10 +86,10 @@ class GeoBackfillWorker
end
def lease_key
Geo::RepositoryBackfillService::LEASE_KEY_PREFIX
Geo::RepositorySyncService::LEASE_KEY_PREFIX
end
def lease_timeout
Geo::RepositoryBackfillService::LEASE_TIMEOUT
Geo::RepositorySyncService::LEASE_TIMEOUT
end
end
......@@ -216,9 +216,9 @@ production: &base
geo_bulk_notify_worker:
cron: "*/10 * * * * *"
# GitLab Geo backfill worker
# GitLab Geo repository sync worker
# NOTE: This will only take effect if Geo is enabled
geo_backfill_worker:
geo_repository_sync_worker:
cron: "*/5 * * * *"
# GitLab Geo file download worker
......
......@@ -396,9 +396,9 @@ Settings.cron_jobs['ldap_group_sync_worker']['job_class'] = 'LdapGroupSyncWorker
Settings.cron_jobs['geo_bulk_notify_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['geo_bulk_notify_worker']['cron'] ||= '*/10 * * * * *'
Settings.cron_jobs['geo_bulk_notify_worker']['job_class'] ||= 'GeoBulkNotifyWorker'
Settings.cron_jobs['geo_backfill_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['geo_backfill_worker']['cron'] ||= '*/5 * * * *'
Settings.cron_jobs['geo_backfill_worker']['job_class'] ||= 'GeoBackfillWorker'
Settings.cron_jobs['geo_repository_sync_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['geo_repository_sync_worker']['cron'] ||= '*/5 * * * *'
Settings.cron_jobs['geo_repository_sync_worker']['job_class'] ||= 'GeoRepositorySyncWorker'
Settings.cron_jobs['geo_download_dispatch_worker'] ||= Settingslogic.new({})
Settings.cron_jobs['geo_download_dispatch_worker']['cron'] ||= '5 * * * *'
Settings.cron_jobs['geo_download_dispatch_worker']['job_class'] ||= 'GeoFileDownloadDispatchWorker'
......
......@@ -13,7 +13,7 @@ module Gitlab
).freeze
PRIMARY_JOBS = %i(bulk_notify_job).freeze
SECONDARY_JOBS = %i(backfill_job file_download_job).freeze
SECONDARY_JOBS = %i(repository_sync_job file_download_job).freeze
def self.current_node
self.cache_value(:geo_node_current) do
......@@ -37,7 +37,7 @@ module Gitlab
def self.current_node_enabled?
# No caching of the enabled! If we cache it and an admin disables
# this node, an active GeoBackfillWorker would keep going for up
# this node, an active GeoRepositorySyncWorker would keep going for up
# to max run time after the node was disabled.
Gitlab::Geo.current_node.reload.enabled?
end
......@@ -74,8 +74,8 @@ module Gitlab
Sidekiq::Cron::Job.find('geo_bulk_notify_worker')
end
def self.backfill_job
Sidekiq::Cron::Job.find('geo_backfill_worker')
def self.repository_sync_job
Sidekiq::Cron::Job.find('geo_repository_sync_worker')
end
def self.file_download_job
......
......@@ -122,7 +122,7 @@ describe Gitlab::Geo, lib: true do
end
before(:all) do
jobs = %w(geo_bulk_notify_worker geo_backfill_worker)
jobs = %w(geo_bulk_notify_worker geo_repository_sync_worker)
jobs.each { |job| init_cron_job(job, job.camelize) }
# TODO: Make this name consistent
......@@ -134,7 +134,7 @@ describe Gitlab::Geo, lib: true do
described_class.configure_cron_jobs!
expect(described_class.bulk_notify_job).to be_enabled
expect(described_class.backfill_job).not_to be_enabled
expect(described_class.repository_sync_job).not_to be_enabled
expect(described_class.file_download_job).not_to be_enabled
end
......@@ -143,7 +143,7 @@ describe Gitlab::Geo, lib: true do
described_class.configure_cron_jobs!
expect(described_class.bulk_notify_job).not_to be_enabled
expect(described_class.backfill_job).to be_enabled
expect(described_class.repository_sync_job).to be_enabled
expect(described_class.file_download_job).to be_enabled
end
......@@ -151,7 +151,7 @@ describe Gitlab::Geo, lib: true do
described_class.configure_cron_jobs!
expect(described_class.bulk_notify_job).not_to be_enabled
expect(described_class.backfill_job).not_to be_enabled
expect(described_class.repository_sync_job).not_to be_enabled
expect(described_class.file_download_job).not_to be_enabled
end
end
......
require 'spec_helper'
describe Geo::RepositoryBackfillService, services: true do
describe Geo::RepositorySyncService, services: true do
let!(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') }
subject { described_class.new(project.id) }
......@@ -106,7 +106,7 @@ describe Geo::RepositoryBackfillService, services: true do
end
end
context 'when repository was backfilled successfully' do
context 'when repository was synced successfully' do
let(:project) { create(:project) }
let(:last_repository_synced_at) { 5.days.ago }
......@@ -159,7 +159,7 @@ describe Geo::RepositoryBackfillService, services: true do
end
end
context 'when last attempt to backfill the repository failed' do
context 'when last attempt to sync the repository failed' do
let(:project) { create(:project) }
let!(:registry) do
......
require 'spec_helper'
describe Geo::GeoBackfillWorker, services: true do
describe Geo::GeoRepositorySyncWorker, services: true do
let!(:primary) { create(:geo_node, :primary, host: 'primary-geo-node') }
let!(:secondary) { create(:geo_node, :current) }
let!(:project_1) { create(:empty_project) }
......@@ -13,25 +13,25 @@ describe Geo::GeoBackfillWorker, services: true do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain) { true }
end
it 'performs Geo::RepositoryBackfillService for each project' do
expect(Geo::RepositoryBackfillService).to receive(:new).twice.and_return(spy)
it 'performs Geo::RepositorySyncService for each project' do
expect(Geo::RepositorySyncService).to receive(:new).twice.and_return(spy)
subject.perform
end
it 'performs Geo::RepositoryBackfillService for projects where last attempt to backfill failed' do
it 'performs Geo::RepositorySyncService for projects where last attempt to sync failed' do
Geo::ProjectRegistry.create(
project: project_1,
last_repository_synced_at: DateTime.now,
last_repository_successful_sync_at: nil
)
expect(Geo::RepositoryBackfillService).to receive(:new).twice.and_return(spy)
expect(Geo::RepositorySyncService).to receive(:new).twice.and_return(spy)
subject.perform
end
it 'performs Geo::RepositoryBackfillService for backfilled projects updated recently' do
it 'performs Geo::RepositorySyncService for synced projects updated recently' do
Geo::ProjectRegistry.create(
project: project_1,
last_repository_synced_at: 2.days.ago,
......@@ -47,39 +47,39 @@ describe Geo::GeoBackfillWorker, services: true do
project_1.update_attribute(:last_repository_updated_at, 2.days.ago)
project_2.update_attribute(:last_repository_updated_at, 10.minutes.ago)
expect(Geo::RepositoryBackfillService).to receive(:new).once.and_return(spy)
expect(Geo::RepositorySyncService).to receive(:new).once.and_return(spy)
subject.perform
end
it 'does not perform Geo::RepositoryBackfillService when tracking DB is not available' do
it 'does not perform Geo::RepositorySyncService when tracking DB is not available' do
allow(Rails.configuration).to receive(:respond_to?).with(:geo_database) { false }
expect(Geo::RepositoryBackfillService).not_to receive(:new)
expect(Geo::RepositorySyncService).not_to receive(:new)
subject.perform
end
it 'does not perform Geo::RepositoryBackfillService when primary node does not exists' do
it 'does not perform Geo::RepositorySyncService when primary node does not exists' do
allow(Gitlab::Geo).to receive(:primary_node) { nil }
expect(Geo::RepositoryBackfillService).not_to receive(:new)
expect(Geo::RepositorySyncService).not_to receive(:new)
subject.perform
end
it 'does not perform Geo::RepositoryBackfillService when node is disabled' do
it 'does not perform Geo::RepositorySyncService when node is disabled' do
allow_any_instance_of(GeoNode).to receive(:enabled?) { false }
expect(Geo::RepositoryBackfillService).not_to receive(:new)
expect(Geo::RepositorySyncService).not_to receive(:new)
subject.perform
end
it 'does not perform Geo::RepositoryBackfillService when can not obtain a lease' do
it 'does not perform Geo::RepositorySyncService when can not obtain a lease' do
allow_any_instance_of(Gitlab::ExclusiveLease).to receive(:try_obtain) { false }
expect(Geo::RepositoryBackfillService).not_to receive(:new)
expect(Geo::RepositorySyncService).not_to receive(:new)
subject.perform
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment