Move GitGarbageCollectWorker to Projects namespace

In this commit we move the GitGarbageCollectWorker to the Projects
namespace because we want to reuse the logic later in similar workers.
parent 36f7da92
......@@ -1435,7 +1435,6 @@ RSpec/AnyInstanceOf:
- 'spec/workers/emails_on_push_worker_spec.rb'
- 'spec/workers/error_tracking_issue_link_worker_spec.rb'
- 'spec/workers/expire_pipeline_cache_worker_spec.rb'
- 'spec/workers/git_garbage_collect_worker_spec.rb'
- 'spec/workers/group_export_worker_spec.rb'
- 'spec/workers/group_import_worker_spec.rb'
- 'spec/workers/namespaceless_project_destroy_worker_spec.rb'
......
......@@ -40,7 +40,7 @@ module Projects
apply_bfg_object_map!
# Remove older objects that are no longer referenced
GitGarbageCollectWorker.new.perform(project.id, :prune, "project_cleanup:gc:#{project.id}")
Projects::GitGarbageCollectWorker.new.perform(project.id, :prune, "project_cleanup:gc:#{project.id}")
# The cache may now be inaccurate, and holding onto it could prevent
# bugs assuming the presence of some object from manifesting for some
......
......@@ -45,7 +45,7 @@ module Repositories
private
def execute_gitlab_shell_gc(lease_uuid)
GitGarbageCollectWorker.perform_async(@resource.id, task, lease_key, lease_uuid)
Projects::GitGarbageCollectWorker.perform_async(@resource.id, task, lease_key, lease_uuid)
ensure
if pushes_since_gc >= gc_period
Gitlab::Metrics.measure(:reset_pushes_since_gc) do
......
......@@ -1999,6 +1999,14 @@
:weight: 1
:idempotent: true
:tags: []
- :name: projects_git_garbage_collect
:feature_category: :gitaly
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent:
:tags: []
- :name: prometheus_create_default_alerts
:feature_category: :incident_management
:has_external_dependencies:
......
# frozen_string_literal: true
# According to our docs, we can only remove workers on major releases
# https://docs.gitlab.com/ee/development/sidekiq_style_guide.html#removing-workers.
#
# We still need to maintain this worker until 14.0, but with its current functionality.
#
# In https://gitlab.com/gitlab-org/gitlab/-/issues/299290 we track that removal.
class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker
include ApplicationWorker
......@@ -7,117 +13,7 @@ class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker
feature_category :gitaly
loggable_arguments 1, 2, 3
# Timeout set to 24h
LEASE_TIMEOUT = 86400
def perform(project_id, task = :gc, lease_key = nil, lease_uuid = nil)
lease_key ||= "git_gc:#{task}:#{project_id}"
project = Project.find(project_id)
active_uuid = get_lease_uuid(lease_key)
if active_uuid
return unless active_uuid == lease_uuid
renew_lease(lease_key, active_uuid)
else
lease_uuid = try_obtain_lease(lease_key)
return unless lease_uuid
end
task = task.to_sym
if gc?(task)
::Projects::GitDeduplicationService.new(project).execute
cleanup_orphan_lfs_file_references(project)
end
gitaly_call(task, project)
# Refresh the branch cache in case garbage collection caused a ref lookup to fail
flush_ref_caches(project) if gc?(task)
update_repository_statistics(project) if task != :pack_refs
# In case pack files are deleted, release libgit2 cache and open file
# descriptors ASAP instead of waiting for Ruby garbage collection
project.cleanup
ensure
cancel_lease(lease_key, lease_uuid) if lease_key.present? && lease_uuid.present?
end
private
def gc?(task)
task == :gc || task == :prune
end
def try_obtain_lease(key)
::Gitlab::ExclusiveLease.new(key, timeout: LEASE_TIMEOUT).try_obtain
end
def renew_lease(key, uuid)
::Gitlab::ExclusiveLease.new(key, uuid: uuid, timeout: LEASE_TIMEOUT).renew
end
def cancel_lease(key, uuid)
::Gitlab::ExclusiveLease.cancel(key, uuid)
end
def get_lease_uuid(key)
::Gitlab::ExclusiveLease.get_uuid(key)
end
def gitaly_call(task, project)
repository = project.repository.raw_repository
client = if task == :pack_refs
Gitlab::GitalyClient::RefService.new(repository)
else
Gitlab::GitalyClient::RepositoryService.new(repository)
end
case task
when :prune, :gc
client.garbage_collect(bitmaps_enabled?, prune: task == :prune)
when :full_repack
client.repack_full(bitmaps_enabled?)
when :incremental_repack
client.repack_incremental
when :pack_refs
client.pack_refs
end
rescue GRPC::NotFound => e
Gitlab::GitLogger.error("#{__method__} failed:\nRepository not found")
raise Gitlab::Git::Repository::NoRepository.new(e)
rescue GRPC::BadStatus => e
Gitlab::GitLogger.error("#{__method__} failed:\n#{e}")
raise Gitlab::Git::CommandError.new(e)
end
def cleanup_orphan_lfs_file_references(project)
return if Gitlab::Database.read_only? # GitGarbageCollectWorker may be run on a Geo secondary
::Gitlab::Cleanup::OrphanLfsFileReferences.new(project, dry_run: false, logger: logger).run!
rescue => err
Gitlab::GitLogger.warn(message: "Cleaning up orphan LFS objects files failed", error: err.message)
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(err)
end
def flush_ref_caches(project)
project.repository.expire_branches_cache
project.repository.branch_names
project.repository.has_visible_content?
end
def update_repository_statistics(project)
project.repository.expire_statistics_caches
return if Gitlab::Database.read_only? # GitGarbageCollectWorker may be run on a Geo secondary
Projects::UpdateStatisticsService.new(project, nil, statistics: [:repository_size, :lfs_objects_size]).execute
end
def bitmaps_enabled?
Gitlab::CurrentSettings.housekeeping_bitmaps_enabled
::Projects::GitGarbageCollectWorker.new.perform(project_id, task, lease_key, lease_uuid)
end
end
# frozen_string_literal: true
module Projects
  # Sidekiq worker that runs one Git housekeeping task against a project's
  # repository through Gitaly.
  #
  # Recognised tasks are :gc, :prune, :full_repack, :incremental_repack and
  # :pack_refs. The task only runs while this job holds the exclusive lease
  # identified by the lease key; the lease is cancelled again when the job
  # finishes, whether it succeeded or raised.
  class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker
    include ApplicationWorker

    sidekiq_options retry: false
    feature_category :gitaly
    loggable_arguments 1, 2, 3

    # Lease lifetime: 24 hours, expressed in seconds.
    LEASE_TIMEOUT = 86_400

    # Runs +task+ for the project with the given id.
    #
    # @param project_id [Integer] id handed to #find_project
    # @param task [Symbol, String] housekeeping task; converted with #to_sym
    # @param lease_key [String, nil] exclusive lease key; defaults to
    #   "git_gc:<task>:<project_id>" when not given
    # @param lease_uuid [String, nil] UUID of a lease the caller already
    #   obtained for +lease_key+, if any
    def perform(project_id, task = :gc, lease_key = nil, lease_uuid = nil)
      lease_key ||= "git_gc:#{task}:#{project_id}"
      project = find_project(project_id)

      current_uuid = get_lease_uuid(lease_key)

      if current_uuid
        # A lease is already registered under this key: carry on only when it
        # is the very lease that was passed to this job.
        return if current_uuid != lease_uuid

        renew_lease(lease_key, current_uuid)
      else
        # Nothing registered yet, so this job has to take the lease itself.
        lease_uuid = try_obtain_lease(lease_key)
        return unless lease_uuid
      end

      task = task.to_sym

      if gc?(task)
        ::Projects::GitDeduplicationService.new(project).execute
        cleanup_orphan_lfs_file_references(project)
      end

      gitaly_call(task, project)

      # Refresh the branch cache in case garbage collection caused a ref lookup to fail
      flush_ref_caches(project) if gc?(task)

      update_repository_statistics(project) unless task == :pack_refs

      # In case pack files are deleted, release libgit2 cache and open file
      # descriptors ASAP instead of waiting for Ruby garbage collection
      project.cleanup
    ensure
      cancel_lease(lease_key, lease_uuid) if lease_key.present? && lease_uuid.present?
    end

    private

    # Looks up the project to operate on; kept as its own method so it can be
    # replaced (the specs stub it).
    def find_project(project_id)
      Project.find(project_id)
    end

    # True for the tasks that go through Gitaly's garbage_collect call.
    def gc?(task)
      %i[gc prune].include?(task)
    end

    # Attempts to take the lease for +key+; returns whatever
    # ExclusiveLease#try_obtain returns (treated as falsy on failure by #perform).
    def try_obtain_lease(key)
      lease = ::Gitlab::ExclusiveLease.new(key, timeout: LEASE_TIMEOUT)
      lease.try_obtain
    end

    # Renews the lease +key+ held under +uuid+ with LEASE_TIMEOUT as timeout.
    def renew_lease(key, uuid)
      lease = ::Gitlab::ExclusiveLease.new(key, uuid: uuid, timeout: LEASE_TIMEOUT)
      lease.renew
    end

    # Cancels the lease +key+ held under +uuid+.
    def cancel_lease(key, uuid)
      ::Gitlab::ExclusiveLease.cancel(key, uuid)
    end

    # Returns the UUID currently registered for lease +key+, if any.
    def get_lease_uuid(key)
      ::Gitlab::ExclusiveLease.get_uuid(key)
    end

    # Dispatches +task+ to the matching Gitaly client call. Tasks that are not
    # listed below result in no Gitaly call at all.
    #
    # gRPC failures are logged and re-raised as Gitlab::Git errors:
    # GRPC::NotFound as NoRepository, any other GRPC::BadStatus as CommandError.
    def gitaly_call(task, project)
      raw_repository = project.repository.raw_repository
      client = get_gitaly_client(task, raw_repository)

      case task
      when :gc, :prune
        client.garbage_collect(bitmaps_enabled?, prune: task == :prune)
      when :full_repack
        client.repack_full(bitmaps_enabled?)
      when :incremental_repack
        client.repack_incremental
      when :pack_refs
        client.pack_refs
      end
    rescue GRPC::NotFound => e
      Gitlab::GitLogger.error("#{__method__} failed:\nRepository not found")
      raise Gitlab::Git::Repository::NoRepository.new(e)
    rescue GRPC::BadStatus => e
      Gitlab::GitLogger.error("#{__method__} failed:\n#{e}")
      raise Gitlab::Git::CommandError.new(e)
    end

    # Builds the Gitaly client for +task+: the ref service for :pack_refs,
    # the repository service for every other task.
    def get_gitaly_client(task, repository)
      service_class = task == :pack_refs ? Gitlab::GitalyClient::RefService : Gitlab::GitalyClient::RepositoryService
      service_class.new(repository)
    end

    # Runs the orphan LFS file reference cleanup (not a dry run). Any
    # StandardError is logged as a warning and handed to error tracking.
    def cleanup_orphan_lfs_file_references(project)
      return if Gitlab::Database.read_only? # GitGarbageCollectWorker may be run on a Geo secondary

      cleaner = ::Gitlab::Cleanup::OrphanLfsFileReferences.new(project, dry_run: false, logger: logger)
      cleaner.run!
    rescue StandardError => e
      Gitlab::GitLogger.warn(message: "Cleaning up orphan LFS objects files failed", error: e.message)
      Gitlab::ErrorTracking.track_and_raise_for_dev_exception(e)
    end

    # Expires the branches cache, then calls branch_names and
    # has_visible_content? on the repository again.
    def flush_ref_caches(project)
      repository = project.repository

      repository.expire_branches_cache
      repository.branch_names
      repository.has_visible_content?
    end

    # Expires the repository statistics caches and, unless the database is
    # read-only, recomputes the repository and LFS object sizes.
    def update_repository_statistics(project)
      project.repository.expire_statistics_caches

      return if Gitlab::Database.read_only? # GitGarbageCollectWorker may be run on a Geo secondary

      stats_service = Projects::UpdateStatisticsService.new(project, nil, statistics: [:repository_size, :lfs_objects_size])
      stats_service.execute
    end

    # Value of the housekeeping_bitmaps_enabled application setting.
    def bitmaps_enabled?
      Gitlab::CurrentSettings.housekeeping_bitmaps_enabled
    end
  end
end
......@@ -270,6 +270,8 @@
- 1
- - project_update_repository_storage
- 1
- - projects_git_garbage_collect
- 1
- - prometheus_create_default_alerts
- 1
- - propagate_integration
......
......@@ -49,7 +49,7 @@ It is ultimately performed by the Gitaly RPC `FetchIntoObjectPool`.
This is the current call stack by which it is invoked:
1. `Repositories::HousekeepingService#execute_gitlab_shell_gc`
1. `GitGarbageCollectWorker#perform`
1. `Projects::GitGarbageCollectWorker#perform`
1. `Projects::GitDeduplicationService#fetch_from_source`
1. `ObjectPool#fetch`
1. `ObjectPoolService#fetch`
......
......@@ -52,7 +52,7 @@ module Geo
end
def execute_gitlab_shell_gc(lease_uuid)
GitGarbageCollectWorker.perform_async(project.id, task, lease_key, lease_uuid)
::Projects::GitGarbageCollectWorker.perform_async(project.id, task, lease_key, lease_uuid)
ensure
if should_reset?
Gitlab::Metrics.measure(:geo_reset_syncs_since_gc) do
......
......@@ -52,13 +52,13 @@ RSpec.describe Geo::ProjectHousekeepingService do
stub_exclusive_lease(:the_lease_key, :the_uuid)
# At fetch 200
expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :gc, :the_lease_key, :the_uuid)
expect(::Projects::GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :gc, :the_lease_key, :the_uuid)
.once
# At fetch 50, 100, 150
expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :full_repack, :the_lease_key, :the_uuid)
expect(::Projects::GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :full_repack, :the_lease_key, :the_uuid)
.exactly(3).times
# At fetch 10, 20, ... (except those above)
expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :incremental_repack, :the_lease_key, :the_uuid)
expect(::Projects::GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :incremental_repack, :the_lease_key, :the_uuid)
.exactly(16).times
201.times do
......@@ -76,7 +76,7 @@ RSpec.describe Geo::ProjectHousekeepingService do
allow(service).to receive(:lease_key).and_return(:the_lease_key)
stub_exclusive_lease(:the_lease_key, :the_uuid)
expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :gc, :the_lease_key, :the_uuid).once
expect(::Projects::GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :gc, :the_lease_key, :the_uuid).once
service.execute
end
......@@ -88,7 +88,7 @@ RSpec.describe Geo::ProjectHousekeepingService do
it 'does not run gc for a non-new repository' do
stub_exclusive_lease(:the_lease_key, :the_uuid)
expect(GitGarbageCollectWorker).not_to receive(:perform_async)
expect(::Projects::GitGarbageCollectWorker).not_to receive(:perform_async)
service.execute
end
......@@ -102,7 +102,7 @@ RSpec.describe Geo::ProjectHousekeepingService do
end
it 'does not enqueue a job' do
expect(GitGarbageCollectWorker).not_to receive(:perform_async)
expect(::Projects::GitGarbageCollectWorker).not_to receive(:perform_async)
expect(service.send(:do_housekeeping)).to be_falsey
end
......@@ -119,10 +119,10 @@ RSpec.describe Geo::ProjectHousekeepingService do
expect(service).to receive(:try_obtain_lease).and_return(:the_uuid)
expect(service).to receive(:lease_key).and_return(:the_lease_key)
expect(service).to receive(:task).and_return(:incremental_repack)
expect(GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :incremental_repack, :the_lease_key, :the_uuid).and_call_original
expect(::Projects::GitGarbageCollectWorker).to receive(:perform_async).with(project.id, :incremental_repack, :the_lease_key, :the_uuid).and_call_original
Sidekiq::Testing.fake! do
expect { service.send(:do_housekeeping) }.to change(GitGarbageCollectWorker.jobs, :size).by(1)
expect { service.send(:do_housekeeping) }.to change(::Projects::GitGarbageCollectWorker.jobs, :size).by(1)
end
end
......
......@@ -88,7 +88,7 @@ RSpec.describe Projects::CleanupService do
end
it 'runs garbage collection on the repository' do
expect_next_instance_of(GitGarbageCollectWorker) do |worker|
expect_next_instance_of(Projects::GitGarbageCollectWorker) do |worker|
expect(worker).to receive(:perform).with(project.id, :prune, "project_cleanup:gc:#{project.id}")
end
......
......@@ -71,7 +71,7 @@ RSpec.shared_examples 'moves repository to another storage' do |repository_type|
it 'does not enqueue a GC run' do
expect { subject.execute }
.not_to change(GitGarbageCollectWorker.jobs, :count)
.not_to change(Projects::GitGarbageCollectWorker.jobs, :count)
end
end
......@@ -84,12 +84,12 @@ RSpec.shared_examples 'moves repository to another storage' do |repository_type|
stub_application_setting(housekeeping_enabled: false)
expect { subject.execute }
.not_to change(GitGarbageCollectWorker.jobs, :count)
.not_to change(Projects::GitGarbageCollectWorker.jobs, :count)
end
it 'enqueues a GC run' do
expect { subject.execute }
.to change(GitGarbageCollectWorker.jobs, :count).by(1)
.to change(Projects::GitGarbageCollectWorker.jobs, :count).by(1)
end
end
end
......
......@@ -9,10 +9,10 @@ RSpec.shared_examples 'housekeeps repository' do
expect(subject).to receive(:try_obtain_lease).and_return(:the_uuid)
expect(subject).to receive(:lease_key).and_return(:the_lease_key)
expect(subject).to receive(:task).and_return(:incremental_repack)
expect(GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :incremental_repack, :the_lease_key, :the_uuid).and_call_original
expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :incremental_repack, :the_lease_key, :the_uuid).and_call_original
Sidekiq::Testing.fake! do
expect { subject.execute }.to change(GitGarbageCollectWorker.jobs, :size).by(1)
expect { subject.execute }.to change(Projects::GitGarbageCollectWorker.jobs, :size).by(1)
end
end
......@@ -38,7 +38,7 @@ RSpec.shared_examples 'housekeeps repository' do
end
it 'does not enqueue a job' do
expect(GitGarbageCollectWorker).not_to receive(:perform_async)
expect(Projects::GitGarbageCollectWorker).not_to receive(:perform_async)
expect { subject.execute }.to raise_error(Repositories::HousekeepingService::LeaseTaken)
end
......@@ -63,16 +63,16 @@ RSpec.shared_examples 'housekeeps repository' do
allow(subject).to receive(:lease_key).and_return(:the_lease_key)
# At push 200
expect(GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :gc, :the_lease_key, :the_uuid)
expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :gc, :the_lease_key, :the_uuid)
.once
# At push 50, 100, 150
expect(GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :full_repack, :the_lease_key, :the_uuid)
expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :full_repack, :the_lease_key, :the_uuid)
.exactly(3).times
# At push 10, 20, ... (except those above)
expect(GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :incremental_repack, :the_lease_key, :the_uuid)
expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :incremental_repack, :the_lease_key, :the_uuid)
.exactly(16).times
# At push 6, 12, 18, ... (except those above)
expect(GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :pack_refs, :the_lease_key, :the_uuid)
expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :pack_refs, :the_lease_key, :the_uuid)
.exactly(27).times
201.times do
......@@ -90,7 +90,7 @@ RSpec.shared_examples 'housekeeps repository' do
allow(housekeeping).to receive(:try_obtain_lease).and_return(:gc_uuid)
allow(housekeeping).to receive(:lease_key).and_return(:gc_lease_key)
expect(GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :gc, :gc_lease_key, :gc_uuid).twice
expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :gc, :gc_lease_key, :gc_uuid).twice
2.times do
housekeeping.execute
......
......@@ -5,350 +5,22 @@ require 'fileutils'
require 'spec_helper'
RSpec.describe GitGarbageCollectWorker do
include GitHelpers
let_it_be(:project) { create(:project, :repository) }
let(:shell) { Gitlab::Shell.new }
let!(:lease_uuid) { SecureRandom.uuid }
let!(:lease_key) { "project_housekeeping:#{project.id}" }
let(:lease_uuid) { SecureRandom.uuid }
let(:lease_key) { "project_housekeeping:#{project.id}" }
let(:task) { :full_repack }
let(:params) { [project.id, task, lease_key, lease_uuid] }
subject { described_class.new }
shared_examples 'it calls Gitaly' do
specify do
expect_any_instance_of(Gitlab::GitalyClient::RepositoryService).to receive(gitaly_task)
.and_return(nil)
subject.perform(*params)
end
end
shared_examples 'it updates the project statistics' do
it 'updates the project statistics' do
expect_next_instance_of(Projects::UpdateStatisticsService, project, nil, statistics: [:repository_size, :lfs_objects_size]) do |service|
expect(service).to receive(:execute).and_call_original
end
subject.perform(*params)
end
it 'does nothing if the database is read-only' do
allow(Gitlab::Database).to receive(:read_only?) { true }
expect_any_instance_of(Projects::UpdateStatisticsService).not_to receive(:execute)
subject.perform(*params)
end
end
describe "#perform" do
let(:gitaly_task) { :garbage_collect }
let(:task) { :gc }
context 'with active lease_uuid' do
before do
allow(subject).to receive(:get_lease_uuid).and_return(lease_uuid)
end
it_behaves_like 'it calls Gitaly'
it_behaves_like 'it updates the project statistics'
it "flushes ref caches when the task if 'gc'" do
expect(subject).to receive(:renew_lease).with(lease_key, lease_uuid).and_call_original
expect_any_instance_of(Repository).to receive(:expire_branches_cache).and_call_original
expect_any_instance_of(Repository).to receive(:branch_names).and_call_original
expect_any_instance_of(Repository).to receive(:has_visible_content?).and_call_original
expect_any_instance_of(Gitlab::Git::Repository).to receive(:has_visible_content?).and_call_original
subject.perform(*params)
end
it 'handles gRPC errors' do
expect_any_instance_of(Gitlab::GitalyClient::RepositoryService).to receive(:garbage_collect).and_raise(GRPC::NotFound)
expect { subject.perform(*params) }.to raise_exception(Gitlab::Git::Repository::NoRepository)
end
end
context 'with different lease than the active one' do
before do
allow(subject).to receive(:get_lease_uuid).and_return(SecureRandom.uuid)
end
it 'returns silently' do
expect_any_instance_of(Repository).not_to receive(:expire_branches_cache).and_call_original
expect_any_instance_of(Repository).not_to receive(:branch_names).and_call_original
expect_any_instance_of(Repository).not_to receive(:has_visible_content?).and_call_original
subject.perform(*params)
end
end
context 'with no active lease' do
let(:params) { [project.id] }
before do
allow(subject).to receive(:get_lease_uuid).and_return(false)
it 'calls the Projects::GitGarbageGitGarbageCollectWorker with the same params' do
expect_next_instance_of(Projects::GitGarbageCollectWorker) do |instance|
expect(instance).to receive(:perform).with(*params)
end
context 'when is able to get the lease' do
before do
allow(subject).to receive(:try_obtain_lease).and_return(SecureRandom.uuid)
end
it_behaves_like 'it calls Gitaly'
it_behaves_like 'it updates the project statistics'
it "flushes ref caches when the task if 'gc'" do
expect(subject).to receive(:get_lease_uuid).with("git_gc:#{task}:#{project.id}").and_return(false)
expect_any_instance_of(Repository).to receive(:expire_branches_cache).and_call_original
expect_any_instance_of(Repository).to receive(:branch_names).and_call_original
expect_any_instance_of(Repository).to receive(:has_visible_content?).and_call_original
expect_any_instance_of(Gitlab::Git::Repository).to receive(:has_visible_content?).and_call_original
subject.perform(*params)
end
context 'when the repository has joined a pool' do
let!(:pool) { create(:pool_repository, :ready) }
let(:project) { pool.source_project }
it 'ensures the repositories are linked' do
expect_any_instance_of(PoolRepository).to receive(:link_repository).once
subject.perform(*params)
end
end
context 'LFS object garbage collection' do
before do
stub_lfs_setting(enabled: true)
end
let_it_be(:lfs_reference) { create(:lfs_objects_project, project: project) }
let(:lfs_object) { lfs_reference.lfs_object }
it 'cleans up unreferenced LFS objects' do
expect_next_instance_of(Gitlab::Cleanup::OrphanLfsFileReferences) do |svc|
expect(svc.project).to eq(project)
expect(svc.dry_run).to be_falsy
expect(svc).to receive(:run!).and_call_original
end
subject.perform(*params)
expect(project.lfs_objects.reload).not_to include(lfs_object)
end
it 'catches and logs exceptions' do
expect_any_instance_of(Gitlab::Cleanup::OrphanLfsFileReferences)
.to receive(:run!)
.and_raise(/Failed/)
expect(Gitlab::GitLogger).to receive(:warn)
expect(Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception)
subject.perform(*params)
end
it 'does nothing if the database is read-only' do
allow(Gitlab::Database).to receive(:read_only?) { true }
expect_any_instance_of(Gitlab::Cleanup::OrphanLfsFileReferences).not_to receive(:run!)
subject.perform(*params)
expect(project.lfs_objects.reload).to include(lfs_object)
end
end
end
context 'when no lease can be obtained' do
before do
expect(subject).to receive(:try_obtain_lease).and_return(false)
end
it 'returns silently' do
expect(subject).not_to receive(:command)
expect_any_instance_of(Repository).not_to receive(:expire_branches_cache).and_call_original
expect_any_instance_of(Repository).not_to receive(:branch_names).and_call_original
expect_any_instance_of(Repository).not_to receive(:has_visible_content?).and_call_original
subject.perform(*params)
end
end
end
context "repack_full" do
let(:task) { :full_repack }
let(:gitaly_task) { :repack_full }
before do
expect(subject).to receive(:get_lease_uuid).and_return(lease_uuid)
end
it_behaves_like 'it calls Gitaly'
it_behaves_like 'it updates the project statistics'
end
context "pack_refs" do
let(:task) { :pack_refs }
let(:gitaly_task) { :pack_refs }
before do
expect(subject).to receive(:get_lease_uuid).and_return(lease_uuid)
end
it "calls Gitaly" do
expect_any_instance_of(Gitlab::GitalyClient::RefService).to receive(task)
.and_return(nil)
subject.perform(*params)
end
it 'does not update the project statistics' do
expect(Projects::UpdateStatisticsService).not_to receive(:new)
subject.perform(*params)
end
end
context "repack_incremental" do
let(:task) { :incremental_repack }
let(:gitaly_task) { :repack_incremental }
before do
expect(subject).to receive(:get_lease_uuid).and_return(lease_uuid)
end
it_behaves_like 'it calls Gitaly'
it_behaves_like 'it updates the project statistics'
end
shared_examples 'gc tasks' do
before do
allow(subject).to receive(:get_lease_uuid).and_return(lease_uuid)
allow(subject).to receive(:bitmaps_enabled?).and_return(bitmaps_enabled)
end
it 'incremental repack adds a new packfile' do
create_objects(project)
before_packs = packs(project)
expect(before_packs.count).to be >= 1
subject.perform(project.id, 'incremental_repack', lease_key, lease_uuid)
after_packs = packs(project)
# Exactly one new pack should have been created
expect(after_packs.count).to eq(before_packs.count + 1)
# Previously existing packs are still around
expect(before_packs & after_packs).to eq(before_packs)
end
it 'full repack consolidates into 1 packfile' do
create_objects(project)
subject.perform(project.id, 'incremental_repack', lease_key, lease_uuid)
before_packs = packs(project)
expect(before_packs.count).to be >= 2
subject.perform(project.id, 'full_repack', lease_key, lease_uuid)
after_packs = packs(project)
expect(after_packs.count).to eq(1)
# Previously existing packs should be gone now
expect(after_packs - before_packs).to eq(after_packs)
expect(File.exist?(bitmap_path(after_packs.first))).to eq(bitmaps_enabled)
end
it 'gc consolidates into 1 packfile and updates packed-refs' do
create_objects(project)
before_packs = packs(project)
before_packed_refs = packed_refs(project)
expect(before_packs.count).to be >= 1
expect_any_instance_of(Gitlab::GitalyClient::RepositoryService)
.to receive(:garbage_collect)
.with(bitmaps_enabled, prune: false)
.and_call_original
subject.perform(project.id, 'gc', lease_key, lease_uuid)
after_packed_refs = packed_refs(project)
after_packs = packs(project)
expect(after_packs.count).to eq(1)
# Previously existing packs should be gone now
expect(after_packs - before_packs).to eq(after_packs)
# The packed-refs file should have been updated during 'git gc'
expect(before_packed_refs).not_to eq(after_packed_refs)
expect(File.exist?(bitmap_path(after_packs.first))).to eq(bitmaps_enabled)
end
it 'cleans up repository after finishing' do
expect_any_instance_of(Project).to receive(:cleanup).and_call_original
subject.perform(project.id, 'gc', lease_key, lease_uuid)
end
it 'prune calls garbage_collect with the option prune: true' do
expect_any_instance_of(Gitlab::GitalyClient::RepositoryService)
.to receive(:garbage_collect)
.with(bitmaps_enabled, prune: true)
.and_return(nil)
subject.perform(project.id, 'prune', lease_key, lease_uuid)
end
end
context 'with bitmaps enabled' do
let(:bitmaps_enabled) { true }
include_examples 'gc tasks'
end
context 'with bitmaps disabled' do
let(:bitmaps_enabled) { false }
include_examples 'gc tasks'
end
end
# Create a new commit on a random new branch
def create_objects(project)
rugged = rugged_repo(project.repository)
old_commit = rugged.branches.first.target
new_commit_sha = Rugged::Commit.create(
rugged,
message: "hello world #{SecureRandom.hex(6)}",
author: { email: 'foo@bar', name: 'baz' },
committer: { email: 'foo@bar', name: 'baz' },
tree: old_commit.tree,
parents: [old_commit]
)
rugged.references.create("refs/heads/#{SecureRandom.hex(6)}", new_commit_sha)
end
def packs(project)
Gitlab::GitalyClient::StorageSettings.allow_disk_access do
Dir["#{project.repository.path_to_repo}/objects/pack/*.pack"]
end
end
def packed_refs(project)
path = "#{project.repository.path_to_repo}/packed-refs"
FileUtils.touch(path)
File.read(path)
end
# Maps a packfile path to its bitmap path by swapping a trailing ".pack"
# for ".bitmap". Paths without that suffix come back unchanged.
def bitmap_path(pack)
  pack_suffix = /\.pack\z/
  pack.sub(pack_suffix) { '.bitmap' }
end
end
# frozen_string_literal: true
require 'fileutils'
require 'spec_helper'
RSpec.describe Projects::GitGarbageCollectWorker do
include GitHelpers

let_it_be(:project) { create(:project, :repository) }

# Lease identifiers passed to the worker by examples that simulate a caller
# which already holds the lease.
let!(:lease_uuid) { SecureRandom.uuid }
let!(:lease_key) { "project_housekeeping:#{project.id}" }

# Default argument list for `perform`; `task` is supplied by nested groups.
let(:params) { [project.id, task, lease_key, lease_uuid] }
let(:repository) { project.repository }

subject { described_class.new }

before do
  # Presumably done so the worker operates on the very same project object the
  # examples hold, letting expectations set on `project` / `repository` be
  # observed when `perform` is called with `project.id`.
  allow(subject).to receive(:find_project).and_return(project)
end
# Checks that the worker asks `get_gitaly_client` for a client matching the
# task and invokes `gitaly_task` on it. The including group has to define
# `task`, `gitaly_task` and `params`.
shared_examples 'it calls Gitaly' do
  specify do
    gitaly_client = instance_double(Gitlab::GitalyClient::RepositoryService)

    expect(subject).to receive(:get_gitaly_client)
      .with(task, repository.raw_repository)
      .and_return(gitaly_client)
    expect(gitaly_client).to receive(gitaly_task)

    subject.perform(*params)
  end
end
# Checks that a run refreshes the repository and LFS size statistics, and
# that nothing is refreshed while the database is read-only.
shared_examples 'it updates the project statistics' do
  it 'updates the project statistics' do
    refreshed = %i[repository_size lfs_objects_size]

    expect_next_instance_of(Projects::UpdateStatisticsService, project, nil, statistics: refreshed) do |statistics_service|
      expect(statistics_service).to receive(:execute)
    end

    subject.perform(*params)
  end

  it 'does nothing if the database is read-only' do
    allow(Gitlab::Database).to receive(:read_only?).and_return(true)

    expect(Projects::UpdateStatisticsService).not_to receive(:new)

    subject.perform(*params)
  end
end
describe '#perform', :aggregate_failures do
# Defaults used by the examples below: the worker task and the Gitaly client
# method it is expected to trigger. Nested contexts override both.
let(:task) { :gc }
let(:gitaly_task) { :garbage_collect }
# The caller passes a lease uuid that matches the one the worker reads back.
context 'with active lease_uuid' do
  before do
    allow(subject).to receive(:get_lease_uuid).and_return(lease_uuid)
  end

  it_behaves_like 'it calls Gitaly'
  it_behaves_like 'it updates the project statistics'

  it "flushes ref caches when the task if 'gc'" do
    expect(subject).to receive(:renew_lease).with(lease_key, lease_uuid).and_call_original

    # Each of these must be hit on the repository while still running the
    # real implementation.
    %i[expire_branches_cache branch_names has_visible_content?].each do |message|
      expect(repository).to receive(message).and_call_original
    end
    expect(repository.raw_repository).to receive(:has_visible_content?).and_call_original

    subject.perform(*params)
  end

  it 'handles gRPC errors' do
    allow_next_instance_of(Gitlab::GitalyClient::RepositoryService, repository.raw_repository) do |gitaly_client|
      allow(gitaly_client).to receive(:garbage_collect).and_raise(GRPC::NotFound)
    end

    expect { subject.perform(*params) }.to raise_error(Gitlab::Git::Repository::NoRepository)
  end
end
# The worker reads back a lease uuid that differs from the one it was given,
# so it must not touch the repository caches.
context 'with different lease than the active one' do
  before do
    allow(subject).to receive(:get_lease_uuid).and_return(SecureRandom.uuid)
  end

  it 'returns silently' do
    # Negative expectations never run an implementation, so no
    # `and_call_original` is chained here.
    expect(repository).not_to receive(:expire_branches_cache)
    expect(repository).not_to receive(:branch_names)
    expect(repository).not_to receive(:has_visible_content?)

    subject.perform(*params)
  end
end
# `perform` is called with the project id only (no lease key / uuid), and the
# worker reads no existing lease uuid.
context 'with no active lease' do
  let(:params) { [project.id] }

  before do
    allow(subject).to receive(:get_lease_uuid).and_return(false)
  end

  context 'when is able to get the lease' do
    before do
      allow(subject).to receive(:try_obtain_lease).and_return(SecureRandom.uuid)
    end

    it_behaves_like 'it calls Gitaly'
    it_behaves_like 'it updates the project statistics'

    it "flushes ref caches when the task if 'gc'" do
      expect(subject).to receive(:get_lease_uuid).with("git_gc:#{task}:#{project.id}").and_return(false)
      expect(repository).to receive(:expire_branches_cache).and_call_original
      expect(repository).to receive(:branch_names).and_call_original
      expect(repository).to receive(:has_visible_content?).and_call_original
      expect(repository.raw_repository).to receive(:has_visible_content?).and_call_original

      subject.perform(*params)
    end

    context 'when the repository has joined a pool' do
      let!(:pool) { create(:pool_repository, :ready) }
      let(:project) { pool.source_project }

      it 'ensures the repositories are linked' do
        expect(project.pool_repository).to receive(:link_repository).once

        subject.perform(*params)
      end
    end

    context 'LFS object garbage collection' do
      before do
        stub_lfs_setting(enabled: true)
      end

      let_it_be(:lfs_reference) { create(:lfs_objects_project, project: project) }
      let(:lfs_object) { lfs_reference.lfs_object }

      it 'cleans up unreferenced LFS objects' do
        expect_next_instance_of(Gitlab::Cleanup::OrphanLfsFileReferences) do |svc|
          expect(svc.project).to eq(project)
          expect(svc.dry_run).to be_falsy
          expect(svc).to receive(:run!).and_call_original
        end

        subject.perform(*params)

        expect(project.lfs_objects.reload).not_to include(lfs_object)
      end

      it 'catches and logs exceptions' do
        allow_next_instance_of(Gitlab::Cleanup::OrphanLfsFileReferences) do |svc|
          # Make the cleanup itself fail with a genuine exception so that the
          # worker's error handling is what this example exercises.
          allow(svc).to receive(:run!).and_raise(StandardError, 'Failed')
        end

        expect(Gitlab::GitLogger).to receive(:warn)
        expect(Gitlab::ErrorTracking).to receive(:track_and_raise_for_dev_exception)

        subject.perform(*params)
      end

      it 'does nothing if the database is read-only' do
        allow(Gitlab::Database).to receive(:read_only?) { true }

        expect(Gitlab::Cleanup::OrphanLfsFileReferences).not_to receive(:new)

        subject.perform(*params)

        expect(project.lfs_objects.reload).to include(lfs_object)
      end
    end
  end

  context 'when no lease can be obtained' do
    it 'returns silently' do
      expect(subject).to receive(:try_obtain_lease).and_return(false)
      expect(subject).not_to receive(:command)

      # Negative expectations never run an implementation, so no
      # `and_call_original` is chained here.
      expect(repository).not_to receive(:expire_branches_cache)
      expect(repository).not_to receive(:branch_names)
      expect(repository).not_to receive(:has_visible_content?)

      subject.perform(*params)
    end
  end
end
# The :full_repack task is expected to trigger Gitaly's `repack_full`.
context 'repack_full' do
  let(:task) { :full_repack }
  let(:gitaly_task) { :repack_full }

  before do
    # Stub only: hooks set up state, the examples carry the expectations.
    allow(subject).to receive(:get_lease_uuid).and_return(lease_uuid)
  end

  it_behaves_like 'it calls Gitaly'
  it_behaves_like 'it updates the project statistics'
end
# The :pack_refs task goes through the Gitaly ref service (not the repository
# service) and is expected to leave the project statistics alone.
context 'pack_refs' do
  let(:task) { :pack_refs }
  let(:gitaly_task) { :pack_refs }

  before do
    # Stub only: hooks set up state, the examples carry the expectations.
    allow(subject).to receive(:get_lease_uuid).and_return(lease_uuid)
  end

  it 'calls Gitaly' do
    ref_service = instance_double(Gitlab::GitalyClient::RefService)

    expect(subject).to receive(:get_gitaly_client).with(task, repository.raw_repository).and_return(ref_service)
    expect(ref_service).to receive(gitaly_task)

    subject.perform(*params)
  end

  it 'does not update the project statistics' do
    expect(Projects::UpdateStatisticsService).not_to receive(:new)

    subject.perform(*params)
  end
end
# The :incremental_repack task is expected to trigger Gitaly's
# `repack_incremental`.
context 'repack_incremental' do
  let(:task) { :incremental_repack }
  let(:gitaly_task) { :repack_incremental }

  before do
    # Stub only: hooks set up state, the examples carry the expectations.
    allow(subject).to receive(:get_lease_uuid).and_return(lease_uuid)
  end

  it_behaves_like 'it calls Gitaly'
  it_behaves_like 'it updates the project statistics'
end
# Examples for the individual housekeeping tasks; most of them inspect the
# packfiles of the repository on disk. The including context must define
# `bitmaps_enabled`.
shared_examples 'gc tasks' do
  before do
    allow(subject).to receive_messages(
      get_lease_uuid: lease_uuid,
      bitmaps_enabled?: bitmaps_enabled
    )
  end

  it 'incremental repack adds a new packfile' do
    create_objects(project)
    packs_before = packs(project)

    expect(packs_before.count).to be >= 1

    subject.perform(project.id, 'incremental_repack', lease_key, lease_uuid)
    packs_after = packs(project)

    # Exactly one new pack should have been created
    expect(packs_after.count).to eq(packs_before.count + 1)

    # Previously existing packs are still around
    expect(packs_before & packs_after).to eq(packs_before)
  end

  it 'full repack consolidates into 1 packfile' do
    create_objects(project)
    subject.perform(project.id, 'incremental_repack', lease_key, lease_uuid)
    packs_before = packs(project)

    expect(packs_before.count).to be >= 2

    subject.perform(project.id, 'full_repack', lease_key, lease_uuid)
    packs_after = packs(project)

    expect(packs_after.count).to eq(1)

    # Previously existing packs should be gone now
    expect(packs_after - packs_before).to eq(packs_after)

    expect(File.exist?(bitmap_path(packs_after.first))).to eq(bitmaps_enabled)
  end

  it 'gc consolidates into 1 packfile and updates packed-refs' do
    create_objects(project)
    packs_before = packs(project)
    refs_before = packed_refs(project)

    expect(packs_before.count).to be >= 1

    # `expect_next_instance_of` is hard to use here: RepositoryService is
    # instantiated several times for other repository calls (`exists?`,
    # `create_repository`, ...), so RSpec cannot tell which instance is the
    # "next" one we want to mock.
    # We also want the real Gitaly call to happen at this point; otherwise an
    # `instance_double` would be used, as elsewhere in this spec file.
    expect_any_instance_of(Gitlab::GitalyClient::RepositoryService) # rubocop:disable RSpec/AnyInstanceOf
      .to receive(:garbage_collect)
      .with(bitmaps_enabled, prune: false)
      .and_call_original

    subject.perform(project.id, 'gc', lease_key, lease_uuid)
    refs_after = packed_refs(project)
    packs_after = packs(project)

    expect(packs_after.count).to eq(1)

    # Previously existing packs should be gone now
    expect(packs_after - packs_before).to eq(packs_after)

    # The packed-refs file should have been updated during 'git gc'
    expect(refs_before).not_to eq(refs_after)

    expect(File.exist?(bitmap_path(packs_after.first))).to eq(bitmaps_enabled)
  end

  it 'cleans up repository after finishing' do
    expect(project).to receive(:cleanup).and_call_original

    subject.perform(project.id, 'gc', lease_key, lease_uuid)
  end

  it 'prune calls garbage_collect with the option prune: true' do
    gitaly_client = instance_double(Gitlab::GitalyClient::RepositoryService)

    expect(subject).to receive(:get_gitaly_client)
      .with(:prune, repository.raw_repository)
      .and_return(gitaly_client)
    expect(gitaly_client).to receive(:garbage_collect).with(bitmaps_enabled, prune: true)

    subject.perform(project.id, 'prune', lease_key, lease_uuid)
  end
end
# Run the shared 'gc tasks' examples once per bitmap setting. The generated
# context names are "with bitmaps enabled" and "with bitmaps disabled".
[['enabled', true], ['disabled', false]].each do |state, flag|
  context "with bitmaps #{state}" do
    let(:bitmaps_enabled) { flag }

    include_examples 'gc tasks'
  end
end
end
# Gives the repository something new to pack: a commit that reuses the tree
# of the first branch's target commit (also used as its parent), referenced
# by a new branch with a random name.
def create_objects(project)
  repo = rugged_repo(project.repository)
  base_commit = repo.branches.first.target

  commit_data = {
    message: "hello world #{SecureRandom.hex(6)}",
    author: { email: 'foo@bar', name: 'baz' },
    committer: { email: 'foo@bar', name: 'baz' },
    tree: base_commit.tree,
    parents: [base_commit]
  }
  created_sha = Rugged::Commit.create(repo, commit_data)

  repo.references.create("refs/heads/#{SecureRandom.hex(6)}", created_sha)
end
# Lists the *.pack files under `path_to_repo`. The `project` argument is not
# consulted directly; the path comes from `path_to_repo`.
def packs(project)
  pack_glob = "#{path_to_repo}/objects/pack/*.pack"
  Dir.glob(pack_glob)
end
# Returns the content of the packed-refs file under `path_to_repo`. The file
# is touched first, so a missing file yields an empty string.
def packed_refs(project)
  refs_file = File.join(path_to_repo, 'packed-refs')

  FileUtils.touch(refs_file)
  File.read(refs_file)
end
# On-disk location of the project's repository below the test repositories
# root. Computed on first use and cached in an instance variable.
def path_to_repo
  return @path_to_repo if @path_to_repo

  repos_root = TestEnv.repos_path
  @path_to_repo = File.join(repos_root, project.repository.relative_path)
end
# Derives the bitmap file path for a packfile path: a trailing ".pack" is
# replaced by ".bitmap"; any other path is returned unchanged.
def bitmap_path(pack)
  trailing_pack_ext = /\.pack\z/
  pack.sub(trailing_pack_ext) { '.bitmap' }
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment