Commit 5999400b authored by Peter Leitzen's avatar Peter Leitzen

Merge branch '296892-fj-split-git-garbage-collect-worker' into 'master'

Split GitGarbageCollectWorker logic for projects and wikis

See merge request gitlab-org/gitlab!52012
parents 118d7875 9b09c3d7
......@@ -16,6 +16,10 @@ module Repositories
Gitlab::Redis::SharedState.with { |redis| redis.del(pushes_since_gc_redis_shared_state_key) }
end
def git_garbage_collect_worker_klass
raise NotImplementedError
end
private
def pushes_since_gc_redis_shared_state_key
......
......@@ -2522,6 +2522,11 @@ class Project < ApplicationRecord
tracing_setting&.external_url
end
override :git_garbage_collect_worker_klass
def git_garbage_collect_worker_klass
Projects::GitGarbageCollectWorker
end
private
def find_service(services, name)
......
......@@ -256,6 +256,15 @@ class Wiki
def after_post_receive
end
override :git_garbage_collect_worker_klass
def git_garbage_collect_worker_klass
Wikis::GitGarbageCollectWorker
end
def cleanup
@repository = nil
end
private
def commit_details(action, message = nil, title = nil)
......
......@@ -45,7 +45,7 @@ module Repositories
private
def execute_gitlab_shell_gc(lease_uuid)
Projects::GitGarbageCollectWorker.perform_async(@resource.id, task, lease_key, lease_uuid)
@resource.git_garbage_collect_worker_klass.perform_async(@resource.id, task, lease_key, lease_uuid)
ensure
if pushes_since_gc >= gc_period
Gitlab::Metrics.measure(:reset_pushes_since_gc) do
......
......@@ -2239,6 +2239,14 @@
:weight: 1
:idempotent: true
:tags: []
- :name: wikis_git_garbage_collect
:feature_category: :gitaly
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent:
:tags: []
- :name: x509_certificate_revoke
:feature_category: :source_code_management
:has_external_dependencies:
......
# frozen_string_literal: true
module GitGarbageCollectMethods
extend ActiveSupport::Concern
included do
include ApplicationWorker
sidekiq_options retry: false
feature_category :gitaly
loggable_arguments 1, 2, 3
end
# Timeout set to 24h
LEASE_TIMEOUT = 86400
def perform(resource_id, task = :gc, lease_key = nil, lease_uuid = nil)
resource = find_resource(resource_id)
lease_key ||= default_lease_key(task, resource)
active_uuid = get_lease_uuid(lease_key)
if active_uuid
return unless active_uuid == lease_uuid
renew_lease(lease_key, active_uuid)
else
lease_uuid = try_obtain_lease(lease_key)
return unless lease_uuid
end
task = task.to_sym
before_gitaly_call(task, resource)
gitaly_call(task, resource)
# Refresh the branch cache in case garbage collection caused a ref lookup to fail
flush_ref_caches(resource) if gc?(task)
update_repository_statistics(resource) if task != :pack_refs
# In case pack files are deleted, release libgit2 cache and open file
# descriptors ASAP instead of waiting for Ruby garbage collection
resource.cleanup
ensure
cancel_lease(lease_key, lease_uuid) if lease_key.present? && lease_uuid.present?
end
private
def default_lease_key(task, resource)
"git_gc:#{task}:#{resource.class.name.underscore.pluralize}:#{resource.id}"
end
def find_resource(id)
raise NotImplementedError
end
def gc?(task)
task == :gc || task == :prune
end
def try_obtain_lease(key)
::Gitlab::ExclusiveLease.new(key, timeout: LEASE_TIMEOUT).try_obtain
end
def renew_lease(key, uuid)
::Gitlab::ExclusiveLease.new(key, uuid: uuid, timeout: LEASE_TIMEOUT).renew
end
def cancel_lease(key, uuid)
::Gitlab::ExclusiveLease.cancel(key, uuid)
end
def get_lease_uuid(key)
::Gitlab::ExclusiveLease.get_uuid(key)
end
def before_gitaly_call(task, resource)
# no-op
end
def gitaly_call(task, resource)
repository = resource.repository.raw_repository
client = get_gitaly_client(task, repository)
case task
when :prune, :gc
client.garbage_collect(bitmaps_enabled?, prune: task == :prune)
when :full_repack
client.repack_full(bitmaps_enabled?)
when :incremental_repack
client.repack_incremental
when :pack_refs
client.pack_refs
end
rescue GRPC::NotFound => e
Gitlab::GitLogger.error("#{__method__} failed:\nRepository not found")
raise Gitlab::Git::Repository::NoRepository.new(e)
rescue GRPC::BadStatus => e
Gitlab::GitLogger.error("#{__method__} failed:\n#{e}")
raise Gitlab::Git::CommandError.new(e)
end
def get_gitaly_client(task, repository)
if task == :pack_refs
Gitlab::GitalyClient::RefService
else
Gitlab::GitalyClient::RepositoryService
end.new(repository)
end
def bitmaps_enabled?
Gitlab::CurrentSettings.housekeeping_bitmaps_enabled
end
def flush_ref_caches(resource)
resource.repository.expire_branches_cache
resource.repository.branch_names
resource.repository.has_visible_content?
end
def update_repository_statistics(resource)
resource.repository.expire_statistics_caches
return if Gitlab::Database.read_only? # GitGarbageCollectWorker may be run on a Geo secondary
update_db_repository_statistics(resource)
end
def update_db_repository_statistics(resource)
# no-op
end
end
......@@ -2,131 +2,41 @@
module Projects
class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker
include ApplicationWorker
sidekiq_options retry: false
feature_category :gitaly
loggable_arguments 1, 2, 3
# Timeout set to 24h
LEASE_TIMEOUT = 86400
def perform(project_id, task = :gc, lease_key = nil, lease_uuid = nil)
lease_key ||= "git_gc:#{task}:#{project_id}"
project = find_project(project_id)
active_uuid = get_lease_uuid(lease_key)
if active_uuid
return unless active_uuid == lease_uuid
renew_lease(lease_key, active_uuid)
else
lease_uuid = try_obtain_lease(lease_key)
return unless lease_uuid
end
task = task.to_sym
if gc?(task)
::Projects::GitDeduplicationService.new(project).execute
cleanup_orphan_lfs_file_references(project)
end
gitaly_call(task, project)
# Refresh the branch cache in case garbage collection caused a ref lookup to fail
flush_ref_caches(project) if gc?(task)
update_repository_statistics(project) if task != :pack_refs
# In case pack files are deleted, release libgit2 cache and open file
# descriptors ASAP instead of waiting for Ruby garbage collection
project.cleanup
ensure
cancel_lease(lease_key, lease_uuid) if lease_key.present? && lease_uuid.present?
end
extend ::Gitlab::Utils::Override
include GitGarbageCollectMethods
private
def find_project(project_id)
Project.find(project_id)
override :default_lease_key
def default_lease_key(task, resource)
"git_gc:#{task}:#{resource.id}"
end
def gc?(task)
task == :gc || task == :prune
override :find_resource
def find_resource(id)
Project.find(id)
end
def try_obtain_lease(key)
::Gitlab::ExclusiveLease.new(key, timeout: LEASE_TIMEOUT).try_obtain
end
override :before_gitaly_call
def before_gitaly_call(task, resource)
return unless gc?(task)
def renew_lease(key, uuid)
::Gitlab::ExclusiveLease.new(key, uuid: uuid, timeout: LEASE_TIMEOUT).renew
::Projects::GitDeduplicationService.new(resource).execute
cleanup_orphan_lfs_file_references(resource)
end
def cancel_lease(key, uuid)
::Gitlab::ExclusiveLease.cancel(key, uuid)
end
def get_lease_uuid(key)
::Gitlab::ExclusiveLease.get_uuid(key)
end
def gitaly_call(task, project)
repository = project.repository.raw_repository
client = get_gitaly_client(task, repository)
case task
when :prune, :gc
client.garbage_collect(bitmaps_enabled?, prune: task == :prune)
when :full_repack
client.repack_full(bitmaps_enabled?)
when :incremental_repack
client.repack_incremental
when :pack_refs
client.pack_refs
end
rescue GRPC::NotFound => e
Gitlab::GitLogger.error("#{__method__} failed:\nRepository not found")
raise Gitlab::Git::Repository::NoRepository.new(e)
rescue GRPC::BadStatus => e
Gitlab::GitLogger.error("#{__method__} failed:\n#{e}")
raise Gitlab::Git::CommandError.new(e)
end
def get_gitaly_client(task, repository)
if task == :pack_refs
Gitlab::GitalyClient::RefService
else
Gitlab::GitalyClient::RepositoryService
end.new(repository)
end
def cleanup_orphan_lfs_file_references(project)
def cleanup_orphan_lfs_file_references(resource)
return if Gitlab::Database.read_only? # GitGarbageCollectWorker may be run on a Geo secondary
::Gitlab::Cleanup::OrphanLfsFileReferences.new(project, dry_run: false, logger: logger).run!
::Gitlab::Cleanup::OrphanLfsFileReferences.new(resource, dry_run: false, logger: logger).run!
rescue => err
Gitlab::GitLogger.warn(message: "Cleaning up orphan LFS objects files failed", error: err.message)
Gitlab::ErrorTracking.track_and_raise_for_dev_exception(err)
end
def flush_ref_caches(project)
project.repository.expire_branches_cache
project.repository.branch_names
project.repository.has_visible_content?
end
def update_repository_statistics(project)
project.repository.expire_statistics_caches
return if Gitlab::Database.read_only? # GitGarbageCollectWorker may be run on a Geo secondary
Projects::UpdateStatisticsService.new(project, nil, statistics: [:repository_size, :lfs_objects_size]).execute
end
def bitmaps_enabled?
Gitlab::CurrentSettings.housekeeping_bitmaps_enabled
override :update_db_repository_statistics
def update_db_repository_statistics(resource)
Projects::UpdateStatisticsService.new(resource, nil, statistics: [:repository_size, :lfs_objects_size]).execute
end
end
end
# frozen_string_literal: true
module Wikis
class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker
extend ::Gitlab::Utils::Override
include GitGarbageCollectMethods
private
override :find_resource
def find_resource(id)
Project.find(id).wiki
end
override :update_db_repository_statistics
def update_db_repository_statistics(resource)
Projects::UpdateStatisticsService.new(resource.container, nil, statistics: [:wiki_size]).execute
end
end
end
......@@ -158,6 +158,8 @@
- 1
- - group_saml_group_sync
- 1
- - group_wikis_git_garbage_collect
- 1
- - hashed_storage
- 1
- - import_issues_csv
......@@ -362,5 +364,7 @@
- 1
- - web_hooks_destroy
- 1
- - wikis_git_garbage_collect
- 1
- - x509_certificate_revoke
- 1
......@@ -50,4 +50,9 @@ class GroupWiki < Wiki
# TODO: Update group wiki storage
# https://gitlab.com/gitlab-org/gitlab/-/issues/230465
end
override :git_garbage_collect_worker_klass
def git_garbage_collect_worker_klass
GroupWikis::GitGarbageCollectWorker
end
end
......@@ -757,6 +757,14 @@
:weight: 1
:idempotent: true
:tags: []
- :name: group_wikis_git_garbage_collect
:feature_category: :gitaly
:has_external_dependencies:
:urgency: :low
:resource_boundary: :unknown
:weight: 1
:idempotent:
:tags: []
- :name: incident_management_apply_incident_sla_exceeded_label
:feature_category: :incident_management
:has_external_dependencies:
......
# frozen_string_literal: true
module GroupWikis
class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker
extend ::Gitlab::Utils::Override
include GitGarbageCollectMethods
private
override :find_resource
def find_resource(id)
Group.find(id).wiki
end
end
end
......@@ -133,5 +133,6 @@ RSpec.describe GroupWiki do
let_it_be(:resource) { create(:group_wiki) }
let(:resource_key) { 'group_wikis' }
let(:expected_worker_class) { ::GroupWikis::GitGarbageCollectWorker }
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe GroupWikis::GitGarbageCollectWorker do
it_behaves_like 'can collect git garbage', update_statistics: false do
let_it_be(:resource) { create(:group_wiki) }
let_it_be(:page) { create(:wiki_page, wiki: resource) }
let(:expected_default_lease) { "group_wikis:#{resource.id}" }
end
end
......@@ -2999,6 +2999,7 @@ RSpec.describe Project, factory_default: :keep do
it_behaves_like 'can housekeep repository' do
let(:resource) { build_stubbed(:project) }
let(:resource_key) { 'projects' }
let(:expected_worker_class) { Projects::GitGarbageCollectWorker }
end
describe '#deployment_variables' do
......
......@@ -47,5 +47,6 @@ RSpec.describe ProjectWiki do
let_it_be(:resource) { create(:project_wiki) }
let(:resource_key) { 'project_wikis' }
let(:expected_worker_class) { Wikis::GitGarbageCollectWorker }
end
end
......@@ -41,5 +41,11 @@ RSpec.shared_examples 'can housekeep repository' do
expect(resource.send(:pushes_since_gc_redis_shared_state_key)).to eq("#{resource_key}/#{resource.id}/pushes_since_gc")
end
end
describe '#git_garbage_collect_worker_klass' do
it 'defines a git gargabe collect worker' do
expect(resource.git_garbage_collect_worker_klass).to eq(expected_worker_class)
end
end
end
end
......@@ -3,16 +3,16 @@
RSpec.shared_examples 'housekeeps repository' do
subject { described_class.new(resource) }
context 'with a clean redis state', :clean_gitlab_redis_shared_state do
context 'with a clean redis state', :clean_gitlab_redis_shared_state, :aggregate_failures do
describe '#execute' do
it 'enqueues a sidekiq job' do
expect(subject).to receive(:try_obtain_lease).and_return(:the_uuid)
expect(subject).to receive(:lease_key).and_return(:the_lease_key)
expect(subject).to receive(:task).and_return(:incremental_repack)
expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :incremental_repack, :the_lease_key, :the_uuid).and_call_original
expect(resource.git_garbage_collect_worker_klass).to receive(:perform_async).with(resource.id, :incremental_repack, :the_lease_key, :the_uuid).and_call_original
Sidekiq::Testing.fake! do
expect { subject.execute }.to change(Projects::GitGarbageCollectWorker.jobs, :size).by(1)
expect { subject.execute }.to change(resource.git_garbage_collect_worker_klass.jobs, :size).by(1)
end
end
......@@ -38,7 +38,7 @@ RSpec.shared_examples 'housekeeps repository' do
end
it 'does not enqueue a job' do
expect(Projects::GitGarbageCollectWorker).not_to receive(:perform_async)
expect(resource.git_garbage_collect_worker_klass).not_to receive(:perform_async)
expect { subject.execute }.to raise_error(Repositories::HousekeepingService::LeaseTaken)
end
......@@ -63,16 +63,16 @@ RSpec.shared_examples 'housekeeps repository' do
allow(subject).to receive(:lease_key).and_return(:the_lease_key)
# At push 200
expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :gc, :the_lease_key, :the_uuid)
expect(resource.git_garbage_collect_worker_klass).to receive(:perform_async).with(resource.id, :gc, :the_lease_key, :the_uuid)
.once
# At push 50, 100, 150
expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :full_repack, :the_lease_key, :the_uuid)
expect(resource.git_garbage_collect_worker_klass).to receive(:perform_async).with(resource.id, :full_repack, :the_lease_key, :the_uuid)
.exactly(3).times
# At push 10, 20, ... (except those above)
expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :incremental_repack, :the_lease_key, :the_uuid)
expect(resource.git_garbage_collect_worker_klass).to receive(:perform_async).with(resource.id, :incremental_repack, :the_lease_key, :the_uuid)
.exactly(16).times
# At push 6, 12, 18, ... (except those above)
expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :pack_refs, :the_lease_key, :the_uuid)
expect(resource.git_garbage_collect_worker_klass).to receive(:perform_async).with(resource.id, :pack_refs, :the_lease_key, :the_uuid)
.exactly(27).times
201.times do
......@@ -90,7 +90,7 @@ RSpec.shared_examples 'housekeeps repository' do
allow(housekeeping).to receive(:try_obtain_lease).and_return(:gc_uuid)
allow(housekeeping).to receive(:lease_key).and_return(:gc_lease_key)
expect(Projects::GitGarbageCollectWorker).to receive(:perform_async).with(resource.id, :gc, :gc_lease_key, :gc_uuid).twice
expect(resource.git_garbage_collect_worker_klass).to receive(:perform_async).with(resource.id, :gc, :gc_lease_key, :gc_uuid).twice
2.times do
housekeeping.execute
......
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Wikis::GitGarbageCollectWorker do
it_behaves_like 'can collect git garbage' do
let_it_be(:resource) { create(:project_wiki) }
let_it_be(:page) { create(:wiki_page, wiki: resource) }
let(:statistics_service_klass) { Projects::UpdateStatisticsService }
let(:statistics_keys) { [:wiki_size] }
let(:expected_default_lease) { "project_wikis:#{resource.id}" }
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment