Commit d52619d7 authored by Michael Kozono's avatar Michael Kozono

Merge branch '222635-delete-unused-registries' into 'master'

RegistryConsistencyWorker should be able to delete Package Files

See merge request gitlab-org/gitlab!34656
parents f040fb72 cf292675
......@@ -34,6 +34,13 @@ module Geo
raise NotImplementedError
end
# Return the absolute path to locally stored package file
#
# @return [String] File path
def blob_path
carrierwave_uploader.class.absolute_path(carrierwave_uploader)
end
def calculate_checksum!
checksum = model_record.calculate_checksum!
update_verification_state!(checksum: checksum)
......
......@@ -36,7 +36,13 @@ class Geo::BaseRegistry < Geo::TrackingBase
end
def self.delete_for_model_ids(ids)
raise NotImplementedError, "#{self.class} does not implement #{__method__}"
ids.map do |id|
delete_worker_class.perform_async(replicator_class.replicable_name, id)
end
end
def self.replicator_class
self::MODEL_CLASS.replicator_class
end
def self.find_unsynced_registries(batch_size:, except_ids: [])
......@@ -56,8 +62,8 @@ class Geo::BaseRegistry < Geo::TrackingBase
true
end
def model_record_id
read_attribute(self.class::MODEL_FOREIGN_KEY)
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
def self.find_registry_differences(range)
......@@ -73,4 +79,8 @@ class Geo::BaseRegistry < Geo::TrackingBase
[untracked_ids, unused_tracked_ids]
end
def model_record_id
read_attribute(self.class::MODEL_FOREIGN_KEY)
end
end
......@@ -16,10 +16,6 @@ class Geo::JobArtifactRegistry < Geo::BaseRegistry
::Geo::JobArtifactRegistryFinder
end
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end
......
......@@ -21,10 +21,6 @@ class Geo::LfsObjectRegistry < Geo::BaseRegistry
::Geo::LfsObjectRegistryFinder
end
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end
......
......@@ -11,9 +11,4 @@ class Geo::PackageFileRegistry < Geo::BaseRegistry
sha_attribute :verification_checksum
sha_attribute :verification_checksum_mismatched
def self.delete_for_model_ids(package_file_ids)
# TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/222635
[]
end
end
......@@ -22,10 +22,6 @@ class Geo::UploadRegistry < Geo::BaseRegistry
::Geo::AttachmentRegistryFinder
end
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end
......
......@@ -26,15 +26,8 @@ module Geo
break
end
if file_path && File.exist?(file_path)
log_info('Unlinking file', file_path: file_path)
File.unlink(file_path)
else
log_error('Unable to unlink file because file path is unknown. A file may be orphaned', object_type: object_type, object_db_id: object_db_id)
end
log_info('Removing file registry', file_registry_id: file_registry.id)
file_registry.destroy
destroy_file
destroy_registry
log_info('Local file & registry removed')
end
......@@ -52,16 +45,48 @@ module Geo
::Geo::JobArtifactRegistry.find_by(artifact_id: object_db_id)
elsif lfs?
::Geo::LfsObjectRegistry.find_by(lfs_object_id: object_db_id)
else
elsif user_upload?
::Geo::UploadRegistry.find_by(file_type: object_type, file_id: object_db_id)
elsif replicator
replicator.registry
end
end
end
# rubocop: enable CodeReuse/ActiveRecord
def destroy_file
if file_path && File.exist?(file_path)
log_info('Unlinking file', file_path: file_path)
File.unlink(file_path)
else
log_error('Unable to unlink file because file path is unknown. A file may be orphaned', object_type: object_type, object_db_id: object_db_id)
end
end
def destroy_registry
log_info('Removing file registry', file_registry_id: file_registry.id)
file_registry.destroy
end
def replicator
strong_memoize(:replicator) do
Gitlab::Geo::Replicator.for_replicable_params(replicable_name: object_type.to_s, replicable_id: object_db_id)
rescue NotImplementedError
nil
end
end
def blob_path_from_replicator
replicator.blob_path
rescue ActiveRecord::RecordNotFound
nil
end
def file_path
strong_memoize(:file_path) do
next @object_file_path if @object_file_path
next blob_path_from_replicator if replicator
# When local storage is used, just rely on the existing methods
next if file_uploader.nil?
next file_uploader.file.path if file_uploader.object_store == ObjectStorage::Store::LOCAL
......
......@@ -8,10 +8,12 @@ module Geo
loggable_arguments 0, 2
def perform(object_type, object_db_id, file_path = nil)
log_info('Executing Geo::FileRegistryRemovalService', id: object_db_id, type: object_type, file_path: file_path)
# This worker not only works for Self-Service Framework, it's also backward compatible
# "object_type" is "replicable_name" and "object_db_id" is "replicable_id".
def perform(replicable_name, replicable_id, file_path = nil)
log_info('Executing Geo::FileRegistryRemovalService', id: replicable_id, type: replicable_name, file_path: file_path)
::Geo::FileRegistryRemovalService.new(object_type, object_db_id, file_path).execute
::Geo::FileRegistryRemovalService.new(replicable_name, replicable_id, file_path).execute
end
end
end
......@@ -30,6 +30,10 @@ module Gitlab
define_method :replicator do
@_replicator ||= klass.new(model_record: self)
end
define_singleton_method :replicator_class do
@_replicator_class ||= klass
end
RUBY
end
end
......
......@@ -224,7 +224,7 @@ module Gitlab
#
# @return [Geo::BaseRegistry] registry instance
def registry
registry_class.for_model_record_id(model_record.id)
registry_class.for_model_record_id(model_record_id)
end
# Checksum value from the main database
......
......@@ -129,6 +129,42 @@ RSpec.describe Geo::FileRegistryRemovalService, :geo do
end
end
shared_examples 'removes package file' do
subject(:service) { described_class.new('package_file', registry.package_file_id) }
before do
stub_exclusive_lease("file_registry_removal_service:package_file:#{registry.package_file_id}",
timeout: Geo::FileRegistryRemovalService::LEASE_TIMEOUT)
end
it 'file from disk' do
expect do
service.execute
end.to change { File.exist?(file_path) }.from(true).to(false)
end
it 'deletes registry entry' do
expect do
service.execute
end.to change(Geo::PackageFileRegistry, :count).by(-1)
end
end
shared_examples 'removes package file registry' do
subject(:service) { described_class.new('package_file', registry.package_file_id) }
before do
stub_exclusive_lease("file_registry_removal_service:package_file:#{registry.package_file_id}",
timeout: Geo::FileRegistryRemovalService::LEASE_TIMEOUT)
end
it 'deletes registry entry' do
expect do
service.execute
end.to change(Geo::PackageFileRegistry, :count).by(-1)
end
end
context 'with LFS object' do
let!(:lfs_object) { create(:lfs_object, :with_file) }
let!(:registry) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object.id) }
......@@ -380,5 +416,39 @@ RSpec.describe Geo::FileRegistryRemovalService, :geo do
end
end
end
context 'with package file' do
let(:package_file) { create(:package_file_with_file) }
let!(:registry) { create(:geo_package_file_registry, package_file: package_file) }
let(:file_path) { Tempfile.new.path }
before do
allow_next_instance_of(Geo::PackageFileReplicator) do |replicator|
allow(replicator).to receive(:blob_path).and_return(file_path)
end
end
it_behaves_like 'removes package file'
context 'no package file record' do
before do
package_file.delete
end
it_behaves_like 'removes package file' do
subject(:service) { described_class.new('package_file', registry.package_file_id, file_path) }
end
end
context 'with orphaned registry' do
before do
package_file.delete
end
it_behaves_like 'removes package file registry' do
subject(:service) { described_class.new('package_file', registry.package_file_id) }
end
end
end
end
end
......@@ -156,81 +156,79 @@ RSpec.describe Geo::RegistryConsistencyService, :geo, :use_clean_rails_memory_st
end
end
unless klass == Geo::PackageFileRegistry
context 'when there are unused registries' do
context 'with no replicable records' do
let(:records) { create_list(model_class_factory, batch_size) }
let(:unused_model_ids) { records.map(&:id) }
let!(:registries) do
records.map do |record|
create(registry_class_factory, model_foreign_key => record.id)
end
context 'when there are unused registries' do
context 'with no replicable records' do
let(:records) { create_list(model_class_factory, batch_size) }
let(:unused_model_ids) { records.map(&:id) }
let!(:registries) do
records.map do |record|
create(registry_class_factory, model_foreign_key => record.id)
end
end
before do
model_class.where(id: unused_model_ids).delete_all
end
before do
model_class.where(id: unused_model_ids).delete_all
end
it 'deletes unused registries', :sidekiq_inline do
subject.execute
it 'deletes unused registries', :sidekiq_inline do
subject.execute
expect(registry_class.where(model_foreign_key => unused_model_ids)).to be_empty
end
expect(registry_class.where(model_foreign_key => unused_model_ids)).to be_empty
end
it 'returns truthy' do
expect(subject.execute).to be_truthy
end
it 'returns truthy' do
expect(subject.execute).to be_truthy
end
end
context 'when the unused registry foreign key ids are lower than the first replicable model id' do
let(:records) { create_list(model_class_factory, batch_size) }
let(:unused_registry_ids) { [records.first].map(&:id) }
context 'when the unused registry foreign key ids are lower than the first replicable model id' do
let(:records) { create_list(model_class_factory, batch_size) }
let(:unused_registry_ids) { [records.first].map(&:id) }
let!(:registries) do
records.map do |record|
create(registry_class_factory, model_foreign_key => record.id)
end
let!(:registries) do
records.map do |record|
create(registry_class_factory, model_foreign_key => record.id)
end
end
before do
model_class.where(id: unused_registry_ids).delete_all
end
before do
model_class.where(id: unused_registry_ids).delete_all
end
it 'deletes unused registries', :sidekiq_inline do
subject.execute
it 'deletes unused registries', :sidekiq_inline do
subject.execute
expect(registry_class.where(model_foreign_key => unused_registry_ids)).to be_empty
end
expect(registry_class.where(model_foreign_key => unused_registry_ids)).to be_empty
end
it 'returns truthy' do
expect(subject.execute).to be_truthy
end
it 'returns truthy' do
expect(subject.execute).to be_truthy
end
end
context 'when the unused registry foreign key ids are greater than the last replicable model id' do
let(:records) { create_list(model_class_factory, batch_size) }
let(:unused_registry_ids) { [records.last].map(&:id) }
context 'when the unused registry foreign key ids are greater than the last replicable model id' do
let(:records) { create_list(model_class_factory, batch_size) }
let(:unused_registry_ids) { [records.last].map(&:id) }
let!(:registries) do
records.map do |record|
create(registry_class_factory, model_foreign_key => record.id)
end
let!(:registries) do
records.map do |record|
create(registry_class_factory, model_foreign_key => record.id)
end
end
before do
model_class.where(id: unused_registry_ids).delete_all
end
before do
model_class.where(id: unused_registry_ids).delete_all
end
it 'deletes unused registries', :sidekiq_inline do
subject.execute
it 'deletes unused registries', :sidekiq_inline do
subject.execute
expect(registry_class.where(model_foreign_key => unused_registry_ids)).to be_empty
end
expect(registry_class.where(model_foreign_key => unused_registry_ids)).to be_empty
end
it 'returns truthy' do
expect(subject.execute).to be_truthy
end
it 'returns truthy' do
expect(subject.execute).to be_truthy
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment