Commit cf292675 authored by Valery Sizov's avatar Valery Sizov Committed by Michael Kozono

Deletion of Package Files by RegistryConsistencyWorker

It allows RegistryConsistencyWorker to delete orphaned
registries and referenced files
parent 8dd8e6ad
......@@ -34,6 +34,13 @@ module Geo
raise NotImplementedError
end
# Return the absolute path to locally stored package file
#
# @return [String] File path
def blob_path
carrierwave_uploader.class.absolute_path(carrierwave_uploader)
end
def calculate_checksum!
checksum = model_record.calculate_checksum!
update_verification_state!(checksum: checksum)
......
......@@ -36,7 +36,13 @@ class Geo::BaseRegistry < Geo::TrackingBase
end
def self.delete_for_model_ids(ids)
raise NotImplementedError, "#{self.class} does not implement #{__method__}"
ids.map do |id|
delete_worker_class.perform_async(replicator_class.replicable_name, id)
end
end
def self.replicator_class
self::MODEL_CLASS.replicator_class
end
def self.find_unsynced_registries(batch_size:, except_ids: [])
......@@ -56,8 +62,8 @@ class Geo::BaseRegistry < Geo::TrackingBase
true
end
def model_record_id
read_attribute(self.class::MODEL_FOREIGN_KEY)
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
def self.find_registry_differences(range)
......@@ -73,4 +79,8 @@ class Geo::BaseRegistry < Geo::TrackingBase
[untracked_ids, unused_tracked_ids]
end
def model_record_id
read_attribute(self.class::MODEL_FOREIGN_KEY)
end
end
......@@ -16,10 +16,6 @@ class Geo::JobArtifactRegistry < Geo::BaseRegistry
::Geo::JobArtifactRegistryFinder
end
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end
......
......@@ -21,10 +21,6 @@ class Geo::LfsObjectRegistry < Geo::BaseRegistry
::Geo::LfsObjectRegistryFinder
end
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end
......
......@@ -11,9 +11,4 @@ class Geo::PackageFileRegistry < Geo::BaseRegistry
sha_attribute :verification_checksum
sha_attribute :verification_checksum_mismatched
def self.delete_for_model_ids(package_file_ids)
# TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/222635
[]
end
end
......@@ -22,10 +22,6 @@ class Geo::UploadRegistry < Geo::BaseRegistry
::Geo::AttachmentRegistryFinder
end
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end
......
......@@ -26,15 +26,8 @@ module Geo
break
end
if file_path && File.exist?(file_path)
log_info('Unlinking file', file_path: file_path)
File.unlink(file_path)
else
log_error('Unable to unlink file because file path is unknown. A file may be orphaned', object_type: object_type, object_db_id: object_db_id)
end
log_info('Removing file registry', file_registry_id: file_registry.id)
file_registry.destroy
destroy_file
destroy_registry
log_info('Local file & registry removed')
end
......@@ -52,16 +45,48 @@ module Geo
::Geo::JobArtifactRegistry.find_by(artifact_id: object_db_id)
elsif lfs?
::Geo::LfsObjectRegistry.find_by(lfs_object_id: object_db_id)
else
elsif user_upload?
::Geo::UploadRegistry.find_by(file_type: object_type, file_id: object_db_id)
elsif replicator
replicator.registry
end
end
end
# rubocop: enable CodeReuse/ActiveRecord
def destroy_file
if file_path && File.exist?(file_path)
log_info('Unlinking file', file_path: file_path)
File.unlink(file_path)
else
log_error('Unable to unlink file because file path is unknown. A file may be orphaned', object_type: object_type, object_db_id: object_db_id)
end
end
def destroy_registry
log_info('Removing file registry', file_registry_id: file_registry.id)
file_registry.destroy
end
def replicator
strong_memoize(:replicator) do
Gitlab::Geo::Replicator.for_replicable_params(replicable_name: object_type.to_s, replicable_id: object_db_id)
rescue NotImplementedError
nil
end
end
def blob_path_from_replicator
replicator.blob_path
rescue ActiveRecord::RecordNotFound
nil
end
def file_path
strong_memoize(:file_path) do
next @object_file_path if @object_file_path
next blob_path_from_replicator if replicator
# When local storage is used, just rely on the existing methods
next if file_uploader.nil?
next file_uploader.file.path if file_uploader.object_store == ObjectStorage::Store::LOCAL
......
......@@ -8,10 +8,12 @@ module Geo
loggable_arguments 0, 2
def perform(object_type, object_db_id, file_path = nil)
log_info('Executing Geo::FileRegistryRemovalService', id: object_db_id, type: object_type, file_path: file_path)
# This worker not only works for Self-Service Framework, it's also backward compatible
# "object_type" is "replicable_name" and "object_db_id" is "replicable_id".
def perform(replicable_name, replicable_id, file_path = nil)
log_info('Executing Geo::FileRegistryRemovalService', id: replicable_id, type: replicable_name, file_path: file_path)
::Geo::FileRegistryRemovalService.new(object_type, object_db_id, file_path).execute
::Geo::FileRegistryRemovalService.new(replicable_name, replicable_id, file_path).execute
end
end
end
......@@ -30,6 +30,10 @@ module Gitlab
define_method :replicator do
@_replicator ||= klass.new(model_record: self)
end
define_singleton_method :replicator_class do
@_replicator_class ||= klass
end
RUBY
end
end
......
......@@ -224,7 +224,7 @@ module Gitlab
#
# @return [Geo::BaseRegistry] registry instance
def registry
registry_class.for_model_record_id(model_record.id)
registry_class.for_model_record_id(model_record_id)
end
# Checksum value from the main database
......
......@@ -129,6 +129,42 @@ RSpec.describe Geo::FileRegistryRemovalService, :geo do
end
end
shared_examples 'removes package file' do
subject(:service) { described_class.new('package_file', registry.package_file_id) }
before do
stub_exclusive_lease("file_registry_removal_service:package_file:#{registry.package_file_id}",
timeout: Geo::FileRegistryRemovalService::LEASE_TIMEOUT)
end
it 'file from disk' do
expect do
service.execute
end.to change { File.exist?(file_path) }.from(true).to(false)
end
it 'deletes registry entry' do
expect do
service.execute
end.to change(Geo::PackageFileRegistry, :count).by(-1)
end
end
shared_examples 'removes package file registry' do
subject(:service) { described_class.new('package_file', registry.package_file_id) }
before do
stub_exclusive_lease("file_registry_removal_service:package_file:#{registry.package_file_id}",
timeout: Geo::FileRegistryRemovalService::LEASE_TIMEOUT)
end
it 'deletes registry entry' do
expect do
service.execute
end.to change(Geo::PackageFileRegistry, :count).by(-1)
end
end
context 'with LFS object' do
let!(:lfs_object) { create(:lfs_object, :with_file) }
let!(:registry) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object.id) }
......@@ -380,5 +416,39 @@ RSpec.describe Geo::FileRegistryRemovalService, :geo do
end
end
end
context 'with package file' do
let(:package_file) { create(:package_file_with_file) }
let!(:registry) { create(:geo_package_file_registry, package_file: package_file) }
let(:file_path) { Tempfile.new.path }
before do
allow_next_instance_of(Geo::PackageFileReplicator) do |replicator|
allow(replicator).to receive(:blob_path).and_return(file_path)
end
end
it_behaves_like 'removes package file'
context 'no package file record' do
before do
package_file.delete
end
it_behaves_like 'removes package file' do
subject(:service) { described_class.new('package_file', registry.package_file_id, file_path) }
end
end
context 'with orphaned registry' do
before do
package_file.delete
end
it_behaves_like 'removes package file registry' do
subject(:service) { described_class.new('package_file', registry.package_file_id) }
end
end
end
end
end
......@@ -156,81 +156,79 @@ RSpec.describe Geo::RegistryConsistencyService, :geo, :use_clean_rails_memory_st
end
end
unless klass == Geo::PackageFileRegistry
context 'when there are unused registries' do
context 'with no replicable records' do
let(:records) { create_list(model_class_factory, batch_size) }
let(:unused_model_ids) { records.map(&:id) }
let!(:registries) do
records.map do |record|
create(registry_class_factory, model_foreign_key => record.id)
end
context 'when there are unused registries' do
context 'with no replicable records' do
let(:records) { create_list(model_class_factory, batch_size) }
let(:unused_model_ids) { records.map(&:id) }
let!(:registries) do
records.map do |record|
create(registry_class_factory, model_foreign_key => record.id)
end
end
before do
model_class.where(id: unused_model_ids).delete_all
end
before do
model_class.where(id: unused_model_ids).delete_all
end
it 'deletes unused registries', :sidekiq_inline do
subject.execute
it 'deletes unused registries', :sidekiq_inline do
subject.execute
expect(registry_class.where(model_foreign_key => unused_model_ids)).to be_empty
end
expect(registry_class.where(model_foreign_key => unused_model_ids)).to be_empty
end
it 'returns truthy' do
expect(subject.execute).to be_truthy
end
it 'returns truthy' do
expect(subject.execute).to be_truthy
end
end
context 'when the unused registry foreign key ids are lower than the first replicable model id' do
let(:records) { create_list(model_class_factory, batch_size) }
let(:unused_registry_ids) { [records.first].map(&:id) }
context 'when the unused registry foreign key ids are lower than the first replicable model id' do
let(:records) { create_list(model_class_factory, batch_size) }
let(:unused_registry_ids) { [records.first].map(&:id) }
let!(:registries) do
records.map do |record|
create(registry_class_factory, model_foreign_key => record.id)
end
let!(:registries) do
records.map do |record|
create(registry_class_factory, model_foreign_key => record.id)
end
end
before do
model_class.where(id: unused_registry_ids).delete_all
end
before do
model_class.where(id: unused_registry_ids).delete_all
end
it 'deletes unused registries', :sidekiq_inline do
subject.execute
it 'deletes unused registries', :sidekiq_inline do
subject.execute
expect(registry_class.where(model_foreign_key => unused_registry_ids)).to be_empty
end
expect(registry_class.where(model_foreign_key => unused_registry_ids)).to be_empty
end
it 'returns truthy' do
expect(subject.execute).to be_truthy
end
it 'returns truthy' do
expect(subject.execute).to be_truthy
end
end
context 'when the unused registry foreign key ids are greater than the last replicable model id' do
let(:records) { create_list(model_class_factory, batch_size) }
let(:unused_registry_ids) { [records.last].map(&:id) }
context 'when the unused registry foreign key ids are greater than the last replicable model id' do
let(:records) { create_list(model_class_factory, batch_size) }
let(:unused_registry_ids) { [records.last].map(&:id) }
let!(:registries) do
records.map do |record|
create(registry_class_factory, model_foreign_key => record.id)
end
let!(:registries) do
records.map do |record|
create(registry_class_factory, model_foreign_key => record.id)
end
end
before do
model_class.where(id: unused_registry_ids).delete_all
end
before do
model_class.where(id: unused_registry_ids).delete_all
end
it 'deletes unused registries', :sidekiq_inline do
subject.execute
it 'deletes unused registries', :sidekiq_inline do
subject.execute
expect(registry_class.where(model_foreign_key => unused_registry_ids)).to be_empty
end
expect(registry_class.where(model_foreign_key => unused_registry_ids)).to be_empty
end
it 'returns truthy' do
expect(subject.execute).to be_truthy
end
it 'returns truthy' do
expect(subject.execute).to be_truthy
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment