Commit d52619d7 authored by Michael Kozono's avatar Michael Kozono

Merge branch '222635-delete-unused-registries' into 'master'

RegistryConsistencyWorker should be able to delete Package Files

See merge request gitlab-org/gitlab!34656
parents f040fb72 cf292675
...@@ -34,6 +34,13 @@ module Geo ...@@ -34,6 +34,13 @@ module Geo
raise NotImplementedError raise NotImplementedError
end end
# Return the absolute path to locally stored package file
#
# @return [String] File path
def blob_path
carrierwave_uploader.class.absolute_path(carrierwave_uploader)
end
def calculate_checksum! def calculate_checksum!
checksum = model_record.calculate_checksum! checksum = model_record.calculate_checksum!
update_verification_state!(checksum: checksum) update_verification_state!(checksum: checksum)
......
...@@ -36,7 +36,13 @@ class Geo::BaseRegistry < Geo::TrackingBase ...@@ -36,7 +36,13 @@ class Geo::BaseRegistry < Geo::TrackingBase
end end
def self.delete_for_model_ids(ids) def self.delete_for_model_ids(ids)
raise NotImplementedError, "#{self.class} does not implement #{__method__}" ids.map do |id|
delete_worker_class.perform_async(replicator_class.replicable_name, id)
end
end
def self.replicator_class
self::MODEL_CLASS.replicator_class
end end
def self.find_unsynced_registries(batch_size:, except_ids: []) def self.find_unsynced_registries(batch_size:, except_ids: [])
...@@ -56,8 +62,8 @@ class Geo::BaseRegistry < Geo::TrackingBase ...@@ -56,8 +62,8 @@ class Geo::BaseRegistry < Geo::TrackingBase
true true
end end
def model_record_id def self.delete_worker_class
read_attribute(self.class::MODEL_FOREIGN_KEY) ::Geo::FileRegistryRemovalWorker
end end
def self.find_registry_differences(range) def self.find_registry_differences(range)
...@@ -73,4 +79,8 @@ class Geo::BaseRegistry < Geo::TrackingBase ...@@ -73,4 +79,8 @@ class Geo::BaseRegistry < Geo::TrackingBase
[untracked_ids, unused_tracked_ids] [untracked_ids, unused_tracked_ids]
end end
def model_record_id
read_attribute(self.class::MODEL_FOREIGN_KEY)
end
end end
...@@ -16,10 +16,6 @@ class Geo::JobArtifactRegistry < Geo::BaseRegistry ...@@ -16,10 +16,6 @@ class Geo::JobArtifactRegistry < Geo::BaseRegistry
::Geo::JobArtifactRegistryFinder ::Geo::JobArtifactRegistryFinder
end end
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
def self.find_registry_differences(range) def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range) finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end end
......
...@@ -21,10 +21,6 @@ class Geo::LfsObjectRegistry < Geo::BaseRegistry ...@@ -21,10 +21,6 @@ class Geo::LfsObjectRegistry < Geo::BaseRegistry
::Geo::LfsObjectRegistryFinder ::Geo::LfsObjectRegistryFinder
end end
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
def self.find_registry_differences(range) def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range) finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end end
......
...@@ -11,9 +11,4 @@ class Geo::PackageFileRegistry < Geo::BaseRegistry ...@@ -11,9 +11,4 @@ class Geo::PackageFileRegistry < Geo::BaseRegistry
sha_attribute :verification_checksum sha_attribute :verification_checksum
sha_attribute :verification_checksum_mismatched sha_attribute :verification_checksum_mismatched
def self.delete_for_model_ids(package_file_ids)
# TODO: https://gitlab.com/gitlab-org/gitlab/-/issues/222635
[]
end
end end
...@@ -22,10 +22,6 @@ class Geo::UploadRegistry < Geo::BaseRegistry ...@@ -22,10 +22,6 @@ class Geo::UploadRegistry < Geo::BaseRegistry
::Geo::AttachmentRegistryFinder ::Geo::AttachmentRegistryFinder
end end
def self.delete_worker_class
::Geo::FileRegistryRemovalWorker
end
def self.find_registry_differences(range) def self.find_registry_differences(range)
finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range) finder_class.new(current_node_id: Gitlab::Geo.current_node.id).find_registry_differences(range)
end end
......
...@@ -26,15 +26,8 @@ module Geo ...@@ -26,15 +26,8 @@ module Geo
break break
end end
if file_path && File.exist?(file_path) destroy_file
log_info('Unlinking file', file_path: file_path) destroy_registry
File.unlink(file_path)
else
log_error('Unable to unlink file because file path is unknown. A file may be orphaned', object_type: object_type, object_db_id: object_db_id)
end
log_info('Removing file registry', file_registry_id: file_registry.id)
file_registry.destroy
log_info('Local file & registry removed') log_info('Local file & registry removed')
end end
...@@ -52,16 +45,48 @@ module Geo ...@@ -52,16 +45,48 @@ module Geo
::Geo::JobArtifactRegistry.find_by(artifact_id: object_db_id) ::Geo::JobArtifactRegistry.find_by(artifact_id: object_db_id)
elsif lfs? elsif lfs?
::Geo::LfsObjectRegistry.find_by(lfs_object_id: object_db_id) ::Geo::LfsObjectRegistry.find_by(lfs_object_id: object_db_id)
else elsif user_upload?
::Geo::UploadRegistry.find_by(file_type: object_type, file_id: object_db_id) ::Geo::UploadRegistry.find_by(file_type: object_type, file_id: object_db_id)
elsif replicator
replicator.registry
end end
end end
end end
# rubocop: enable CodeReuse/ActiveRecord # rubocop: enable CodeReuse/ActiveRecord
def destroy_file
if file_path && File.exist?(file_path)
log_info('Unlinking file', file_path: file_path)
File.unlink(file_path)
else
log_error('Unable to unlink file because file path is unknown. A file may be orphaned', object_type: object_type, object_db_id: object_db_id)
end
end
def destroy_registry
log_info('Removing file registry', file_registry_id: file_registry.id)
file_registry.destroy
end
def replicator
strong_memoize(:replicator) do
Gitlab::Geo::Replicator.for_replicable_params(replicable_name: object_type.to_s, replicable_id: object_db_id)
rescue NotImplementedError
nil
end
end
def blob_path_from_replicator
replicator.blob_path
rescue ActiveRecord::RecordNotFound
nil
end
def file_path def file_path
strong_memoize(:file_path) do strong_memoize(:file_path) do
next @object_file_path if @object_file_path next @object_file_path if @object_file_path
next blob_path_from_replicator if replicator
# When local storage is used, just rely on the existing methods # When local storage is used, just rely on the existing methods
next if file_uploader.nil? next if file_uploader.nil?
next file_uploader.file.path if file_uploader.object_store == ObjectStorage::Store::LOCAL next file_uploader.file.path if file_uploader.object_store == ObjectStorage::Store::LOCAL
......
...@@ -8,10 +8,12 @@ module Geo ...@@ -8,10 +8,12 @@ module Geo
loggable_arguments 0, 2 loggable_arguments 0, 2
def perform(object_type, object_db_id, file_path = nil) # This worker not only works for Self-Service Framework, it's also backward compatible
log_info('Executing Geo::FileRegistryRemovalService', id: object_db_id, type: object_type, file_path: file_path) # "object_type" is "replicable_name" and "object_db_id" is "replicable_id".
def perform(replicable_name, replicable_id, file_path = nil)
log_info('Executing Geo::FileRegistryRemovalService', id: replicable_id, type: replicable_name, file_path: file_path)
::Geo::FileRegistryRemovalService.new(object_type, object_db_id, file_path).execute ::Geo::FileRegistryRemovalService.new(replicable_name, replicable_id, file_path).execute
end end
end end
end end
...@@ -30,6 +30,10 @@ module Gitlab ...@@ -30,6 +30,10 @@ module Gitlab
define_method :replicator do define_method :replicator do
@_replicator ||= klass.new(model_record: self) @_replicator ||= klass.new(model_record: self)
end end
define_singleton_method :replicator_class do
@_replicator_class ||= klass
end
RUBY RUBY
end end
end end
......
...@@ -224,7 +224,7 @@ module Gitlab ...@@ -224,7 +224,7 @@ module Gitlab
# #
# @return [Geo::BaseRegistry] registry instance # @return [Geo::BaseRegistry] registry instance
def registry def registry
registry_class.for_model_record_id(model_record.id) registry_class.for_model_record_id(model_record_id)
end end
# Checksum value from the main database # Checksum value from the main database
......
...@@ -129,6 +129,42 @@ RSpec.describe Geo::FileRegistryRemovalService, :geo do ...@@ -129,6 +129,42 @@ RSpec.describe Geo::FileRegistryRemovalService, :geo do
end end
end end
shared_examples 'removes package file' do
subject(:service) { described_class.new('package_file', registry.package_file_id) }
before do
stub_exclusive_lease("file_registry_removal_service:package_file:#{registry.package_file_id}",
timeout: Geo::FileRegistryRemovalService::LEASE_TIMEOUT)
end
it 'file from disk' do
expect do
service.execute
end.to change { File.exist?(file_path) }.from(true).to(false)
end
it 'deletes registry entry' do
expect do
service.execute
end.to change(Geo::PackageFileRegistry, :count).by(-1)
end
end
shared_examples 'removes package file registry' do
subject(:service) { described_class.new('package_file', registry.package_file_id) }
before do
stub_exclusive_lease("file_registry_removal_service:package_file:#{registry.package_file_id}",
timeout: Geo::FileRegistryRemovalService::LEASE_TIMEOUT)
end
it 'deletes registry entry' do
expect do
service.execute
end.to change(Geo::PackageFileRegistry, :count).by(-1)
end
end
context 'with LFS object' do context 'with LFS object' do
let!(:lfs_object) { create(:lfs_object, :with_file) } let!(:lfs_object) { create(:lfs_object, :with_file) }
let!(:registry) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object.id) } let!(:registry) { create(:geo_lfs_object_registry, lfs_object_id: lfs_object.id) }
...@@ -380,5 +416,39 @@ RSpec.describe Geo::FileRegistryRemovalService, :geo do ...@@ -380,5 +416,39 @@ RSpec.describe Geo::FileRegistryRemovalService, :geo do
end end
end end
end end
context 'with package file' do
let(:package_file) { create(:package_file_with_file) }
let!(:registry) { create(:geo_package_file_registry, package_file: package_file) }
let(:file_path) { Tempfile.new.path }
before do
allow_next_instance_of(Geo::PackageFileReplicator) do |replicator|
allow(replicator).to receive(:blob_path).and_return(file_path)
end
end
it_behaves_like 'removes package file'
context 'no package file record' do
before do
package_file.delete
end
it_behaves_like 'removes package file' do
subject(:service) { described_class.new('package_file', registry.package_file_id, file_path) }
end
end
context 'with orphaned registry' do
before do
package_file.delete
end
it_behaves_like 'removes package file registry' do
subject(:service) { described_class.new('package_file', registry.package_file_id) }
end
end
end
end end
end end
...@@ -156,81 +156,79 @@ RSpec.describe Geo::RegistryConsistencyService, :geo, :use_clean_rails_memory_st ...@@ -156,81 +156,79 @@ RSpec.describe Geo::RegistryConsistencyService, :geo, :use_clean_rails_memory_st
end end
end end
unless klass == Geo::PackageFileRegistry context 'when there are unused registries' do
context 'when there are unused registries' do context 'with no replicable records' do
context 'with no replicable records' do let(:records) { create_list(model_class_factory, batch_size) }
let(:records) { create_list(model_class_factory, batch_size) } let(:unused_model_ids) { records.map(&:id) }
let(:unused_model_ids) { records.map(&:id) }
let!(:registries) do
let!(:registries) do records.map do |record|
records.map do |record| create(registry_class_factory, model_foreign_key => record.id)
create(registry_class_factory, model_foreign_key => record.id)
end
end end
end
before do before do
model_class.where(id: unused_model_ids).delete_all model_class.where(id: unused_model_ids).delete_all
end end
it 'deletes unused registries', :sidekiq_inline do it 'deletes unused registries', :sidekiq_inline do
subject.execute subject.execute
expect(registry_class.where(model_foreign_key => unused_model_ids)).to be_empty expect(registry_class.where(model_foreign_key => unused_model_ids)).to be_empty
end end
it 'returns truthy' do it 'returns truthy' do
expect(subject.execute).to be_truthy expect(subject.execute).to be_truthy
end
end end
end
context 'when the unused registry foreign key ids are lower than the first replicable model id' do context 'when the unused registry foreign key ids are lower than the first replicable model id' do
let(:records) { create_list(model_class_factory, batch_size) } let(:records) { create_list(model_class_factory, batch_size) }
let(:unused_registry_ids) { [records.first].map(&:id) } let(:unused_registry_ids) { [records.first].map(&:id) }
let!(:registries) do let!(:registries) do
records.map do |record| records.map do |record|
create(registry_class_factory, model_foreign_key => record.id) create(registry_class_factory, model_foreign_key => record.id)
end
end end
end
before do before do
model_class.where(id: unused_registry_ids).delete_all model_class.where(id: unused_registry_ids).delete_all
end end
it 'deletes unused registries', :sidekiq_inline do it 'deletes unused registries', :sidekiq_inline do
subject.execute subject.execute
expect(registry_class.where(model_foreign_key => unused_registry_ids)).to be_empty expect(registry_class.where(model_foreign_key => unused_registry_ids)).to be_empty
end end
it 'returns truthy' do it 'returns truthy' do
expect(subject.execute).to be_truthy expect(subject.execute).to be_truthy
end
end end
end
context 'when the unused registry foreign key ids are greater than the last replicable model id' do context 'when the unused registry foreign key ids are greater than the last replicable model id' do
let(:records) { create_list(model_class_factory, batch_size) } let(:records) { create_list(model_class_factory, batch_size) }
let(:unused_registry_ids) { [records.last].map(&:id) } let(:unused_registry_ids) { [records.last].map(&:id) }
let!(:registries) do let!(:registries) do
records.map do |record| records.map do |record|
create(registry_class_factory, model_foreign_key => record.id) create(registry_class_factory, model_foreign_key => record.id)
end
end end
end
before do before do
model_class.where(id: unused_registry_ids).delete_all model_class.where(id: unused_registry_ids).delete_all
end end
it 'deletes unused registries', :sidekiq_inline do it 'deletes unused registries', :sidekiq_inline do
subject.execute subject.execute
expect(registry_class.where(model_foreign_key => unused_registry_ids)).to be_empty expect(registry_class.where(model_foreign_key => unused_registry_ids)).to be_empty
end end
it 'returns truthy' do it 'returns truthy' do
expect(subject.execute).to be_truthy expect(subject.execute).to be_truthy
end
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment