Commit 9b1319c8 authored by Jan Provaznik's avatar Jan Provaznik

Use FastDestroy for deleting uploads

It gathers list of file paths to delete before destroying
the parent object. Then after the parent_object is destroyed
these paths are scheduled for deletion asynchronously.

Carrierwave needed associated model for deleting upload file.
To avoid this requirement, simple Fog/File layer is used directly
for file deletion, this allows us to use just a simple list of paths.
parent 0dcac738
......@@ -70,6 +70,7 @@ module FastDestroyAll
module Helpers
extend ActiveSupport::Concern
include AfterCommitQueue
class_methods do
##
......
......@@ -17,6 +17,7 @@
module WithUploads
extend ActiveSupport::Concern
include FastDestroyAll::Helpers
# Currently there is no simple way how to select only not-mounted
# uploads, it should be all FileUploaders so we select them by
......@@ -25,18 +26,9 @@ module WithUploads
included do
has_many :uploads, as: :model
has_many :file_uploads, -> { where(uploader: FILE_UPLOADERS) }, class_name: 'Upload', as: :model, dependent: :delete_all # rubocop:disable Cop/ActiveRecordDependent
before_destroy :destroy_file_uploads
end
# mounted uploads are deleted in carrierwave's after_commit hook,
# but FileUploaders which are not mounted must be deleted explicitly and
# it can not be done in after_commit because FileUploader requires loads
# associated model on destroy (which is already deleted in after_commit)
def destroy_file_uploads
self.uploads.where(uploader: FILE_UPLOADERS).find_each do |upload|
upload.destroy
end
use_fast_destroy :file_uploads
end
def retrieve_upload(_identifier, paths)
......
......@@ -25,6 +25,25 @@ class Upload < ActiveRecord::Base
Digest::SHA256.file(path).hexdigest
end
class << self
##
# FastDestroyAll concerns
def begin_fast_destroy
{
Uploads::Local => Uploads::Local.new.keys(with_files_stored_locally),
Uploads::Fog => Uploads::Fog.new.keys(with_files_stored_remotely)
}
end
##
# FastDestroyAll concerns
def finalize_fast_destroy(keys)
keys.each do |store_class, paths|
store_class.new.delete_keys_async(paths)
end
end
end
def absolute_path
raise ObjectStorage::RemoteStoreError, "Remote object has no absolute path." unless local?
return path unless relative_path?
......
# frozen_string_literal: true
module Uploads
class Base
BATCH_SIZE = 100
attr_reader :logger
def initialize(logger: nil)
@logger ||= Rails.logger
end
def delete_keys_async(keys_to_delete)
keys_to_delete.each_slice(BATCH_SIZE) do |batch|
DeleteStoredFilesWorker.perform_async(self.class, batch)
end
end
end
end
# frozen_string_literal: true
module Uploads
class Fog < Base
include ::Gitlab::Utils::StrongMemoize
def available?
object_store.enabled
end
def keys(relation)
return [] unless available?
relation.pluck(:path)
end
def delete_keys(keys)
keys.each do |key|
connection.delete_object(bucket_name, key)
end
end
private
def object_store
Gitlab.config.uploads.object_store
end
def bucket_name
return unless available?
object_store.remote_directory
end
def connection
return unless available?
strong_memoize(:connection) do
::Fog::Storage.new(object_store.connection.to_hash.deep_symbolize_keys)
end
end
end
end
# frozen_string_literal: true
module Uploads
class Local < Base
def keys(relation)
relation.includes(:model).find_each.map {|u| u.absolute_path }
end
def delete_keys(keys)
keys.each do |path|
delete_file(path)
end
end
private
def delete_file(path)
unless exists?(path)
logger.warn("File '#{path}' doesn't exist, skipping")
return
end
unless in_uploads?(path)
message = "Path '#{path}' is not in uploads dir, skipping"
logger.warn(message)
Gitlab::Sentry.track_exception(RuntimeError.new(message), extra: { uploads_dir: storage_dir })
return
end
FileUtils.rm(path)
delete_dir!(File.dirname(path))
end
def exists?(path)
path.present? && File.exist?(path)
end
def in_uploads?(path)
path.start_with?(storage_dir)
end
def delete_dir!(path)
Dir.rmdir(path)
rescue Errno::ENOENT
# Ignore: path does not exist
rescue Errno::ENOTDIR
# Ignore: path is not a dir
rescue Errno::ENOTEMPTY, Errno::EEXIST
# Ignore: dir is not empty
end
def storage_dir
@storage_dir ||= File.realpath(Gitlab.config.uploads.storage_path)
end
end
end
......@@ -134,3 +134,4 @@
- delete_diff_files
- detect_repository_languages
- repository_cleanup
- delete_stored_files
# frozen_string_literal: true
class DeleteStoredFilesWorker
include ApplicationWorker
def perform(class_name, keys)
klass = begin
class_name.constantize
rescue NameError
nil
end
unless klass
message = "Unknown class '#{class_name}'"
logger.error(message)
Gitlab::Sentry.track_exception(RuntimeError.new(message))
return
end
klass.new(logger: logger).delete_keys(keys)
end
end
---
title: Delete file uploads asynchronously when deleting a project or other resources.
merge_request: 20977
author:
type: added
......@@ -82,6 +82,7 @@
- [detect_repository_languages, 1]
- [auto_devops, 2]
- [repository_cleanup, 1]
- [delete_stored_files, 1]
# EE-specific queues
- [ldap_group_sync, 2]
......
......@@ -287,6 +287,7 @@ project:
- statistics
- container_repositories
- uploads
- file_uploads
- import_state
- members_and_requesters
- build_trace_section_names
......
......@@ -20,7 +20,7 @@ describe Appearance do
end
context 'with uploads' do
it_behaves_like 'model with mounted uploader', false do
it_behaves_like 'model with uploads', false do
let(:model_object) { create(:appearance, :with_logo) }
let(:upload_attribute) { :logo }
let(:uploader_class) { AttachmentUploader }
......
......@@ -739,7 +739,7 @@ describe Group do
end
context 'with uploads' do
it_behaves_like 'model with mounted uploader', true do
it_behaves_like 'model with uploads', true do
let(:model_object) { create(:group, :with_avatar) }
let(:upload_attribute) { :avatar }
let(:uploader_class) { AttachmentUploader }
......
......@@ -4202,7 +4202,7 @@ describe Project do
end
context 'with uploads' do
it_behaves_like 'model with mounted uploader', true do
it_behaves_like 'model with uploads', true do
let(:model_object) { create(:project, :with_avatar) }
let(:upload_attribute) { :avatar }
let(:uploader_class) { AttachmentUploader }
......
# frozen_string_literal: true
require 'spec_helper'
describe Uploads::Fog do
let(:data_store) { described_class.new }
before do
stub_uploads_object_storage(FileUploader)
end
describe '#available?' do
subject { data_store.available? }
context 'when object storage is enabled' do
it { is_expected.to be_truthy }
end
context 'when object storage is disabled' do
before do
stub_uploads_object_storage(FileUploader, enabled: false)
end
it { is_expected.to be_falsy }
end
end
context 'model with uploads' do
let(:project) { create(:project) }
let(:relation) { project.uploads }
describe '#keys' do
let!(:uploads) { create_list(:upload, 2, :object_storage, uploader: FileUploader, model: project) }
subject { data_store.keys(relation) }
it 'returns keys' do
is_expected.to match_array(relation.pluck(:path))
end
end
describe '#delete_keys' do
let(:keys) { data_store.keys(relation) }
let!(:uploads) { create_list(:upload, 2, :with_file, :issuable_upload, model: project) }
subject { data_store.delete_keys(keys) }
before do
uploads.each { |upload| upload.build_uploader.migrate!(2) }
end
it 'deletes multiple data' do
paths = relation.pluck(:path)
::Fog::Storage.new(FileUploader.object_store_credentials).tap do |connection|
paths.each do |path|
expect(connection.get_object('uploads', path)[:body]).not_to be_nil
end
end
subject
::Fog::Storage.new(FileUploader.object_store_credentials).tap do |connection|
paths.each do |path|
expect { connection.get_object('uploads', path)[:body] }.to raise_error(Excon::Error::NotFound)
end
end
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Uploads::Local do
let(:data_store) { described_class.new }
before do
stub_uploads_object_storage(FileUploader)
end
context 'model with uploads' do
let(:project) { create(:project) }
let(:relation) { project.uploads }
describe '#keys' do
let!(:uploads) { create_list(:upload, 2, uploader: FileUploader, model: project) }
subject { data_store.keys(relation) }
it 'returns keys' do
is_expected.to match_array(relation.map(&:absolute_path))
end
end
describe '#delete_keys' do
let(:keys) { data_store.keys(relation) }
let!(:uploads) { create_list(:upload, 2, :with_file, :issuable_upload, model: project) }
subject { data_store.delete_keys(keys) }
it 'deletes multiple data' do
paths = relation.map(&:absolute_path)
paths.each do |path|
expect(File.exist?(path)).to be_truthy
end
subject
paths.each do |path|
expect(File.exist?(path)).to be_falsey
end
end
end
end
end
......@@ -3313,7 +3313,7 @@ describe User do
end
context 'with uploads' do
it_behaves_like 'model with mounted uploader', false do
it_behaves_like 'model with uploads', false do
let(:model_object) { create(:user, :with_avatar) }
let(:upload_attribute) { :avatar }
let(:uploader_class) { AttachmentUploader }
......
require 'spec_helper'
shared_examples_for 'model with mounted uploader' do |supports_fileuploads|
shared_examples_for 'model with uploads' do |supports_fileuploads|
describe '.destroy' do
before do
stub_uploads_object_storage(uploader_class)
......@@ -8,16 +8,42 @@ shared_examples_for 'model with mounted uploader' do |supports_fileuploads|
model_object.public_send(upload_attribute).migrate!(ObjectStorage::Store::REMOTE)
end
context 'with mounted uploader' do
it 'deletes remote uploads' do
expect_any_instance_of(CarrierWave::Storage::Fog::File).to receive(:delete).and_call_original
expect { model_object.destroy }.to change { Upload.count }.by(-1)
end
end
context 'with not mounted uploads', :sidekiq, skip: !supports_fileuploads do
context 'with local files' do
let!(:uploads) { create_list(:upload, 2, uploader: FileUploader, model: model_object) }
it 'deletes any FileUploader uploads which are not mounted' do
expect { model_object.destroy }.to change { Upload.count }.by(-3)
end
it 'deletes any FileUploader uploads which are not mounted', skip: !supports_fileuploads do
create(:upload, uploader: FileUploader, model: model_object)
it 'deletes local files' do
expect_any_instance_of(Uploads::Local).to receive(:delete_keys).with(uploads.map(&:absolute_path))
model_object.destroy
end
end
expect { model_object.destroy }.to change { Upload.count }.by(-2)
context 'with remote files' do
let!(:uploads) { create_list(:upload, 2, :object_storage, uploader: FileUploader, model: model_object) }
it 'deletes any FileUploader uploads which are not mounted' do
expect { model_object.destroy }.to change { Upload.count }.by(-3)
end
it 'deletes remote files' do
expect_any_instance_of(Uploads::Fog).to receive(:delete_keys).with(uploads.map(&:path))
model_object.destroy
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment