Commit 8d5f875c authored by Stan Hu's avatar Stan Hu Committed by Mayra Cabrera

Fix project exports clobbering concurrent export paths

When a project export completes, it removes everything in
`Project#import_export_shared.archive_path`, which can erase files
needed for another ongoing project export. This is problematic for
custom templates, which exports an existing project to get the most
recent changes and imports that archive to another project.

To avoid this from happening, we generate a random unique subpath in the
shared temporary directory so that multiple exports can work at the same
time.

Previously the path structure was as follows:

1. Project export files stored in:
   /shared/tmp/project_exports/namespace/project/:random
2. Project export .tar.gz files stored in:
   /shared/tmp/project_exports/namespace/project
3. Project export lock file:
   /shared/tmp/project_exports/namespace/project/.after_export_action

Now:

1. Project export files stored in:
   /shared/tmp/project_exports/namespace/project/:randomA/:randomB
2. Project export .tar.gz files stored in:
   /shared/tmp/project_exports/namespace/project/:randomA
3. Project export lock files stored in:
   /shared/tmp/project_exports/namespace/project/locks

The .tar.gz files are also now cleaned up in the AfterExportStrategy.

Also, ensure import/export path cleanup always happens.

A failure to update the database or object storage shouldn't
block us from cleaning up stale directories. This is especially
important to clear out stale lock file and archive paths.

Closes https://gitlab.com/gitlab-org/gitlab-ee/issues/14716
parent be360f19
...@@ -12,16 +12,20 @@ class ImportExportCleanUpService ...@@ -12,16 +12,20 @@ class ImportExportCleanUpService
def execute def execute
Gitlab::Metrics.measure(:import_export_clean_up) do Gitlab::Metrics.measure(:import_export_clean_up) do
clean_up_export_object_files execute_cleanup
break unless File.directory?(path)
clean_up_export_files
end end
end end
private private
def execute_cleanup
clean_up_export_object_files
ensure
# We don't want a failure in cleaning up object storage from
# blocking us from cleaning up temporary storage.
clean_up_export_files if File.directory?(path)
end
def clean_up_export_files def clean_up_export_files
Gitlab::Popen.popen(%W(find #{path} -not -path #{path} -mmin +#{mmin} -delete)) Gitlab::Popen.popen(%W(find #{path} -not -path #{path} -mmin +#{mmin} -delete))
end end
......
---
title: Fix project exports clobbering concurrent export paths
merge_request: 16280
author:
type: fixed
...@@ -30,7 +30,6 @@ module EE ...@@ -30,7 +30,6 @@ module EE
::RepositoryImportWorker.new.perform(export_into_project_id) ::RepositoryImportWorker.new.perform(export_into_project_id)
ensure ensure
export_file.close if export_file.respond_to?(:close) export_file.close if export_file.respond_to?(:close)
project.remove_exports
end end
def export_file def export_file
......
...@@ -10,11 +10,9 @@ module Gitlab ...@@ -10,11 +10,9 @@ module Gitlab
StrategyError = Class.new(StandardError) StrategyError = Class.new(StandardError)
AFTER_EXPORT_LOCK_FILE_NAME = '.after_export_action'
private private
attr_reader :project, :current_user attr_reader :project, :current_user, :lock_file
public public
...@@ -29,8 +27,9 @@ module Gitlab ...@@ -29,8 +27,9 @@ module Gitlab
def execute(current_user, project) def execute(current_user, project)
@project = project @project = project
return unless @project.export_status == :finished ensure_export_ready!
ensure_lock_files_path!
@lock_file = File.join(lock_files_path, SecureRandom.hex)
@current_user = current_user @current_user = current_user
if invalid? if invalid?
...@@ -48,19 +47,32 @@ module Gitlab ...@@ -48,19 +47,32 @@ module Gitlab
false false
ensure ensure
delete_after_export_lock delete_after_export_lock
delete_export_file
delete_archive_path
end end
def to_json(options = {}) def to_json(options = {})
@options.to_h.merge!(klass: self.class.name).to_json @options.to_h.merge!(klass: self.class.name).to_json
end end
def self.lock_file_path(project) def ensure_export_ready!
return unless project.export_path || export_file_exists? raise StrategyError unless project.export_file_exists?
end
def ensure_lock_files_path!
FileUtils.mkdir_p(lock_files_path) unless Dir.exist?(lock_files_path)
end
def lock_files_path
project.import_export_shared.lock_files_path
end
lock_path = project.import_export_shared.archive_path def archive_path
project.import_export_shared.archive_path
end
mkdir_p(lock_path) def locks_present?
File.join(lock_path, AFTER_EXPORT_LOCK_FILE_NAME) project.import_export_shared.locks_present?
end end
protected protected
...@@ -69,25 +81,33 @@ module Gitlab ...@@ -69,25 +81,33 @@ module Gitlab
raise NotImplementedError raise NotImplementedError
end end
def delete_export?
true
end
private private
def delete_export_file
return if locks_present? || !delete_export?
project.remove_exports
end
def delete_archive_path
FileUtils.rm_rf(archive_path) if File.directory?(archive_path)
end
def create_or_update_after_export_lock def create_or_update_after_export_lock
FileUtils.touch(self.class.lock_file_path(project)) FileUtils.touch(lock_file)
end end
def delete_after_export_lock def delete_after_export_lock
lock_file = self.class.lock_file_path(project)
FileUtils.rm(lock_file) if lock_file.present? && File.exist?(lock_file) FileUtils.rm(lock_file) if lock_file.present? && File.exist?(lock_file)
end end
def log_validation_errors def log_validation_errors
errors.full_messages.each { |msg| project.import_export_shared.add_error_message(msg) } errors.full_messages.each { |msg| project.import_export_shared.add_error_message(msg) }
end end
def export_file_exists?
project.export_file_exists?
end
end end
end end
end end
......
...@@ -4,6 +4,12 @@ module Gitlab ...@@ -4,6 +4,12 @@ module Gitlab
module ImportExport module ImportExport
module AfterExportStrategies module AfterExportStrategies
class DownloadNotificationStrategy < BaseAfterExportStrategy class DownloadNotificationStrategy < BaseAfterExportStrategy
protected
def delete_export?
false
end
private private
def strategy_execute def strategy_execute
......
...@@ -24,8 +24,6 @@ module Gitlab ...@@ -24,8 +24,6 @@ module Gitlab
def strategy_execute def strategy_execute
handle_response_error(send_file) handle_response_error(send_file)
project.remove_exports
end end
def handle_response_error(response) def handle_response_error(response)
......
# frozen_string_literal: true # frozen_string_literal: true
#
# This class encapsulates the directories used by project import/export:
#
# 1. The project export job first generates the project metadata tree
# (e.g. `project.json) and repository bundle (e.g. `project.bundle`)
# inside a temporary `export_path`
# (e.g. /path/to/shared/tmp/project_exports/namespace/project/:randomA/:randomB).
#
# 2. The job then creates a tarball (e.g. `project.tar.gz`) in
# `archive_path` (e.g. /path/to/shared/tmp/project_exports/namespace/project/:randomA).
# CarrierWave moves this tarball files into its permanent location.
#
# 3. Lock files are used to indicate whether a project is in the
# `after_export` state. These are stored in a directory
# (e.g. /path/to/shared/tmp/project_exports/namespace/project/locks. The
# number of lock files present signifies how many concurrent project
# exports are running. Note that this assumes the temporary directory
# is a shared mount:
# https://gitlab.com/gitlab-org/gitlab/issues/32203
#
# NOTE: Stale files should be cleaned up via ImportExportCleanupService.
module Gitlab module Gitlab
module ImportExport module ImportExport
class Shared class Shared
attr_reader :errors, :project attr_reader :errors, :project
LOCKS_DIRECTORY = 'locks'
def initialize(project) def initialize(project)
@project = project @project = project
@errors = [] @errors = []
...@@ -12,17 +34,27 @@ module Gitlab ...@@ -12,17 +34,27 @@ module Gitlab
end end
def active_export_count def active_export_count
Dir[File.join(archive_path, '*')].count { |name| File.directory?(name) } Dir[File.join(base_path, '*')].count { |name| File.basename(name) != LOCKS_DIRECTORY && File.directory?(name) }
end end
# The path where the project metadata and repository bundle is saved
def export_path def export_path
@export_path ||= Gitlab::ImportExport.export_path(relative_path: relative_path) @export_path ||= Gitlab::ImportExport.export_path(relative_path: relative_path)
end end
# The path where the tarball is saved
def archive_path def archive_path
@archive_path ||= Gitlab::ImportExport.export_path(relative_path: relative_archive_path) @archive_path ||= Gitlab::ImportExport.export_path(relative_path: relative_archive_path)
end end
def base_path
@base_path ||= Gitlab::ImportExport.export_path(relative_path: relative_base_path)
end
def lock_files_path
@locks_files_path ||= File.join(base_path, LOCKS_DIRECTORY)
end
def error(error) def error(error)
log_error(message: error.message, caller: caller[0].dup) log_error(message: error.message, caller: caller[0].dup)
log_debug(backtrace: error.backtrace&.join("\n")) log_debug(backtrace: error.backtrace&.join("\n"))
...@@ -37,16 +69,24 @@ module Gitlab ...@@ -37,16 +69,24 @@ module Gitlab
end end
def after_export_in_progress? def after_export_in_progress?
File.exist?(after_export_lock_file) locks_present?
end
def locks_present?
Dir.exist?(lock_files_path) && !Dir.empty?(lock_files_path)
end end
private private
def relative_path def relative_path
File.join(relative_archive_path, SecureRandom.hex) @relative_path ||= File.join(relative_archive_path, SecureRandom.hex)
end end
def relative_archive_path def relative_archive_path
@relative_archive_path ||= File.join(@project.disk_path, SecureRandom.hex)
end
def relative_base_path
@project.disk_path @project.disk_path
end end
...@@ -70,10 +110,6 @@ module Gitlab ...@@ -70,10 +110,6 @@ module Gitlab
def filtered_error_message(message) def filtered_error_message(message)
Projects::ImportErrorFilter.filter_message(message) Projects::ImportErrorFilter.filter_message(message)
end end
def after_export_lock_file
AfterExportStrategies::BaseAfterExportStrategy.lock_file_path(project)
end
end end
end end
end end
...@@ -49,8 +49,7 @@ describe 'Import/Export - project export integration test', :js do ...@@ -49,8 +49,7 @@ describe 'Import/Export - project export integration test', :js do
expect(page).to have_content('Download export') expect(page).to have_content('Download export')
expect(file_permissions(project.export_path)).to eq(0700) expect(project.export_status).to eq(:finished)
expect(project.export_file.path).to include('tar.gz') expect(project.export_file.path).to include('tar.gz')
in_directory_with_expanded_export(project) do |exit_status, tmpdir| in_directory_with_expanded_export(project) do |exit_status, tmpdir|
......
...@@ -24,14 +24,22 @@ describe Gitlab::ImportExport::AfterExportStrategies::BaseAfterExportStrategy do ...@@ -24,14 +24,22 @@ describe Gitlab::ImportExport::AfterExportStrategies::BaseAfterExportStrategy do
service.execute(user, project) service.execute(user, project)
expect(lock_path_exist?).to be_truthy expect(service.locks_present?).to be_truthy
end end
context 'when the method succeeds' do context 'when the method succeeds' do
it 'removes the lock file' do it 'removes the lock file' do
service.execute(user, project) service.execute(user, project)
expect(lock_path_exist?).to be_falsey expect(service.locks_present?).to be_falsey
end
it 'removes the archive path' do
FileUtils.mkdir_p(shared.archive_path)
service.execute(user, project)
expect(File.exist?(shared.archive_path)).to be_falsey
end end
end end
...@@ -62,13 +70,21 @@ describe Gitlab::ImportExport::AfterExportStrategies::BaseAfterExportStrategy do ...@@ -62,13 +70,21 @@ describe Gitlab::ImportExport::AfterExportStrategies::BaseAfterExportStrategy do
service.execute(user, project) service.execute(user, project)
end end
it 'removes the archive path' do
FileUtils.mkdir_p(shared.archive_path)
service.execute(user, project)
expect(File.exist?(shared.archive_path)).to be_falsey
end
end end
context 'when an exception is raised' do context 'when an exception is raised' do
it 'removes the lock' do it 'removes the lock' do
expect { service.execute(user, project) }.to raise_error(NotImplementedError) expect { service.execute(user, project) }.to raise_error(NotImplementedError)
expect(lock_path_exist?).to be_falsey expect(service.locks_present?).to be_falsey
end end
end end
end end
...@@ -97,8 +113,4 @@ describe Gitlab::ImportExport::AfterExportStrategies::BaseAfterExportStrategy do ...@@ -97,8 +113,4 @@ describe Gitlab::ImportExport::AfterExportStrategies::BaseAfterExportStrategy do
expect(described_class.new(params).to_json).to eq result expect(described_class.new(params).to_json).to eq result
end end
end end
def lock_path_exist?
File.exist?(described_class.lock_file_path(project))
end
end end
...@@ -5,6 +5,35 @@ describe Gitlab::ImportExport::Shared do ...@@ -5,6 +5,35 @@ describe Gitlab::ImportExport::Shared do
let(:project) { build(:project) } let(:project) { build(:project) }
subject { project.import_export_shared } subject { project.import_export_shared }
context 'with a repository on disk' do
let(:project) { create(:project, :repository) }
let(:base_path) { %(/tmp/project_exports/#{project.disk_path}/) }
describe '#archive_path' do
it 'uses a random hash to avoid conflicts' do
expect(subject.archive_path).to match(/#{base_path}\h{32}/)
end
it 'memoizes the path' do
path = subject.archive_path
2.times { expect(subject.archive_path).to eq(path) }
end
end
describe '#export_path' do
it 'uses a random hash relative to project path' do
expect(subject.export_path).to match(/#{base_path}\h{32}\/\h{32}/)
end
it 'memoizes the path' do
path = subject.export_path
2.times { expect(subject.export_path).to eq(path) }
end
end
end
describe '#error' do describe '#error' do
let(:error) { StandardError.new('Error importing into /my/folder Permission denied @ unlink_internal - /var/opt/gitlab/gitlab-rails/shared/a/b/c/uploads/file') } let(:error) { StandardError.new('Error importing into /my/folder Permission denied @ unlink_internal - /var/opt/gitlab/gitlab-rails/shared/a/b/c/uploads/file') }
......
...@@ -30,7 +30,7 @@ describe API::ProjectExport do ...@@ -30,7 +30,7 @@ describe API::ProjectExport do
FileUtils.mkdir_p File.join(project_started.export_path, 'securerandom-hex') FileUtils.mkdir_p File.join(project_started.export_path, 'securerandom-hex')
# simulate in after export action # simulate in after export action
FileUtils.touch Gitlab::ImportExport::AfterExportStrategies::BaseAfterExportStrategy.lock_file_path(project_after_export) FileUtils.touch File.join(project_after_export.import_export_shared.lock_files_path, SecureRandom.hex)
end end
after do after do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment