Commit 3b3832c0 authored by Robert Speicher

Merge branch 'port_repo_backup_to_gl_repo' into 'master'

Port repository backup/restore to use GlRepository

See merge request gitlab-org/gitlab!40682
parents 9d0d01c6 15daeb96
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
require 'yaml' require 'yaml'
module Backup module Backup
class Repository class Repositories
attr_reader :progress attr_reader :progress
def initialize(progress) def initialize(progress)
...@@ -41,98 +41,23 @@ module Backup ...@@ -41,98 +41,23 @@ module Backup
raise errors.pop unless errors.empty? raise errors.pop unless errors.empty?
end end
# Bundles the repository of +project+ to the path returned by
# +path_to_bundle+ and then backs up its custom hooks.
#
# Any StandardError raised along the way is handed to +progress_warn+
# instead of being propagated to the caller.
def backup_project(project)
  begin
    bundle_target = path_to_bundle(project)
    gitaly_service = Gitlab::GitalyClient::RepositoryService.new(project.repository)
    gitaly_service.create_bundle(bundle_target)

    backup_custom_hooks(project)
  rescue StandardError => e
    progress_warn(project, e, 'Failed to backup repo')
  end
end
# Ensures the per-project backup directory exists, then asks the Gitaly
# repository service to write the project's custom hooks to the path
# returned by +custom_hooks_tar+.
def backup_custom_hooks(project)
  FileUtils.mkdir_p(project_backup_path(project))

  hooks_archive = custom_hooks_tar(project)
  gitaly_service = Gitlab::GitalyClient::RepositoryService.new(project.repository)
  gitaly_service.backup_custom_hooks(hooks_archive)
end
# Restores custom hooks for +project+ through the Gitaly repository service.
#
# Does nothing (and returns nil) when the project's backup directory is
# missing or contains no entry whose name starts with "custom_hooks".
def restore_custom_hooks(project)
  if Dir.exist?(project_backup_path(project))
    hook_entries = Dir.glob("#{project_backup_path(project)}/custom_hooks*")

    if hook_entries.any?
      hooks_archive = custom_hooks_tar(project)
      gitaly_service = Gitlab::GitalyClient::RepositoryService.new(project.repository)
      gitaly_service.restore_custom_hooks(hooks_archive)
    end
  end
end
def restore def restore
Project.find_each(batch_size: 1000) do |project| Project.find_each(batch_size: 1000) do |project|
progress.print " * #{project.full_path} ... " restore_repository(project, Gitlab::GlRepository::PROJECT)
restore_repository(project, Gitlab::GlRepository::WIKI)
restore_repo_success =
begin
try_restore_repository(project)
rescue => err
progress.puts "Error: #{err}".color(:red)
false
end
if restore_repo_success
progress.puts "[DONE]".color(:green)
else
progress.puts "[Failed] restoring #{project.full_path} repository".color(:red)
end
wiki = ProjectWiki.new(project)
wiki.repository.remove rescue nil
path_to_wiki_bundle = path_to_bundle(wiki)
if File.exist?(path_to_wiki_bundle)
progress.print " * #{wiki.full_path} ... "
begin
wiki.repository.create_from_bundle(path_to_wiki_bundle)
restore_custom_hooks(wiki)
progress.puts "[DONE]".color(:green)
rescue => e
progress.puts "[Failed] restoring #{wiki.full_path} wiki".color(:red)
progress.puts "Error #{e}".color(:red)
end
end
end end
restore_object_pools restore_object_pools
end end
protected private
# Recreates the repository of +project+ from its backup bundle.
#
# The existing repository is removed first; a StandardError raised by the
# removal is deliberately ignored. When a bundle file exists the repository
# is created from it and custom hooks are restored; otherwise an empty
# repository is created. Returns true when no error was raised.
def try_restore_repository(project)
  bundle_file = path_to_bundle(project)

  begin
    project.repository.remove
  rescue StandardError
    nil
  end

  unless File.exist?(bundle_file)
    project.repository.create_repository
    return true
  end

  project.repository.create_from_bundle(bundle_file)
  restore_custom_hooks(project)
  true
end
# Path of the bundle file for +project+: the project's disk path with a
# ".bundle" suffix, placed under +backup_repos_path+.
def path_to_bundle(project)
  repos_root = backup_repos_path
  bundle_name = project.disk_path + '.bundle'

  File.join(repos_root, bundle_name)
end
# Directory under +backup_repos_path+ named after the disk path of +project+.
def project_backup_path(project)
  repos_root = backup_repos_path

  File.join(repos_root, project.disk_path)
end
def custom_hooks_tar(project) def restore_repository(container, type)
File.join(project_backup_path(project), "custom_hooks.tar") BackupRestore.new(
progress,
type.repository_for(container),
backup_repos_path
).restore(always_create: type.project?)
end end
def backup_repos_path def backup_repos_path
...@@ -145,8 +70,6 @@ module Backup ...@@ -145,8 +70,6 @@ module Backup
FileUtils.mkdir(backup_repos_path, mode: 0700) FileUtils.mkdir(backup_repos_path, mode: 0700)
end end
private
def dump_consecutive def dump_consecutive
Project.includes(:route, :group, namespace: :owner).find_each(batch_size: 1000) do |project| Project.includes(:route, :group, namespace: :owner).find_each(batch_size: 1000) do |project|
dump_project(project) dump_project(project)
...@@ -193,54 +116,100 @@ module Backup ...@@ -193,54 +116,100 @@ module Backup
end end
def dump_project(project) def dump_project(project)
progress.puts " * #{display_repo_path(project)} ... " backup_repository(project, Gitlab::GlRepository::PROJECT)
backup_repository(project, Gitlab::GlRepository::WIKI)
end
if project.hashed_storage?(:repository) def backup_repository(container, type)
FileUtils.mkdir_p(File.dirname(File.join(backup_repos_path, project.disk_path))) BackupRestore.new(
else progress,
FileUtils.mkdir_p(File.join(backup_repos_path, project.namespace.full_path)) if project.namespace type.repository_for(container),
backup_repos_path
).backup
end end
if !empty_repo?(project) def restore_object_pools
backup_project(project) PoolRepository.includes(:source_project).find_each do |pool|
progress.puts " * #{display_repo_path(project)} ... " + "[DONE]".color(:green) progress.puts " - Object pool #{pool.disk_path}..."
else
progress.puts " * #{display_repo_path(project)} ... " + "[SKIPPED]".color(:cyan) pool.source_project ||= pool.member_projects.first.root_of_fork_network
pool.state = 'none'
pool.save
pool.schedule
end
end end
wiki = ProjectWiki.new(project) class BackupRestore
attr_accessor :progress, :repository, :backup_repos_path
if !empty_repo?(wiki) def initialize(progress, repository, backup_repos_path)
backup_project(wiki) @progress = progress
progress.puts " * #{display_repo_path(project)} ... " + "[DONE] Wiki".color(:green) @repository = repository
else @backup_repos_path = backup_repos_path
progress.puts " * #{display_repo_path(project)} ... " + "[SKIPPED] Wiki".color(:cyan)
end end
def backup
progress.puts " * #{display_repo_path} ... "
if repository.empty?
progress.puts " * #{display_repo_path} ... " + "[SKIPPED]".color(:cyan)
return
end end
def progress_warn(project, cmd, output) FileUtils.mkdir_p(repository_backup_path)
progress.puts "[WARNING] Executing #{cmd}".color(:orange)
progress.puts "Ignoring error on #{display_repo_path(project)} - #{output}".color(:orange) repository.bundle_to_disk(path_to_bundle)
repository.gitaly_repository_client.backup_custom_hooks(custom_hooks_tar)
progress.puts " * #{display_repo_path} ... " + "[DONE]".color(:green)
rescue => e
progress.puts "[Failed] backing up #{display_repo_path}".color(:red)
progress.puts "Error #{e}".color(:red)
end end
def empty_repo?(project_or_wiki) def restore(always_create: false)
project_or_wiki.repository.expire_emptiness_caches progress.puts " * #{display_repo_path} ... "
project_or_wiki.repository.empty?
repository.remove rescue nil
if File.exist?(path_to_bundle)
repository.create_from_bundle(path_to_bundle)
restore_custom_hooks
elsif always_create
repository.create_repository
end end
def display_repo_path(project) progress.puts " * #{display_repo_path} ... " + "[DONE]".color(:green)
project.hashed_storage?(:repository) ? "#{project.full_path} (#{project.disk_path})" : project.full_path
rescue => e
progress.puts "[Failed] restoring #{display_repo_path}".color(:red)
progress.puts "Error #{e}".color(:red)
end end
def restore_object_pools private
PoolRepository.includes(:source_project).find_each do |pool|
progress.puts " - Object pool #{pool.disk_path}..."
pool.source_project ||= pool.member_projects.first.root_of_fork_network def display_repo_path
pool.state = 'none' "#{repository.full_path} (#{repository.disk_path})"
pool.save end
pool.schedule def repository_backup_path
@repository_backup_path ||= File.join(backup_repos_path, repository.disk_path)
end
# Path of this repository's bundle file under +backup_repos_path+.
# The value is computed on first use and cached in @path_to_bundle.
def path_to_bundle
  return @path_to_bundle if @path_to_bundle

  @path_to_bundle = File.join(backup_repos_path, repository.disk_path + '.bundle')
end
# Restores custom hooks through the repository's Gitaly client when the
# custom hooks tar file exists; otherwise does nothing and returns nil.
def restore_custom_hooks
  if File.exist?(custom_hooks_tar)
    gitaly_client = repository.gitaly_repository_client
    gitaly_client.restore_custom_hooks(custom_hooks_tar)
  end
end
def custom_hooks_tar
File.join(repository_backup_path, "custom_hooks.tar")
end end
end end
......
...@@ -107,7 +107,7 @@ namespace :gitlab do ...@@ -107,7 +107,7 @@ namespace :gitlab do
puts "GITLAB_BACKUP_MAX_CONCURRENCY and GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY must have a value of at least 1".color(:red) puts "GITLAB_BACKUP_MAX_CONCURRENCY and GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY must have a value of at least 1".color(:red)
exit 1 exit 1
else else
Backup::Repository.new(progress).dump( Backup::Repositories.new(progress).dump(
max_concurrency: max_concurrency, max_concurrency: max_concurrency,
max_storage_concurrency: max_storage_concurrency max_storage_concurrency: max_storage_concurrency
) )
...@@ -117,7 +117,7 @@ namespace :gitlab do ...@@ -117,7 +117,7 @@ namespace :gitlab do
task restore: :gitlab_environment do task restore: :gitlab_environment do
puts_time "Restoring repositories ...".color(:blue) puts_time "Restoring repositories ...".color(:blue)
Backup::Repository.new(progress).restore Backup::Repositories.new(progress).restore
puts_time "done".color(:green) puts_time "done".color(:green)
end end
end end
......
...@@ -2,9 +2,7 @@ ...@@ -2,9 +2,7 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Backup::Repository do RSpec.describe Backup::Repositories do
let_it_be(:project) { create(:project, :wiki_repo) }
let(:progress) { StringIO.new } let(:progress) { StringIO.new }
subject { described_class.new(progress) } subject { described_class.new(progress) }
...@@ -12,7 +10,6 @@ RSpec.describe Backup::Repository do ...@@ -12,7 +10,6 @@ RSpec.describe Backup::Repository do
before do before do
allow(progress).to receive(:puts) allow(progress).to receive(:puts)
allow(progress).to receive(:print) allow(progress).to receive(:print)
allow(FileUtils).to receive(:mv).and_return(true)
allow_next_instance_of(described_class) do |instance| allow_next_instance_of(described_class) do |instance|
allow(instance).to receive(:progress).and_return(progress) allow(instance).to receive(:progress).and_return(progress)
...@@ -24,11 +21,19 @@ RSpec.describe Backup::Repository do ...@@ -24,11 +21,19 @@ RSpec.describe Backup::Repository do
allow(Gitlab.config.repositories.storages).to receive(:keys).and_return(storage_keys) allow(Gitlab.config.repositories.storages).to receive(:keys).and_return(storage_keys)
end end
let_it_be(:projects) { create_list(:project, 5, :wiki_repo) + [project] } let_it_be(:projects) { create_list(:project, 5, :repository, :wiki_repo) }
let(:storage_keys) { %w[default test_second_storage] } let(:storage_keys) { %w[default test_second_storage] }
context 'no concurrency' do context 'no concurrency' do
# After a non-concurrent dump, every project in +projects+ must have a
# bundle file under the configured backup path.
it 'creates repository bundle' do
  subject.dump(max_concurrency: 1, max_storage_concurrency: 1)

  projects.each do |dumped_project|
    bundle_file = File.join(Gitlab.config.backup.path, 'repositories', dumped_project.disk_path + '.bundle')

    expect(File).to exist(bundle_file)
  end
end
it 'creates the expected number of threads' do it 'creates the expected number of threads' do
expect(Thread).not_to receive(:new) expect(Thread).not_to receive(:new)
...@@ -58,12 +63,22 @@ RSpec.describe Backup::Repository do ...@@ -58,12 +63,22 @@ RSpec.describe Backup::Repository do
subject.dump(max_concurrency: 1, max_storage_concurrency: 1) subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
end.count end.count
create_list(:project, 2, :wiki_repo) create_list(:project, 2, :repository, :wiki_repo)
expect do expect do
subject.dump(max_concurrency: 1, max_storage_concurrency: 1) subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
end.not_to exceed_query_limit(control_count) end.not_to exceed_query_limit(control_count)
end end
# Same bundle check as above, for a project created with the
# :legacy_storage trait.
context 'legacy storage' do
  let_it_be(:project) { create(:project, :repository, :legacy_storage, :wiki_repo) }

  it 'creates repository bundle' do
    subject.dump(max_concurrency: 1, max_storage_concurrency: 1)

    bundle_file = File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.bundle')

    expect(File).to exist(bundle_file)
  end
end
end end
[4, 10].each do |max_storage_concurrency| [4, 10].each do |max_storage_concurrency|
...@@ -120,7 +135,7 @@ RSpec.describe Backup::Repository do ...@@ -120,7 +135,7 @@ RSpec.describe Backup::Repository do
subject.dump(max_concurrency: 1, max_storage_concurrency: max_storage_concurrency) subject.dump(max_concurrency: 1, max_storage_concurrency: max_storage_concurrency)
end.count end.count
create_list(:project, 2, :wiki_repo) create_list(:project, 2, :repository, :wiki_repo)
expect do expect do
subject.dump(max_concurrency: 1, max_storage_concurrency: max_storage_concurrency) subject.dump(max_concurrency: 1, max_storage_concurrency: max_storage_concurrency)
...@@ -131,45 +146,32 @@ RSpec.describe Backup::Repository do ...@@ -131,45 +146,32 @@ RSpec.describe Backup::Repository do
end end
describe '#restore' do describe '#restore' do
let(:timestamp) { Time.utc(2017, 3, 22) } let_it_be(:project) { create(:project, :wiki_repo) }
# One "repositories.old.<timestamp>" path per configured storage, built
# relative to the parent of each storage's legacy disk path.
let(:temp_dirs) do
  Gitlab.config.repositories.storages.map do |_name, storage|
    Gitlab::GitalyClient::StorageSettings.allow_disk_access do
      legacy_root = storage.legacy_disk_path

      File.join(legacy_root, '..', 'repositories.old.' + timestamp.to_i.to_s)
    end
  end
end

# Run every example with the clock frozen at +timestamp+.
around do |example|
  Timecop.freeze(timestamp) do
    example.run
  end
end

# Remove the temporary directories after each example.
after do
  temp_dirs.each do |dir|
    FileUtils.rm_rf(dir)
  end
end
describe 'command failure' do describe 'command failure' do
before do before do
# Allow us to set expectations on the project directly
expect(Project).to receive(:find_each).and_yield(project) expect(Project).to receive(:find_each).and_yield(project)
expect(project.repository).to receive(:create_repository) { raise 'Fail in tests' }
allow_next_instance_of(Repository) do |repository|
allow(repository).to receive(:create_repository) { raise 'Fail in tests' }
end
end end
context 'hashed storage' do context 'hashed storage' do
it 'shows the appropriate error' do it 'shows the appropriate error' do
subject.restore subject.restore
expect(progress).to have_received(:puts).with("[Failed] restoring #{project.full_path} repository") expect(progress).to have_received(:puts).with("[Failed] restoring #{project.full_path} (#{project.disk_path})")
end end
end end
context 'legacy storage' do context 'legacy storage' do
let!(:project) { create(:project, :legacy_storage) } let_it_be(:project) { create(:project, :legacy_storage) }
it 'shows the appropriate error' do it 'shows the appropriate error' do
subject.restore subject.restore
expect(progress).to have_received(:puts).with("[Failed] restoring #{project.full_path} repository") expect(progress).to have_received(:puts).with("[Failed] restoring #{project.full_path} (#{project.disk_path})")
end end
end end
end end
...@@ -188,45 +190,15 @@ RSpec.describe Backup::Repository do ...@@ -188,45 +190,15 @@ RSpec.describe Backup::Repository do
end end
it 'cleans existing repositories' do it 'cleans existing repositories' do
wiki_repository_spy = spy(:wiki) expect(Repository).to receive(:new).twice.and_wrap_original do |method, *original_args|
repository = method.call(*original_args)
allow_next_instance_of(ProjectWiki) do |project_wiki| expect(repository).to receive(:remove)
allow(project_wiki).to receive(:repository).and_return(wiki_repository_spy)
end
expect_next_instance_of(Repository) do |repo| repository
expect(repo).to receive(:remove)
end end
subject.restore subject.restore
expect(wiki_repository_spy).to have_received(:remove)
end
end
describe '#empty_repo?' do
context 'for a wiki' do
let(:wiki) { create(:project_wiki) }
it 'invalidates the emptiness cache' do
expect(wiki.repository).to receive(:expire_emptiness_caches).once
subject.send(:empty_repo?, wiki)
end
context 'wiki repo has content' do
let!(:wiki_page) { create(:wiki_page, wiki: wiki) }
it 'returns true, regardless of bad cache value' do
expect(subject.send(:empty_repo?, wiki)).to be(false)
end
end
context 'wiki repo does not have content' do
it 'returns true, regardless of bad cache value' do
expect(subject.send(:empty_repo?, wiki)).to be_truthy
end
end
end end
end end
end end
...@@ -370,7 +370,7 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do ...@@ -370,7 +370,7 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do
end end
it 'has defaults' do it 'has defaults' do
expect_next_instance_of(::Backup::Repository) do |instance| expect_next_instance_of(::Backup::Repositories) do |instance|
expect(instance).to receive(:dump) expect(instance).to receive(:dump)
.with(max_concurrency: 1, max_storage_concurrency: 1) .with(max_concurrency: 1, max_storage_concurrency: 1)
.and_call_original .and_call_original
...@@ -383,7 +383,7 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do ...@@ -383,7 +383,7 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do
stub_env('GITLAB_BACKUP_MAX_CONCURRENCY', 5) stub_env('GITLAB_BACKUP_MAX_CONCURRENCY', 5)
stub_env('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY', 2) stub_env('GITLAB_BACKUP_MAX_STORAGE_CONCURRENCY', 2)
expect_next_instance_of(::Backup::Repository) do |instance| expect_next_instance_of(::Backup::Repositories) do |instance|
expect(instance).to receive(:dump) expect(instance).to receive(:dump)
.with(max_concurrency: 5, max_storage_concurrency: 2) .with(max_concurrency: 5, max_storage_concurrency: 2)
.and_call_original .and_call_original
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment