Commit 1cb3fe48 authored by Jan Provaznik

Merge branch '233626-fj-add-snippets-to-backups' into 'master'

Add snippets to gitlab backups

See merge request gitlab-org/gitlab!43694
parents f2e77c33 bab05d82
...@@ -5,6 +5,10 @@ module Shardable ...@@ -5,6 +5,10 @@ module Shardable
included do included do
belongs_to :shard belongs_to :shard
scope :for_repository_storage, -> (repository_storage) { joins(:shard).where(shards: { name: repository_storage }) }
scope :excluding_repository_storage, -> (repository_storage) { joins(:shard).where.not(shards: { name: repository_storage }) }
validates :shard, presence: true validates :shard, presence: true
end end
......
...@@ -676,8 +676,6 @@ class Project < ApplicationRecord ...@@ -676,8 +676,6 @@ class Project < ApplicationRecord
scope :joins_import_state, -> { joins("INNER JOIN project_mirror_data import_state ON import_state.project_id = projects.id") } scope :joins_import_state, -> { joins("INNER JOIN project_mirror_data import_state ON import_state.project_id = projects.id") }
scope :for_group, -> (group) { where(group: group) } scope :for_group, -> (group) { where(group: group) }
scope :for_group_and_its_subgroups, ->(group) { where(namespace_id: group.self_and_descendants.select(:id)) } scope :for_group_and_its_subgroups, ->(group) { where(namespace_id: group.self_and_descendants.select(:id)) }
scope :for_repository_storage, -> (repository_storage) { where(repository_storage: repository_storage) }
scope :excluding_repository_storage, -> (repository_storage) { where.not(repository_storage: repository_storage) }
class << self class << self
# Searches for a list of projects based on the query given in `query`. # Searches for a list of projects based on the query given in `query`.
......
---
title: Add snippets to GitLab backups
merge_request: 43694
author:
type: changed
...@@ -54,6 +54,7 @@ including: ...@@ -54,6 +54,7 @@ including:
- LFS objects - LFS objects
- Container Registry images - Container Registry images
- GitLab Pages content - GitLab Pages content
- Snippets
CAUTION: **Warning:** CAUTION: **Warning:**
GitLab does not back up any configuration files, SSL certificates, or system GitLab does not back up any configuration files, SSL certificates, or system
......
...@@ -17,9 +17,7 @@ module Backup ...@@ -17,9 +17,7 @@ module Backup
return dump_consecutive return dump_consecutive
end end
if Project.excluding_repository_storage(Gitlab.config.repositories.storages.keys).exists? check_valid_storages!
raise Error, 'repositories.storages in gitlab.yml is misconfigured'
end
semaphore = Concurrent::Semaphore.new(max_concurrency) semaphore = Concurrent::Semaphore.new(max_concurrency)
errors = Queue.new errors = Queue.new
...@@ -53,16 +51,16 @@ module Backup ...@@ -53,16 +51,16 @@ module Backup
private private
def restore_repository(container, type) def check_valid_storages!
BackupRestore.new( [ProjectRepository, SnippetRepository].each do |klass|
progress, if klass.excluding_repository_storage(Gitlab.config.repositories.storages.keys).exists?
type.repository_for(container), raise Error, "repositories.storages in gitlab.yml does not include all storages used by #{klass}"
backup_repos_path end
).restore(always_create: type.project?) end
end end
def backup_repos_path def backup_repos_path
File.join(Gitlab.config.backup.path, 'repositories') @backup_repos_path ||= File.join(Gitlab.config.backup.path, 'repositories')
end end
def prepare def prepare
...@@ -72,11 +70,20 @@ module Backup ...@@ -72,11 +70,20 @@ module Backup
end end
def dump_consecutive def dump_consecutive
Project.includes(:route, :group, namespace: :owner).find_each(batch_size: 1000) do |project| dump_consecutive_projects
dump_consecutive_snippets
end
def dump_consecutive_projects
project_relation.find_each(batch_size: 1000) do |project|
dump_project(project) dump_project(project)
end end
end end
def dump_consecutive_snippets
Snippet.find_each(batch_size: 1000) { |snippet| dump_snippet(snippet) }
end
def dump_storage(storage, semaphore, max_storage_concurrency:) def dump_storage(storage, semaphore, max_storage_concurrency:)
errors = Queue.new errors = Queue.new
queue = InterlockSizedQueue.new(1) queue = InterlockSizedQueue.new(1)
...@@ -84,13 +91,18 @@ module Backup ...@@ -84,13 +91,18 @@ module Backup
threads = Array.new(max_storage_concurrency) do threads = Array.new(max_storage_concurrency) do
Thread.new do Thread.new do
Rails.application.executor.wrap do Rails.application.executor.wrap do
while project = queue.pop while container = queue.pop
ActiveSupport::Dependencies.interlock.permit_concurrent_loads do ActiveSupport::Dependencies.interlock.permit_concurrent_loads do
semaphore.acquire semaphore.acquire
end end
begin begin
dump_project(project) case container
when Project
dump_project(container)
when Snippet
dump_snippet(container)
end
rescue => e rescue => e
errors << e errors << e
break break
...@@ -102,11 +114,7 @@ module Backup ...@@ -102,11 +114,7 @@ module Backup
end end
end end
Project.for_repository_storage(storage).includes(:route, :group, namespace: :owner).find_each(batch_size: 100) do |project| enqueue_records_for_storage(storage, queue, errors)
break unless errors.empty?
queue.push(project)
end
raise errors.pop unless errors.empty? raise errors.pop unless errors.empty?
ensure ensure
...@@ -122,6 +130,36 @@ module Backup ...@@ -122,6 +130,36 @@ module Backup
backup_repository(project, Gitlab::GlRepository::DESIGN) backup_repository(project, Gitlab::GlRepository::DESIGN)
end end
def dump_snippet(snippet)
backup_repository(snippet, Gitlab::GlRepository::SNIPPET)
end
def enqueue_records_for_storage(storage, queue, errors)
records_to_enqueue(storage).each do |relation|
relation.find_each(batch_size: 100) do |project|
break unless errors.empty?
queue.push(project)
end
end
end
def records_to_enqueue(storage)
[projects_in_storage(storage), snippets_in_storage(storage)]
end
def projects_in_storage(storage)
project_relation.id_in(ProjectRepository.for_repository_storage(storage).select(:project_id))
end
def project_relation
Project.includes(:route, :group, namespace: :owner)
end
def snippets_in_storage(storage)
Snippet.id_in(SnippetRepository.for_repository_storage(storage).select(:snippet_id))
end
def backup_repository(container, type) def backup_repository(container, type)
BackupRestore.new( BackupRestore.new(
progress, progress,
...@@ -130,6 +168,14 @@ module Backup ...@@ -130,6 +168,14 @@ module Backup
).backup ).backup
end end
def restore_repository(container, type)
BackupRestore.new(
progress,
type.repository_for(container),
backup_repos_path
).restore(always_create: type.project?)
end
def restore_object_pools def restore_object_pools
PoolRepository.includes(:source_project).find_each do |pool| PoolRepository.includes(:source_project).find_each do |pool|
progress.puts " - Object pool #{pool.disk_path}..." progress.puts " - Object pool #{pool.disk_path}..."
......
...@@ -21,15 +21,19 @@ RSpec.describe Backup::Repositories do ...@@ -21,15 +21,19 @@ RSpec.describe Backup::Repositories do
RSpec.shared_examples 'creates repository bundles' do RSpec.shared_examples 'creates repository bundles' do
specify :aggregate_failures do specify :aggregate_failures do
# Add data to the wiki and design repositories, so they will be included in the dump. # Add data to the wiki, design repositories, and snippets, so they will be included in the dump.
create(:wiki_page, container: project) create(:wiki_page, container: project)
create(:design, :with_file, issue: create(:issue, project: project)) create(:design, :with_file, issue: create(:issue, project: project))
project_snippet = create(:project_snippet, :repository, project: project)
personal_snippet = create(:personal_snippet, :repository, author: project.owner)
subject.dump(max_concurrency: 1, max_storage_concurrency: 1) subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.bundle')) expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.wiki' + '.bundle')) expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.wiki' + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.design' + '.bundle')) expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.design' + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', personal_snippet.disk_path + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project_snippet.disk_path + '.bundle'))
end end
end end
......
...@@ -8,6 +8,11 @@ RSpec.describe ProjectRepository do ...@@ -8,6 +8,11 @@ RSpec.describe ProjectRepository do
it { is_expected.to belong_to(:project) } it { is_expected.to belong_to(:project) }
end end
it_behaves_like 'shardable scopes' do
let_it_be(:record_1) { create(:project_repository) }
let_it_be(:record_2, reload: true) { create(:project_repository) }
end
describe '.find_project' do describe '.find_project' do
it 'finds project by disk path' do it 'finds project by disk path' do
project = create(:project) project = create(:project)
......
...@@ -5567,32 +5567,6 @@ RSpec.describe Project do ...@@ -5567,32 +5567,6 @@ RSpec.describe Project do
end end
end end
describe '.for_repository_storage' do
it 'returns the projects for a given repository storage' do
stub_storage_settings('test_second_storage' => {
'path' => TestEnv::SECOND_STORAGE_PATH,
'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address
})
expected_project = create(:project, repository_storage: 'default')
create(:project, repository_storage: 'test_second_storage')
expect(described_class.for_repository_storage('default')).to eq([expected_project])
end
end
describe '.excluding_repository_storage' do
it 'returns the projects excluding the given repository storage' do
stub_storage_settings('test_second_storage' => {
'path' => TestEnv::SECOND_STORAGE_PATH,
'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address
})
expected_project = create(:project, repository_storage: 'test_second_storage')
create(:project, repository_storage: 'default')
expect(described_class.excluding_repository_storage('default')).to eq([expected_project])
end
end
describe '.deployments' do describe '.deployments' do
subject { project.deployments } subject { project.deployments }
......
...@@ -13,6 +13,11 @@ RSpec.describe SnippetRepository do ...@@ -13,6 +13,11 @@ RSpec.describe SnippetRepository do
it { is_expected.to belong_to(:snippet) } it { is_expected.to belong_to(:snippet) }
end end
it_behaves_like 'shardable scopes' do
let_it_be(:record_1) { create(:snippet_repository) }
let_it_be(:record_2, reload: true) { create(:snippet_repository) }
end
describe '.find_snippet' do describe '.find_snippet' do
it 'finds snippet by disk path' do it 'finds snippet by disk path' do
snippet = create(:snippet, author: user) snippet = create(:snippet, author: user)
......
# frozen_string_literal: true
RSpec.shared_examples 'shardable scopes' do
let_it_be(:secondary_shard) { create(:shard, name: 'test_second_storage') }
before do
record_2.update!(shard: secondary_shard)
end
describe '.for_repository_storage' do
it 'returns the objects for a given repository storage' do
expect(described_class.for_repository_storage('default')).to eq([record_1])
end
end
describe '.excluding_repository_storage' do
it 'returns the objects excluding the given repository storage' do
expect(described_class.excluding_repository_storage('default')).to eq([record_2])
end
end
end
...@@ -284,69 +284,76 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do ...@@ -284,69 +284,76 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do
end end
context 'multiple repository storages' do context 'multiple repository storages' do
let_it_be(:default_storage_hash) { Gitlab.config.repositories.storages.default.to_h } include StubConfiguration
let(:default_storage_name) { 'default' }
let(:second_storage_name) { 'test_second_storage' }
before do before do
# We only need a backup of the repositories for this test # We only need a backup of the repositories for this test
stub_env('SKIP', 'db,uploads,builds,artifacts,lfs,registry') stub_env('SKIP', 'db,uploads,builds,artifacts,lfs,registry')
stub_storage_settings( second_storage_name => {
'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address,
'path' => TestEnv::SECOND_STORAGE_PATH
})
end
allow(Gitlab.config.repositories).to receive(:storages).and_return(storages) shared_examples 'includes repositories in all repository storages' do
specify :aggregate_failures do
# Avoid asking gitaly about the root ref (which will fail because of the project_a = create(:project, :repository)
# mocked storages) project_a.track_project_repository
allow_any_instance_of(Repository).to receive(:empty?).and_return(false) project_snippet_a = create(:project_snippet, :repository, project: project_a, author: project_a.owner)
project_b = create(:project, :repository, repository_storage: second_storage_name)
project_b.track_project_repository
project_snippet_b = create(:project_snippet, :repository, project: project_b, author: project_b.owner)
project_snippet_b.snippet_repository.update!(shard: project_b.project_repository.shard)
create(:wiki_page, container: project_a)
create(:design, :with_file, issue: create(:issue, project: project_a))
move_repository_to_secondary(project_b)
move_repository_to_secondary(project_snippet_b)
FileUtils.mkdir_p(b_storage_dir) expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout
# Even when overriding the storage, we have to move it there, so it exists tar_contents, exit_status = Gitlab::Popen.popen(
Gitlab::GitalyClient::StorageSettings.allow_disk_access do %W{tar -tvf #{backup_tar} repositories}
FileUtils.mv(
File.join(Settings.absolute(storages['default'].legacy_disk_path), project_b.repository.disk_path + '.git'),
Rails.root.join(storages['test_second_storage'].legacy_disk_path, project_b.repository.disk_path + '.git')
) )
end
end
after do tar_lines = tar_contents.lines.grep(/\.bundle/)
FileUtils.rm_rf(test_second_storage_dir)
end
let(:test_second_storage_dir) { Dir.mktmpdir } expect(exit_status).to eq(0)
let(:test_second_storage) do [
Gitlab::GitalyClient::StorageSettings.new(default_storage_hash.merge('path' => test_second_storage_dir)) "#{project_a.disk_path}.bundle",
end "#{project_a.disk_path}.wiki.bundle",
"#{project_a.disk_path}.design.bundle",
"#{project_b.disk_path}.bundle",
"#{project_snippet_a.disk_path}.bundle",
"#{project_snippet_b.disk_path}.bundle"
].each do |repo_name|
repo_lines = tar_lines.grep(/#{repo_name}/)
let(:storages) do expect(repo_lines.size).to eq 1
{ # Checking that the size of the bundle is bigger than 0
'default' => Gitlab.config.repositories.storages.default, expect(repo_lines.first.split[4].to_i > 0).to be true
'test_second_storage' => test_second_storage end
}
end end
let!(:project_a) { create(:project, :repository) } def move_repository_to_secondary(record)
let!(:project_a_wiki_page) { create(:wiki_page, container: project_a) } Gitlab::GitalyClient::StorageSettings.allow_disk_access do
let!(:project_a_design) { create(:design, :with_file, issue: create(:issue, project: project_a)) } default_shard_legacy_path = Gitlab.config.repositories.storages.default.legacy_disk_path
let!(:project_b) { create(:project, :repository, repository_storage: 'test_second_storage') } secondary_legacy_path = Gitlab.config.repositories.storages[second_storage_name].legacy_disk_path
let!(:b_storage_dir) { File.join(test_second_storage_dir, File.dirname(project_b.disk_path)) } dst_dir = File.join(secondary_legacy_path, File.dirname(record.disk_path))
shared_examples 'includes repositories in all repository storages' do
specify :aggregate_failures do
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout
tar_contents, exit_status = Gitlab::Popen.popen( FileUtils.mkdir_p(dst_dir) unless Dir.exist?(dst_dir)
%W{tar -tvf #{backup_tar} repositories}
)
expect(exit_status).to eq(0) FileUtils.mv(
expect(tar_contents).to include( File.join(default_shard_legacy_path, record.disk_path + '.git'),
"repositories/#{project_a.disk_path}.bundle", File.join(secondary_legacy_path, record.disk_path + '.git')
"repositories/#{project_a.disk_path}.wiki.bundle",
"repositories/#{project_a.disk_path}.design.bundle",
"repositories/#{project_b.disk_path}.bundle"
) )
end end
end end
end
context 'no concurrency' do context 'no concurrency' do
it_behaves_like 'includes repositories in all repository storages' it_behaves_like 'includes repositories in all repository storages'
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment