Commit 1cb3fe48 authored by Jan Provaznik

Merge branch '233626-fj-add-snippets-to-backups' into 'master'

Add snippets to gitlab backups

See merge request gitlab-org/gitlab!43694
parents f2e77c33 bab05d82
......@@ -5,6 +5,10 @@ module Shardable
# Declarations applied to every model that includes this concern: the shard
# association, two scopes that filter by shard name, and a presence validation.
included do
belongs_to :shard
# Records whose joined shard has a name matching `repository_storage`.
scope :for_repository_storage, -> (repository_storage) { joins(:shard).where(shards: { name: repository_storage }) }
# Records whose joined shard has a name that does not match `repository_storage`.
scope :excluding_repository_storage, -> (repository_storage) { joins(:shard).where.not(shards: { name: repository_storage }) }
# A record without a shard is invalid.
validates :shard, presence: true
end
......
......@@ -676,8 +676,6 @@ class Project < ApplicationRecord
scope :joins_import_state, -> { joins("INNER JOIN project_mirror_data import_state ON import_state.project_id = projects.id") }
scope :for_group, -> (group) { where(group: group) }
scope :for_group_and_its_subgroups, ->(group) { where(namespace_id: group.self_and_descendants.select(:id)) }
scope :for_repository_storage, -> (repository_storage) { where(repository_storage: repository_storage) }
scope :excluding_repository_storage, -> (repository_storage) { where.not(repository_storage: repository_storage) }
class << self
# Searches for a list of projects based on the query given in `query`.
......
---
title: Add snippets to GitLab backups
merge_request: 43694
author:
type: changed
......@@ -54,6 +54,7 @@ including:
- LFS objects
- Container Registry images
- GitLab Pages content
- Snippets
CAUTION: **Warning:**
GitLab does not back up any configuration files, SSL certificates, or system
......@@ -1081,7 +1082,7 @@ For more information, see:
- PostgreSQL issue tracker:
- [Not being a superuser](https://www.postgresql.org/message-id/201110220712.30886.adrian.klaver@gmail.com).
- [Having different owners](https://www.postgresql.org/message-id/2039.1177339749@sss.pgh.pa.us).
- Stack Overflow: [Resulting errors](https://stackoverflow.com/questions/4368789/error-must-be-owner-of-language-plpgsql).
### When the secrets file is lost
......
......@@ -17,9 +17,7 @@ module Backup
return dump_consecutive
end
if Project.excluding_repository_storage(Gitlab.config.repositories.storages.keys).exists?
raise Error, 'repositories.storages in gitlab.yml is misconfigured'
end
check_valid_storages!
semaphore = Concurrent::Semaphore.new(max_concurrency)
errors = Queue.new
......@@ -53,16 +51,16 @@ module Backup
private
def restore_repository(container, type)
BackupRestore.new(
progress,
type.repository_for(container),
backup_repos_path
).restore(always_create: type.project?)
# Verifies that every repository record lives on a storage listed in
# gitlab.yml.
#
# Raises Error, naming the offending repository class, when any
# ProjectRepository or SnippetRepository row belongs to a shard whose name is
# not among the configured storage keys.
def check_valid_storages!
  configured_storages = Gitlab.config.repositories.storages.keys

  [ProjectRepository, SnippetRepository].each do |repository_class|
    next unless repository_class.excluding_repository_storage(configured_storages).exists?

    raise Error, "repositories.storages in gitlab.yml does not include all storages used by #{repository_class}"
  end
end
def backup_repos_path
File.join(Gitlab.config.backup.path, 'repositories')
@backup_repos_path ||= File.join(Gitlab.config.backup.path, 'repositories')
end
def prepare
......@@ -72,11 +70,20 @@ module Backup
end
def dump_consecutive
Project.includes(:route, :group, namespace: :owner).find_each(batch_size: 1000) do |project|
dump_consecutive_projects
dump_consecutive_snippets
end
# Dumps every project one after another, loading them in batches of 1000
# from `project_relation`.
def dump_consecutive_projects
  project_relation.find_each(batch_size: 1000) { |project| dump_project(project) }
end
# Dumps every snippet one after another, loading them in batches of 1000.
def dump_consecutive_snippets
  Snippet.find_each(batch_size: 1000) do |snippet|
    dump_snippet(snippet)
  end
end
def dump_storage(storage, semaphore, max_storage_concurrency:)
errors = Queue.new
queue = InterlockSizedQueue.new(1)
......@@ -84,13 +91,18 @@ module Backup
threads = Array.new(max_storage_concurrency) do
Thread.new do
Rails.application.executor.wrap do
while project = queue.pop
while container = queue.pop
ActiveSupport::Dependencies.interlock.permit_concurrent_loads do
semaphore.acquire
end
begin
dump_project(project)
case container
when Project
dump_project(container)
when Snippet
dump_snippet(container)
end
rescue => e
errors << e
break
......@@ -102,11 +114,7 @@ module Backup
end
end
Project.for_repository_storage(storage).includes(:route, :group, namespace: :owner).find_each(batch_size: 100) do |project|
break unless errors.empty?
queue.push(project)
end
enqueue_records_for_storage(storage, queue, errors)
raise errors.pop unless errors.empty?
ensure
......@@ -122,6 +130,36 @@ module Backup
backup_repository(project, Gitlab::GlRepository::DESIGN)
end
# Backs up the repository of a single snippet by delegating to
# `backup_repository` with the snippet repository type.
def dump_snippet(snippet)
  snippet_type = Gitlab::GlRepository::SNIPPET

  backup_repository(snippet, snippet_type)
end
# Pushes every record (project or snippet) stored on +storage+ onto +queue+.
#
# storage - name of the repository storage whose records are enqueued.
# queue   - object responding to #push; receives each record in turn.
# errors  - collection responding to #empty?; a non-empty collection means a
#           failure has been recorded, and enqueueing stops.
#
# Enqueueing stops for ALL remaining relations once an error is recorded. A
# `break` inside `find_each` only leaves that one relation, so the outer loop
# re-checks `errors` before querying the next relation.
def enqueue_records_for_storage(storage, queue, errors)
  records_to_enqueue(storage).each do |relation|
    break unless errors.empty?

    relation.find_each(batch_size: 100) do |container|
      break unless errors.empty?

      queue.push(container)
    end
  end
end
# Returns the relations whose records must be dumped for +storage+:
# projects first, then snippets.
def records_to_enqueue(storage)
  projects = projects_in_storage(storage)
  snippets = snippets_in_storage(storage)

  [projects, snippets]
end
# Projects whose ProjectRepository row is on +storage+, selected through a
# subquery on `project_id` and built on top of `project_relation`.
def projects_in_storage(storage)
  projects = project_relation
  project_ids = ProjectRepository.for_repository_storage(storage).select(:project_id)

  projects.id_in(project_ids)
end
# Base Project relation used by the dump code, with the route, group and
# namespace owner associations included.
def project_relation
  Project.includes(:route, :group, { namespace: :owner })
end
# Snippets whose SnippetRepository row is on +storage+, selected through a
# subquery on `snippet_id`.
def snippets_in_storage(storage)
  snippet_ids = SnippetRepository.for_repository_storage(storage).select(:snippet_id)

  Snippet.id_in(snippet_ids)
end
def backup_repository(container, type)
BackupRestore.new(
progress,
......@@ -130,6 +168,14 @@ module Backup
).backup
end
# Restores the +type+ repository of +container+ from the backup directory.
#
# `always_create` is passed as true only when +type+ reports itself as the
# project type.
def restore_repository(container, type)
  restorer = BackupRestore.new(progress, type.repository_for(container), backup_repos_path)

  restorer.restore(always_create: type.project?)
end
def restore_object_pools
PoolRepository.includes(:source_project).find_each do |pool|
progress.puts " - Object pool #{pool.disk_path}..."
......
......@@ -21,15 +21,19 @@ RSpec.describe Backup::Repositories do
# Shared examples asserting that a dump writes one .bundle file per repository
# (project, wiki, design, personal snippet and project snippet) under the
# backup path's 'repositories' directory. Expects `project` and `subject` to be
# provided by the including context.
RSpec.shared_examples 'creates repository bundles' do
specify :aggregate_failures do
# Add data to the wiki, design repositories, and snippets, so they will be included in the dump.
create(:wiki_page, container: project)
create(:design, :with_file, issue: create(:issue, project: project))
project_snippet = create(:project_snippet, :repository, project: project)
personal_snippet = create(:personal_snippet, :repository, author: project.owner)
subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.wiki' + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.design' + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', personal_snippet.disk_path + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project_snippet.disk_path + '.bundle'))
end
end
......
......@@ -8,6 +8,11 @@ RSpec.describe ProjectRepository do
it { is_expected.to belong_to(:project) }
end
# Runs the 'shardable scopes' shared examples against two project repositories,
# supplied under the names record_1 and record_2.
it_behaves_like 'shardable scopes' do
let_it_be(:record_1) { create(:project_repository) }
let_it_be(:record_2, reload: true) { create(:project_repository) }
end
describe '.find_project' do
it 'finds project by disk path' do
project = create(:project)
......
......@@ -5567,32 +5567,6 @@ RSpec.describe Project do
end
end
describe '.for_repository_storage' do
it 'returns the projects for a given repository storage' do
stub_storage_settings('test_second_storage' => {
'path' => TestEnv::SECOND_STORAGE_PATH,
'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address
})
expected_project = create(:project, repository_storage: 'default')
create(:project, repository_storage: 'test_second_storage')
expect(described_class.for_repository_storage('default')).to eq([expected_project])
end
end
describe '.excluding_repository_storage' do
it 'returns the projects excluding the given repository storage' do
stub_storage_settings('test_second_storage' => {
'path' => TestEnv::SECOND_STORAGE_PATH,
'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address
})
expected_project = create(:project, repository_storage: 'test_second_storage')
create(:project, repository_storage: 'default')
expect(described_class.excluding_repository_storage('default')).to eq([expected_project])
end
end
describe '.deployments' do
subject { project.deployments }
......
......@@ -13,6 +13,11 @@ RSpec.describe SnippetRepository do
it { is_expected.to belong_to(:snippet) }
end
# Runs the 'shardable scopes' shared examples against two snippet repositories,
# supplied under the names record_1 and record_2.
it_behaves_like 'shardable scopes' do
let_it_be(:record_1) { create(:snippet_repository) }
let_it_be(:record_2, reload: true) { create(:snippet_repository) }
end
describe '.find_snippet' do
it 'finds snippet by disk path' do
snippet = create(:snippet, author: user)
......
# frozen_string_literal: true
# Shared examples for classes exposing `.for_repository_storage` and
# `.excluding_repository_storage`.
#
# The including spec must define `record_1` and `record_2`. record_2 is moved
# to a shard named 'test_second_storage' before each example; record_1 is
# expected to be found under 'default' (presumably the shard its factory
# assigns; confirm against the factories).
RSpec.shared_examples 'shardable scopes' do
let_it_be(:secondary_shard) { create(:shard, name: 'test_second_storage') }
before do
# Put record_2 on the secondary shard so the two records differ by storage.
record_2.update!(shard: secondary_shard)
end
describe '.for_repository_storage' do
it 'returns the objects for a given repository storage' do
expect(described_class.for_repository_storage('default')).to eq([record_1])
end
end
describe '.excluding_repository_storage' do
it 'returns the objects excluding the given repository storage' do
expect(described_class.excluding_repository_storage('default')).to eq([record_2])
end
end
end
......@@ -284,67 +284,74 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do
end
context 'multiple repository storages' do
let_it_be(:default_storage_hash) { Gitlab.config.repositories.storages.default.to_h }
include StubConfiguration
let(:default_storage_name) { 'default' }
let(:second_storage_name) { 'test_second_storage' }
before do
# We only need a backup of the repositories for this test
stub_env('SKIP', 'db,uploads,builds,artifacts,lfs,registry')
allow(Gitlab.config.repositories).to receive(:storages).and_return(storages)
# Avoid asking gitaly about the root ref (which will fail because of the
# mocked storages)
allow_any_instance_of(Repository).to receive(:empty?).and_return(false)
FileUtils.mkdir_p(b_storage_dir)
# Even when overriding the storage, we have to move it there, so it exists
Gitlab::GitalyClient::StorageSettings.allow_disk_access do
FileUtils.mv(
File.join(Settings.absolute(storages['default'].legacy_disk_path), project_b.repository.disk_path + '.git'),
Rails.root.join(storages['test_second_storage'].legacy_disk_path, project_b.repository.disk_path + '.git')
)
end
stub_storage_settings( second_storage_name => {
'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address,
'path' => TestEnv::SECOND_STORAGE_PATH
})
end
after do
FileUtils.rm_rf(test_second_storage_dir)
end
let(:test_second_storage_dir) { Dir.mktmpdir }
let(:test_second_storage) do
Gitlab::GitalyClient::StorageSettings.new(default_storage_hash.merge('path' => test_second_storage_dir))
end
let(:storages) do
{
'default' => Gitlab.config.repositories.storages.default,
'test_second_storage' => test_second_storage
}
end
let!(:project_a) { create(:project, :repository) }
let!(:project_a_wiki_page) { create(:wiki_page, container: project_a) }
let!(:project_a_design) { create(:design, :with_file, issue: create(:issue, project: project_a)) }
let!(:project_b) { create(:project, :repository, repository_storage: 'test_second_storage') }
let!(:b_storage_dir) { File.join(test_second_storage_dir, File.dirname(project_b.disk_path)) }
shared_examples 'includes repositories in all repository storages' do
specify :aggregate_failures do
project_a = create(:project, :repository)
project_a.track_project_repository
project_snippet_a = create(:project_snippet, :repository, project: project_a, author: project_a.owner)
project_b = create(:project, :repository, repository_storage: second_storage_name)
project_b.track_project_repository
project_snippet_b = create(:project_snippet, :repository, project: project_b, author: project_b.owner)
project_snippet_b.snippet_repository.update!(shard: project_b.project_repository.shard)
create(:wiki_page, container: project_a)
create(:design, :with_file, issue: create(:issue, project: project_a))
move_repository_to_secondary(project_b)
move_repository_to_secondary(project_snippet_b)
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout
tar_contents, exit_status = Gitlab::Popen.popen(
%W{tar -tvf #{backup_tar} repositories}
)
tar_lines = tar_contents.lines.grep(/\.bundle/)
expect(exit_status).to eq(0)
expect(tar_contents).to include(
"repositories/#{project_a.disk_path}.bundle",
"repositories/#{project_a.disk_path}.wiki.bundle",
"repositories/#{project_a.disk_path}.design.bundle",
"repositories/#{project_b.disk_path}.bundle"
)
[
"#{project_a.disk_path}.bundle",
"#{project_a.disk_path}.wiki.bundle",
"#{project_a.disk_path}.design.bundle",
"#{project_b.disk_path}.bundle",
"#{project_snippet_a.disk_path}.bundle",
"#{project_snippet_b.disk_path}.bundle"
].each do |repo_name|
repo_lines = tar_lines.grep(/#{repo_name}/)
expect(repo_lines.size).to eq 1
# Checking that the size of the bundle is bigger than 0
expect(repo_lines.first.split[4].to_i > 0).to be true
end
end
# Moves the on-disk `<disk_path>.git` directory of +record+ from the default
# storage's legacy disk path to the same relative location under the storage
# named by `second_storage_name`, creating the destination parent directory
# first. Runs inside `allow_disk_access`.
def move_repository_to_secondary(record)
  Gitlab::GitalyClient::StorageSettings.allow_disk_access do
    storages = Gitlab.config.repositories.storages
    source_root = storages.default.legacy_disk_path
    target_root = storages[second_storage_name].legacy_disk_path
    git_dir = "#{record.disk_path}.git"

    # mkdir_p is a no-op when the directory already exists.
    FileUtils.mkdir_p(File.join(target_root, File.dirname(record.disk_path)))
    FileUtils.mv(File.join(source_root, git_dir), File.join(target_root, git_dir))
  end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment