Commit bab05d82 authored by Francisco Javier López's avatar Francisco Javier López Committed by Jan Provaznik

Add snippets to GitLab backups

This commit adds snippets to GitLab backups. More precisely, it
adds the snippet repositories, which were not previously included
in backups.
parent 3dc10a64
...@@ -5,6 +5,10 @@ module Shardable ...@@ -5,6 +5,10 @@ module Shardable
included do included do
belongs_to :shard belongs_to :shard
# Records whose associated shard is named +repository_storage+. The shard
# association is joined and the filter is applied on +shards.name+; the
# argument may be a single storage name or a list of names.
scope :for_repository_storage, -> (repository_storage) { joins(:shard).where(shards: { name: repository_storage }) }
# Inverse of +for_repository_storage+: records whose associated shard name is
# NOT +repository_storage+ (or not among the given list of names).
scope :excluding_repository_storage, -> (repository_storage) { joins(:shard).where.not(shards: { name: repository_storage }) }
validates :shard, presence: true validates :shard, presence: true
end end
......
...@@ -676,8 +676,6 @@ class Project < ApplicationRecord ...@@ -676,8 +676,6 @@ class Project < ApplicationRecord
scope :joins_import_state, -> { joins("INNER JOIN project_mirror_data import_state ON import_state.project_id = projects.id") } scope :joins_import_state, -> { joins("INNER JOIN project_mirror_data import_state ON import_state.project_id = projects.id") }
scope :for_group, -> (group) { where(group: group) } scope :for_group, -> (group) { where(group: group) }
scope :for_group_and_its_subgroups, ->(group) { where(namespace_id: group.self_and_descendants.select(:id)) } scope :for_group_and_its_subgroups, ->(group) { where(namespace_id: group.self_and_descendants.select(:id)) }
scope :for_repository_storage, -> (repository_storage) { where(repository_storage: repository_storage) }
scope :excluding_repository_storage, -> (repository_storage) { where.not(repository_storage: repository_storage) }
class << self class << self
# Searches for a list of projects based on the query given in `query`. # Searches for a list of projects based on the query given in `query`.
......
---
title: Add snippets to GitLab backups
merge_request: 43694
author:
type: changed
...@@ -54,6 +54,7 @@ including: ...@@ -54,6 +54,7 @@ including:
- LFS objects - LFS objects
- Container Registry images - Container Registry images
- GitLab Pages content - GitLab Pages content
- Snippets
CAUTION: **Warning:** CAUTION: **Warning:**
GitLab does not back up any configuration files, SSL certificates, or system GitLab does not back up any configuration files, SSL certificates, or system
...@@ -1081,7 +1082,7 @@ For more information, see: ...@@ -1081,7 +1082,7 @@ For more information, see:
- PostgreSQL issue tracker: - PostgreSQL issue tracker:
- [Not being a superuser](https://www.postgresql.org/message-id/201110220712.30886.adrian.klaver@gmail.com). - [Not being a superuser](https://www.postgresql.org/message-id/201110220712.30886.adrian.klaver@gmail.com).
- [Having different owners](https://www.postgresql.org/message-id/2039.1177339749@sss.pgh.pa.us). - [Having different owners](https://www.postgresql.org/message-id/2039.1177339749@sss.pgh.pa.us).
- Stack Overflow: [Resulting errors](https://stackoverflow.com/questions/4368789/error-must-be-owner-of-language-plpgsql). - Stack Overflow: [Resulting errors](https://stackoverflow.com/questions/4368789/error-must-be-owner-of-language-plpgsql).
### When the secrets file is lost ### When the secrets file is lost
......
...@@ -17,9 +17,7 @@ module Backup ...@@ -17,9 +17,7 @@ module Backup
return dump_consecutive return dump_consecutive
end end
if Project.excluding_repository_storage(Gitlab.config.repositories.storages.keys).exists? check_valid_storages!
raise Error, 'repositories.storages in gitlab.yml is misconfigured'
end
semaphore = Concurrent::Semaphore.new(max_concurrency) semaphore = Concurrent::Semaphore.new(max_concurrency)
errors = Queue.new errors = Queue.new
...@@ -53,16 +51,16 @@ module Backup ...@@ -53,16 +51,16 @@ module Backup
private private
def restore_repository(container, type) def check_valid_storages!
BackupRestore.new( [ProjectRepository, SnippetRepository].each do |klass|
progress, if klass.excluding_repository_storage(Gitlab.config.repositories.storages.keys).exists?
type.repository_for(container), raise Error, "repositories.storages in gitlab.yml does not include all storages used by #{klass}"
backup_repos_path end
).restore(always_create: type.project?) end
end end
def backup_repos_path def backup_repos_path
File.join(Gitlab.config.backup.path, 'repositories') @backup_repos_path ||= File.join(Gitlab.config.backup.path, 'repositories')
end end
def prepare def prepare
...@@ -72,11 +70,20 @@ module Backup ...@@ -72,11 +70,20 @@ module Backup
end end
def dump_consecutive def dump_consecutive
Project.includes(:route, :group, namespace: :owner).find_each(batch_size: 1000) do |project| dump_consecutive_projects
dump_consecutive_snippets
end
def dump_consecutive_projects
project_relation.find_each(batch_size: 1000) do |project|
dump_project(project) dump_project(project)
end end
end end
# Dumps every snippet one after another, loading them from the database in
# batches of 1000 and handing each one to +dump_snippet+.
def dump_consecutive_snippets
  Snippet.find_each(batch_size: 1000) do |snippet|
    dump_snippet(snippet)
  end
end
def dump_storage(storage, semaphore, max_storage_concurrency:) def dump_storage(storage, semaphore, max_storage_concurrency:)
errors = Queue.new errors = Queue.new
queue = InterlockSizedQueue.new(1) queue = InterlockSizedQueue.new(1)
...@@ -84,13 +91,18 @@ module Backup ...@@ -84,13 +91,18 @@ module Backup
threads = Array.new(max_storage_concurrency) do threads = Array.new(max_storage_concurrency) do
Thread.new do Thread.new do
Rails.application.executor.wrap do Rails.application.executor.wrap do
while project = queue.pop while container = queue.pop
ActiveSupport::Dependencies.interlock.permit_concurrent_loads do ActiveSupport::Dependencies.interlock.permit_concurrent_loads do
semaphore.acquire semaphore.acquire
end end
begin begin
dump_project(project) case container
when Project
dump_project(container)
when Snippet
dump_snippet(container)
end
rescue => e rescue => e
errors << e errors << e
break break
...@@ -102,11 +114,7 @@ module Backup ...@@ -102,11 +114,7 @@ module Backup
end end
end end
Project.for_repository_storage(storage).includes(:route, :group, namespace: :owner).find_each(batch_size: 100) do |project| enqueue_records_for_storage(storage, queue, errors)
break unless errors.empty?
queue.push(project)
end
raise errors.pop unless errors.empty? raise errors.pop unless errors.empty?
ensure ensure
...@@ -122,6 +130,36 @@ module Backup ...@@ -122,6 +130,36 @@ module Backup
backup_repository(project, Gitlab::GlRepository::DESIGN) backup_repository(project, Gitlab::GlRepository::DESIGN)
end end
# Backs up the repository of a single snippet by delegating to
# +backup_repository+ with the snippet repository type.
#
# snippet - the snippet whose repository should be backed up
def dump_snippet(snippet)
  repository_type = Gitlab::GlRepository::SNIPPET

  backup_repository(snippet, repository_type)
end
# Pushes every record (projects first, then snippets) stored on +storage+
# onto +queue+ so the worker threads can dump them.
#
# storage - name of the repository storage being dumped
# queue   - queue the worker threads pop records from
# errors  - queue the worker threads push exceptions to; as soon as it is
#           non-empty no further records are enqueued
def enqueue_records_for_storage(storage, queue, errors)
  records_to_enqueue(storage).each do |relation|
    # The inner `break` only leaves `find_each`. Without this check a failure
    # while dumping projects would still trigger a (useless) batch query on
    # the next relation before bailing out again.
    break unless errors.empty?

    relation.find_each(batch_size: 100) do |container|
      break unless errors.empty?

      queue.push(container)
    end
  end
end
# Returns the relations whose records must be dumped for +storage+:
# the projects relation first, followed by the snippets relation.
def records_to_enqueue(storage)
  project_records = projects_in_storage(storage)
  snippet_records = snippets_in_storage(storage)

  [project_records, snippet_records]
end
# Projects (with the eager loading configured by +project_relation+) whose
# tracked ProjectRepository lives on +storage+.
def projects_in_storage(storage)
  stored_project_ids = ProjectRepository.for_repository_storage(storage).select(:project_id)

  project_relation.id_in(stored_project_ids)
end
# Base relation used when dumping projects; eager-loads the route, the group
# and the namespace owner of each project.
def project_relation
  eager_loaded = [:route, :group, { namespace: :owner }]

  Project.includes(*eager_loaded)
end
# Snippets whose tracked SnippetRepository lives on +storage+.
def snippets_in_storage(storage)
  stored_snippet_ids = SnippetRepository.for_repository_storage(storage).select(:snippet_id)

  Snippet.id_in(stored_snippet_ids)
end
def backup_repository(container, type) def backup_repository(container, type)
BackupRestore.new( BackupRestore.new(
progress, progress,
...@@ -130,6 +168,14 @@ module Backup ...@@ -130,6 +168,14 @@ module Backup
).backup ).backup
end end
# Restores the repository of +container+ from the backup directory.
#
# container - the record owning the repository
# type      - repository type object; it resolves the repository for the
#             container and, via +project?+, decides the +always_create+ flag
def restore_repository(container, type)
  restorer = BackupRestore.new(progress, type.repository_for(container), backup_repos_path)

  restorer.restore(always_create: type.project?)
end
def restore_object_pools def restore_object_pools
PoolRepository.includes(:source_project).find_each do |pool| PoolRepository.includes(:source_project).find_each do |pool|
progress.puts " - Object pool #{pool.disk_path}..." progress.puts " - Object pool #{pool.disk_path}..."
......
...@@ -21,15 +21,19 @@ RSpec.describe Backup::Repositories do ...@@ -21,15 +21,19 @@ RSpec.describe Backup::Repositories do
RSpec.shared_examples 'creates repository bundles' do RSpec.shared_examples 'creates repository bundles' do
specify :aggregate_failures do specify :aggregate_failures do
# Add data to the wiki and design repositories, so they will be included in the dump. # Add data to the wiki, design repositories, and snippets, so they will be included in the dump.
create(:wiki_page, container: project) create(:wiki_page, container: project)
create(:design, :with_file, issue: create(:issue, project: project)) create(:design, :with_file, issue: create(:issue, project: project))
project_snippet = create(:project_snippet, :repository, project: project)
personal_snippet = create(:personal_snippet, :repository, author: project.owner)
subject.dump(max_concurrency: 1, max_storage_concurrency: 1) subject.dump(max_concurrency: 1, max_storage_concurrency: 1)
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.bundle')) expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.wiki' + '.bundle')) expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.wiki' + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.design' + '.bundle')) expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project.disk_path + '.design' + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', personal_snippet.disk_path + '.bundle'))
expect(File).to exist(File.join(Gitlab.config.backup.path, 'repositories', project_snippet.disk_path + '.bundle'))
end end
end end
......
...@@ -8,6 +8,11 @@ RSpec.describe ProjectRepository do ...@@ -8,6 +8,11 @@ RSpec.describe ProjectRepository do
it { is_expected.to belong_to(:project) } it { is_expected.to belong_to(:project) }
end end
it_behaves_like 'shardable scopes' do
let_it_be(:record_1) { create(:project_repository) }
let_it_be(:record_2, reload: true) { create(:project_repository) }
end
describe '.find_project' do describe '.find_project' do
it 'finds project by disk path' do it 'finds project by disk path' do
project = create(:project) project = create(:project)
......
...@@ -5567,32 +5567,6 @@ RSpec.describe Project do ...@@ -5567,32 +5567,6 @@ RSpec.describe Project do
end end
end end
describe '.for_repository_storage' do
it 'returns the projects for a given repository storage' do
stub_storage_settings('test_second_storage' => {
'path' => TestEnv::SECOND_STORAGE_PATH,
'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address
})
expected_project = create(:project, repository_storage: 'default')
create(:project, repository_storage: 'test_second_storage')
expect(described_class.for_repository_storage('default')).to eq([expected_project])
end
end
describe '.excluding_repository_storage' do
it 'returns the projects excluding the given repository storage' do
stub_storage_settings('test_second_storage' => {
'path' => TestEnv::SECOND_STORAGE_PATH,
'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address
})
expected_project = create(:project, repository_storage: 'test_second_storage')
create(:project, repository_storage: 'default')
expect(described_class.excluding_repository_storage('default')).to eq([expected_project])
end
end
describe '.deployments' do describe '.deployments' do
subject { project.deployments } subject { project.deployments }
......
...@@ -13,6 +13,11 @@ RSpec.describe SnippetRepository do ...@@ -13,6 +13,11 @@ RSpec.describe SnippetRepository do
it { is_expected.to belong_to(:snippet) } it { is_expected.to belong_to(:snippet) }
end end
it_behaves_like 'shardable scopes' do
let_it_be(:record_1) { create(:snippet_repository) }
let_it_be(:record_2, reload: true) { create(:snippet_repository) }
end
describe '.find_snippet' do describe '.find_snippet' do
it 'finds snippet by disk path' do it 'finds snippet by disk path' do
snippet = create(:snippet, author: user) snippet = create(:snippet, author: user)
......
# frozen_string_literal: true
# Shared examples for models that expose the `for_repository_storage` and
# `excluding_repository_storage` scopes.
#
# The including spec must define two persisted records of the described class:
#   record_1 - left untouched; the assertions below assume it sits on the
#              'default' shard
#   record_2 - reassigned to the 'test_second_storage' shard before each example
RSpec.shared_examples 'shardable scopes' do
# Second shard used to tell the two records apart.
let_it_be(:secondary_shard) { create(:shard, name: 'test_second_storage') }
before do
# Move record_2 away from record_1's shard so each scope matches exactly one record.
record_2.update!(shard: secondary_shard)
end
describe '.for_repository_storage' do
it 'returns the objects for a given repository storage' do
expect(described_class.for_repository_storage('default')).to eq([record_1])
end
end
describe '.excluding_repository_storage' do
it 'returns the objects excluding the given repository storage' do
expect(described_class.excluding_repository_storage('default')).to eq([record_2])
end
end
end
...@@ -284,67 +284,74 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do ...@@ -284,67 +284,74 @@ RSpec.describe 'gitlab:app namespace rake task', :delete do
end end
context 'multiple repository storages' do context 'multiple repository storages' do
let_it_be(:default_storage_hash) { Gitlab.config.repositories.storages.default.to_h } include StubConfiguration
let(:default_storage_name) { 'default' }
let(:second_storage_name) { 'test_second_storage' }
before do before do
# We only need a backup of the repositories for this test # We only need a backup of the repositories for this test
stub_env('SKIP', 'db,uploads,builds,artifacts,lfs,registry') stub_env('SKIP', 'db,uploads,builds,artifacts,lfs,registry')
stub_storage_settings( second_storage_name => {
allow(Gitlab.config.repositories).to receive(:storages).and_return(storages) 'gitaly_address' => Gitlab.config.repositories.storages.default.gitaly_address,
'path' => TestEnv::SECOND_STORAGE_PATH
# Avoid asking gitaly about the root ref (which will fail because of the })
# mocked storages)
allow_any_instance_of(Repository).to receive(:empty?).and_return(false)
FileUtils.mkdir_p(b_storage_dir)
# Even when overriding the storage, we have to move it there, so it exists
Gitlab::GitalyClient::StorageSettings.allow_disk_access do
FileUtils.mv(
File.join(Settings.absolute(storages['default'].legacy_disk_path), project_b.repository.disk_path + '.git'),
Rails.root.join(storages['test_second_storage'].legacy_disk_path, project_b.repository.disk_path + '.git')
)
end
end end
after do
FileUtils.rm_rf(test_second_storage_dir)
end
let(:test_second_storage_dir) { Dir.mktmpdir }
let(:test_second_storage) do
Gitlab::GitalyClient::StorageSettings.new(default_storage_hash.merge('path' => test_second_storage_dir))
end
let(:storages) do
{
'default' => Gitlab.config.repositories.storages.default,
'test_second_storage' => test_second_storage
}
end
let!(:project_a) { create(:project, :repository) }
let!(:project_a_wiki_page) { create(:wiki_page, container: project_a) }
let!(:project_a_design) { create(:design, :with_file, issue: create(:issue, project: project_a)) }
let!(:project_b) { create(:project, :repository, repository_storage: 'test_second_storage') }
let!(:b_storage_dir) { File.join(test_second_storage_dir, File.dirname(project_b.disk_path)) }
shared_examples 'includes repositories in all repository storages' do shared_examples 'includes repositories in all repository storages' do
specify :aggregate_failures do specify :aggregate_failures do
project_a = create(:project, :repository)
project_a.track_project_repository
project_snippet_a = create(:project_snippet, :repository, project: project_a, author: project_a.owner)
project_b = create(:project, :repository, repository_storage: second_storage_name)
project_b.track_project_repository
project_snippet_b = create(:project_snippet, :repository, project: project_b, author: project_b.owner)
project_snippet_b.snippet_repository.update!(shard: project_b.project_repository.shard)
create(:wiki_page, container: project_a)
create(:design, :with_file, issue: create(:issue, project: project_a))
move_repository_to_secondary(project_b)
move_repository_to_secondary(project_snippet_b)
expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout expect { run_rake_task('gitlab:backup:create') }.to output.to_stdout
tar_contents, exit_status = Gitlab::Popen.popen( tar_contents, exit_status = Gitlab::Popen.popen(
%W{tar -tvf #{backup_tar} repositories} %W{tar -tvf #{backup_tar} repositories}
) )
tar_lines = tar_contents.lines.grep(/\.bundle/)
expect(exit_status).to eq(0) expect(exit_status).to eq(0)
expect(tar_contents).to include(
"repositories/#{project_a.disk_path}.bundle", [
"repositories/#{project_a.disk_path}.wiki.bundle", "#{project_a.disk_path}.bundle",
"repositories/#{project_a.disk_path}.design.bundle", "#{project_a.disk_path}.wiki.bundle",
"repositories/#{project_b.disk_path}.bundle" "#{project_a.disk_path}.design.bundle",
) "#{project_b.disk_path}.bundle",
"#{project_snippet_a.disk_path}.bundle",
"#{project_snippet_b.disk_path}.bundle"
].each do |repo_name|
repo_lines = tar_lines.grep(/#{repo_name}/)
expect(repo_lines.size).to eq 1
# Checking that the size of the bundle is bigger than 0
expect(repo_lines.first.split[4].to_i > 0).to be true
end
end
def move_repository_to_secondary(record)
Gitlab::GitalyClient::StorageSettings.allow_disk_access do
default_shard_legacy_path = Gitlab.config.repositories.storages.default.legacy_disk_path
secondary_legacy_path = Gitlab.config.repositories.storages[second_storage_name].legacy_disk_path
dst_dir = File.join(secondary_legacy_path, File.dirname(record.disk_path))
FileUtils.mkdir_p(dst_dir) unless Dir.exist?(dst_dir)
FileUtils.mv(
File.join(default_shard_legacy_path, record.disk_path + '.git'),
File.join(secondary_legacy_path, record.disk_path + '.git')
)
end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment