Commit 3aedccb1 authored by Zeger-Jan van de Weg

Port cleanup tasks to use Gitaly

Rake tasks cleaning up the Git storage were still using direct disk
access, which won't work if the disks aren't attached. To mitigate this, a
migration issue was created.

To port gitlab:cleanup:dirs, and gitlab:cleanup:repos, a new RPC was
required, ListDirectories. This was implemented in Gitaly, through
https://gitlab.com/gitlab-org/gitaly/merge_requests/868.

To be able to use the new RPC the Gitaly server was bumped to v0.120.

This is an RPC that will not use feature gates, as this doesn't scale on
.com so there is no way to test it at scale. Furthermore, we _know_ it
doesn't scale, but this might be a useful task for smaller instances.

Lastly, the tests are slightly updated to also work when the disk isn't
attached. Even though this is not planned, it was very little effort and
thus I applied the boy scout rule.

Closes https://gitlab.com/gitlab-org/gitaly/issues/954
Closes https://gitlab.com/gitlab-org/gitlab-ce/issues/40529
parent c380d3ac
---
title: Administrative cleanup rake tasks now leverage Gitaly
merge_request: 21588
author:
type: changed
...@@ -5,6 +5,14 @@ module Gitlab ...@@ -5,6 +5,14 @@ module Gitlab
@storage = storage @storage = storage
end end
# Lists the directories found in this storage's git storage directory,
# lexically ordered.
#
# depth - forwarded unchanged to the ListDirectories RPC (defaults to 1).
#
# Returns a flat Array built by concatenating the `paths` of every response
# object yielded by the :list_directories call on the :storage_service.
def list_directories(depth: 1)
  list_request = Gitaly::ListDirectoriesRequest.new(storage_name: @storage, depth: depth)
  responses = GitalyClient.call(@storage, :storage_service, :list_directories, list_request)

  # Each response carries a batch of paths; merge the batches into one list.
  responses.flat_map { |response| response.paths }
end
# Delete all repositories in the storage. This is a slow and VERY DESTRUCTIVE operation. # Delete all repositories in the storage. This is a slow and VERY DESTRUCTIVE operation.
def delete_all_repositories def delete_all_repositories
request = Gitaly::DeleteAllRepositoriesRequest.new(storage_name: @storage) request = Gitaly::DeleteAllRepositoriesRequest.new(storage_name: @storage)
......
# Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/954 # frozen_string_literal: true
# require 'set'
namespace :gitlab do namespace :gitlab do
namespace :cleanup do namespace :cleanup do
HASHED_REPOSITORY_NAME = '@hashed'.freeze
desc "GitLab | Cleanup | Clean namespaces" desc "GitLab | Cleanup | Clean namespaces"
task dirs: :gitlab_environment do task dirs: :gitlab_environment do
warn_user_is_not_gitlab namespaces = Set.new(Namespace.pluck(:path))
namespaces << Storage::HashedProject::ROOT_PATH_PREFIX
namespaces = Namespace.pluck(:path) Gitaly::Server.all.each do |server|
namespaces << HASHED_REPOSITORY_NAME # add so that it will be ignored all_dirs = Gitlab::GitalyClient::StorageService
Gitlab.config.repositories.storages.each do |name, repository_storage| .new(server.storage)
git_base_path = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path } .list_directories(depth: 0)
all_dirs = Dir.glob(git_base_path + '/*') .reject { |dir| dir.ends_with?('.git') || namespaces.include?(File.basename(dir)) }
puts git_base_path.color(:yellow)
puts "Looking for directories to remove... " puts "Looking for directories to remove... "
all_dirs.reject! do |dir|
# skip if git repo
dir =~ /.git$/
end
all_dirs.reject! do |dir|
dir_name = File.basename dir
# skip if namespace present
namespaces.include?(dir_name)
end
all_dirs.each do |dir_path| all_dirs.each do |dir_path|
if remove? if remove?
if FileUtils.rm_rf dir_path begin
puts "Removed...#{dir_path}".color(:red) Gitlab::GitalyClient::NamespaceService.new(server.storage)
else .remove(dir_path)
puts "Cannot remove #{dir_path}".color(:red)
puts "Removed...#{dir_path}"
rescue StandardError => e
puts "Cannot remove #{dir_path}: #{e.message}".color(:red)
end end
else else
puts "Can be removed: #{dir_path}".color(:red) puts "Can be removed: #{dir_path}".color(:red)
...@@ -49,29 +38,29 @@ namespace :gitlab do ...@@ -49,29 +38,29 @@ namespace :gitlab do
desc "GitLab | Cleanup | Clean repositories" desc "GitLab | Cleanup | Clean repositories"
task repos: :gitlab_environment do task repos: :gitlab_environment do
warn_user_is_not_gitlab
move_suffix = "+orphaned+#{Time.now.to_i}" move_suffix = "+orphaned+#{Time.now.to_i}"
Gitlab.config.repositories.storages.each do |name, repository_storage|
repo_root = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path } Gitaly::Server.all.each do |server|
Gitlab::GitalyClient::StorageService
# Look for global repos (legacy, depth 1) and normal repos (depth 2) .new(server.storage)
IO.popen(%W(find #{repo_root} -mindepth 1 -maxdepth 2 -name *.git)) do |find| .list_directories
find.each_line do |path| .each do |path|
path.chomp! repo_with_namespace = path.chomp('.git').chomp('.wiki')
repo_with_namespace = path
.sub(repo_root, '') # TODO ignoring hashed repositories for now. But revisit to fully support
.sub(%r{^/*}, '') # possible orphaned hashed repos
.chomp('.git') next if repo_with_namespace.start_with?(Storage::HashedProject::ROOT_PATH_PREFIX)
.chomp('.wiki') next if Project.find_by_full_path(repo_with_namespace)
# TODO ignoring hashed repositories for now. But revisit to fully support new_path = path + move_suffix
# possible orphaned hashed repos puts path.inspect + ' -> ' + new_path.inspect
next if repo_with_namespace.start_with?("#{HASHED_REPOSITORY_NAME}/") || Project.find_by_full_path(repo_with_namespace)
begin
new_path = path + move_suffix Gitlab::GitalyClient::NamespaceService
puts path.inspect + ' -> ' + new_path.inspect .new(server.storage)
File.rename(path, new_path) .rename(path, new_path)
rescue StandardError => e
puts "Error occured while moving the repository: #{e.message}".color(:red)
end end
end end
end end
......
...@@ -6,6 +6,8 @@ describe 'gitlab:cleanup rake tasks' do ...@@ -6,6 +6,8 @@ describe 'gitlab:cleanup rake tasks' do
end end
describe 'cleanup namespaces and repos' do describe 'cleanup namespaces and repos' do
let(:gitlab_shell) { Gitlab::Shell.new }
let(:storage) { storages.keys.first }
let(:storages) do let(:storages) do
{ {
'default' => Gitlab::GitalyClient::StorageSettings.new(@default_storage_hash.merge('path' => 'tmp/tests/default_storage')) 'default' => Gitlab::GitalyClient::StorageSettings.new(@default_storage_hash.merge('path' => 'tmp/tests/default_storage'))
...@@ -17,53 +19,56 @@ describe 'gitlab:cleanup rake tasks' do ...@@ -17,53 +19,56 @@ describe 'gitlab:cleanup rake tasks' do
end end
before do before do
FileUtils.mkdir(Settings.absolute('tmp/tests/default_storage'))
allow(Gitlab.config.repositories).to receive(:storages).and_return(storages) allow(Gitlab.config.repositories).to receive(:storages).and_return(storages)
end end
after do after do
FileUtils.rm_rf(Settings.absolute('tmp/tests/default_storage')) Gitlab::GitalyClient::StorageService.new(storage).delete_all_repositories
end end
describe 'cleanup:repos' do describe 'cleanup:repos' do
before do before do
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/broken/project.git')) gitlab_shell.add_namespace(storage, 'broken/project.git')
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git')) gitlab_shell.add_namespace(storage, '@hashed/12/34/5678.git')
end end
it 'moves it to an orphaned path' do it 'moves it to an orphaned path' do
run_rake_task('gitlab:cleanup:repos') now = Time.now
repo_list = Dir['tmp/tests/default_storage/broken/*']
Timecop.freeze(now) do
run_rake_task('gitlab:cleanup:repos')
repo_list = Gitlab::GitalyClient::StorageService.new(storage).list_directories(depth: 0)
expect(repo_list.first).to include('+orphaned+') expect(repo_list.last).to include("broken+orphaned+#{now.to_i}")
end
end end
it 'ignores @hashed repos' do it 'ignores @hashed repos' do
run_rake_task('gitlab:cleanup:repos') run_rake_task('gitlab:cleanup:repos')
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))).to be_truthy expect(gitlab_shell.exists?(storage, '@hashed/12/34/5678.git')).to be(true)
end end
end end
describe 'cleanup:dirs' do describe 'cleanup:dirs' do
it 'removes missing namespaces' do it 'removes missing namespaces' do
FileUtils.mkdir_p(Settings.absolute("tmp/tests/default_storage/namespace_1/project.git")) gitlab_shell.add_namespace(storage, "namespace_1/project.git")
FileUtils.mkdir_p(Settings.absolute("tmp/tests/default_storage/namespace_2/project.git")) gitlab_shell.add_namespace(storage, "namespace_2/project.git")
allow(Namespace).to receive(:pluck).and_return('namespace_1') allow(Namespace).to receive(:pluck).and_return(['namespace_1'])
stub_env('REMOVE', 'true') stub_env('REMOVE', 'true')
run_rake_task('gitlab:cleanup:dirs') run_rake_task('gitlab:cleanup:dirs')
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/namespace_1'))).to be_truthy expect(gitlab_shell.exists?(storage, 'namespace_1')).to be(true)
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/namespace_2'))).to be_falsey expect(gitlab_shell.exists?(storage, 'namespace_2')).to be(false)
end end
it 'ignores @hashed directory' do it 'ignores @hashed directory' do
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git')) gitlab_shell.add_namespace(storage, '@hashed/12/34/5678.git')
run_rake_task('gitlab:cleanup:dirs') run_rake_task('gitlab:cleanup:dirs')
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))).to be_truthy expect(gitlab_shell.exists?(storage, '@hashed/12/34/5678.git')).to be(true)
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment