Commit 81f4dc05 authored by Douwe Maan

Merge branch 'zj-cleanup-port-gitaly' into 'master'

Port cleanup tasks to use Gitaly

Closes #40529 and gitaly#954

See merge request gitlab-org/gitlab-ce!21588
parents 30311a8b 3aedccb1
---
title: Administrative cleanup rake tasks now leverage Gitaly
merge_request: 21588
author:
type: changed
...@@ -5,6 +5,14 @@ module Gitlab ...@@ -5,6 +5,14 @@ module Gitlab
@storage = storage @storage = storage
end end
# Lists every directory in the git storage directory, lexically ordered.
#
# depth - value forwarded unchanged as the `depth` field of the
#         ListDirectories request (defaults to 1).
#
# Returns a single flat collection built from the `paths` of each response
# yielded by the :list_directories call.
def list_directories(depth: 1)
  responses = GitalyClient.call(
    @storage,
    :storage_service,
    :list_directories,
    Gitaly::ListDirectoriesRequest.new(storage_name: @storage, depth: depth)
  )

  # Each response carries its own batch of paths; merge them into one list.
  responses.flat_map { |response| response.paths }
end
# Delete all repositories in the storage. This is a slow and VERY DESTRUCTIVE operation. # Delete all repositories in the storage. This is a slow and VERY DESTRUCTIVE operation.
def delete_all_repositories def delete_all_repositories
request = Gitaly::DeleteAllRepositoriesRequest.new(storage_name: @storage) request = Gitaly::DeleteAllRepositoriesRequest.new(storage_name: @storage)
......
# Gitaly migration: https://gitlab.com/gitlab-org/gitaly/issues/954 # frozen_string_literal: true
# require 'set'
namespace :gitlab do namespace :gitlab do
namespace :cleanup do namespace :cleanup do
HASHED_REPOSITORY_NAME = '@hashed'.freeze
desc "GitLab | Cleanup | Clean namespaces" desc "GitLab | Cleanup | Clean namespaces"
task dirs: :gitlab_environment do task dirs: :gitlab_environment do
warn_user_is_not_gitlab namespaces = Set.new(Namespace.pluck(:path))
namespaces << Storage::HashedProject::ROOT_PATH_PREFIX
namespaces = Namespace.pluck(:path) Gitaly::Server.all.each do |server|
namespaces << HASHED_REPOSITORY_NAME # add so that it will be ignored all_dirs = Gitlab::GitalyClient::StorageService
Gitlab.config.repositories.storages.each do |name, repository_storage| .new(server.storage)
git_base_path = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path } .list_directories(depth: 0)
all_dirs = Dir.glob(git_base_path + '/*') .reject { |dir| dir.ends_with?('.git') || namespaces.include?(File.basename(dir)) }
puts git_base_path.color(:yellow)
puts "Looking for directories to remove... " puts "Looking for directories to remove... "
all_dirs.reject! do |dir|
# skip if git repo
dir =~ /.git$/
end
all_dirs.reject! do |dir|
dir_name = File.basename dir
# skip if namespace present
namespaces.include?(dir_name)
end
all_dirs.each do |dir_path| all_dirs.each do |dir_path|
if remove? if remove?
if FileUtils.rm_rf dir_path begin
puts "Removed...#{dir_path}".color(:red) Gitlab::GitalyClient::NamespaceService.new(server.storage)
else .remove(dir_path)
puts "Cannot remove #{dir_path}".color(:red)
puts "Removed...#{dir_path}"
rescue StandardError => e
puts "Cannot remove #{dir_path}: #{e.message}".color(:red)
end end
else else
puts "Can be removed: #{dir_path}".color(:red) puts "Can be removed: #{dir_path}".color(:red)
...@@ -49,29 +38,29 @@ namespace :gitlab do ...@@ -49,29 +38,29 @@ namespace :gitlab do
desc "GitLab | Cleanup | Clean repositories" desc "GitLab | Cleanup | Clean repositories"
task repos: :gitlab_environment do task repos: :gitlab_environment do
warn_user_is_not_gitlab
move_suffix = "+orphaned+#{Time.now.to_i}" move_suffix = "+orphaned+#{Time.now.to_i}"
Gitlab.config.repositories.storages.each do |name, repository_storage|
repo_root = Gitlab::GitalyClient::StorageSettings.allow_disk_access { repository_storage.legacy_disk_path } Gitaly::Server.all.each do |server|
Gitlab::GitalyClient::StorageService
# Look for global repos (legacy, depth 1) and normal repos (depth 2) .new(server.storage)
IO.popen(%W(find #{repo_root} -mindepth 1 -maxdepth 2 -name *.git)) do |find| .list_directories
find.each_line do |path| .each do |path|
path.chomp! repo_with_namespace = path.chomp('.git').chomp('.wiki')
repo_with_namespace = path
.sub(repo_root, '') # TODO ignoring hashed repositories for now. But revisit to fully support
.sub(%r{^/*}, '') # possible orphaned hashed repos
.chomp('.git') next if repo_with_namespace.start_with?(Storage::HashedProject::ROOT_PATH_PREFIX)
.chomp('.wiki') next if Project.find_by_full_path(repo_with_namespace)
# TODO ignoring hashed repositories for now. But revisit to fully support new_path = path + move_suffix
# possible orphaned hashed repos puts path.inspect + ' -> ' + new_path.inspect
next if repo_with_namespace.start_with?("#{HASHED_REPOSITORY_NAME}/") || Project.find_by_full_path(repo_with_namespace)
begin
new_path = path + move_suffix Gitlab::GitalyClient::NamespaceService
puts path.inspect + ' -> ' + new_path.inspect .new(server.storage)
File.rename(path, new_path) .rename(path, new_path)
rescue StandardError => e
puts "Error occured while moving the repository: #{e.message}".color(:red)
end end
end end
end end
......
...@@ -6,6 +6,8 @@ describe 'gitlab:cleanup rake tasks' do ...@@ -6,6 +6,8 @@ describe 'gitlab:cleanup rake tasks' do
end end
describe 'cleanup namespaces and repos' do describe 'cleanup namespaces and repos' do
let(:gitlab_shell) { Gitlab::Shell.new }
let(:storage) { storages.keys.first }
let(:storages) do let(:storages) do
{ {
'default' => Gitlab::GitalyClient::StorageSettings.new(@default_storage_hash.merge('path' => 'tmp/tests/default_storage')) 'default' => Gitlab::GitalyClient::StorageSettings.new(@default_storage_hash.merge('path' => 'tmp/tests/default_storage'))
...@@ -17,53 +19,56 @@ describe 'gitlab:cleanup rake tasks' do ...@@ -17,53 +19,56 @@ describe 'gitlab:cleanup rake tasks' do
end end
before do before do
FileUtils.mkdir(Settings.absolute('tmp/tests/default_storage'))
allow(Gitlab.config.repositories).to receive(:storages).and_return(storages) allow(Gitlab.config.repositories).to receive(:storages).and_return(storages)
end end
after do after do
FileUtils.rm_rf(Settings.absolute('tmp/tests/default_storage')) Gitlab::GitalyClient::StorageService.new(storage).delete_all_repositories
end end
describe 'cleanup:repos' do describe 'cleanup:repos' do
before do before do
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/broken/project.git')) gitlab_shell.add_namespace(storage, 'broken/project.git')
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git')) gitlab_shell.add_namespace(storage, '@hashed/12/34/5678.git')
end end
it 'moves it to an orphaned path' do it 'moves it to an orphaned path' do
run_rake_task('gitlab:cleanup:repos') now = Time.now
repo_list = Dir['tmp/tests/default_storage/broken/*']
Timecop.freeze(now) do
run_rake_task('gitlab:cleanup:repos')
repo_list = Gitlab::GitalyClient::StorageService.new(storage).list_directories(depth: 0)
expect(repo_list.first).to include('+orphaned+') expect(repo_list.last).to include("broken+orphaned+#{now.to_i}")
end
end end
it 'ignores @hashed repos' do it 'ignores @hashed repos' do
run_rake_task('gitlab:cleanup:repos') run_rake_task('gitlab:cleanup:repos')
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))).to be_truthy expect(gitlab_shell.exists?(storage, '@hashed/12/34/5678.git')).to be(true)
end end
end end
describe 'cleanup:dirs' do describe 'cleanup:dirs' do
it 'removes missing namespaces' do it 'removes missing namespaces' do
FileUtils.mkdir_p(Settings.absolute("tmp/tests/default_storage/namespace_1/project.git")) gitlab_shell.add_namespace(storage, "namespace_1/project.git")
FileUtils.mkdir_p(Settings.absolute("tmp/tests/default_storage/namespace_2/project.git")) gitlab_shell.add_namespace(storage, "namespace_2/project.git")
allow(Namespace).to receive(:pluck).and_return('namespace_1') allow(Namespace).to receive(:pluck).and_return(['namespace_1'])
stub_env('REMOVE', 'true') stub_env('REMOVE', 'true')
run_rake_task('gitlab:cleanup:dirs') run_rake_task('gitlab:cleanup:dirs')
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/namespace_1'))).to be_truthy expect(gitlab_shell.exists?(storage, 'namespace_1')).to be(true)
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/namespace_2'))).to be_falsey expect(gitlab_shell.exists?(storage, 'namespace_2')).to be(false)
end end
it 'ignores @hashed directory' do it 'ignores @hashed directory' do
FileUtils.mkdir_p(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git')) gitlab_shell.add_namespace(storage, '@hashed/12/34/5678.git')
run_rake_task('gitlab:cleanup:dirs') run_rake_task('gitlab:cleanup:dirs')
expect(Dir.exist?(Settings.absolute('tmp/tests/default_storage/@hashed/12/34/5678.git'))).to be_truthy expect(gitlab_shell.exists?(storage, '@hashed/12/34/5678.git')).to be(true)
end end
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment