Commit a3c15673 authored by Alex Kalderimis

Add rake task to prune hashed projects

This adds `gitlab:storage:prune_hashed_projects`, which removes
directories not associated with any known project.

This is a development-only task, and must be run with `FORCE=1` to
actually perform the deletions.
parent 29713000
# frozen_string_literal: true
require 'find'
module Gitlab
module HashedStorage
module RakeHelper
......@@ -65,6 +67,7 @@ module Gitlab
# Prints one entry per project in +relation+ to $stdout: a red-coloured line
# with the project's full path and id, followed by its repository disk path.
#
# Iteration is delegated to +listing+ (defined elsewhere in this module),
# which receives +relation_name+ and the relation with routes loaded.
def self.projects_list(relation_name, relation)
  routed_projects = relation.with_route

  listing(relation_name, routed_projects) do |project|
    headline = " - #{project.full_path} (id: #{project.id})"
    disk_location = " #{project.repository.disk_path}"

    $stdout.puts headline.color(:red)
    $stdout.puts disk_location
  end
end
......@@ -92,6 +95,43 @@ module Gitlab
end
end
# rubocop: enable CodeReuse/ActiveRecord
# Removes (or, in dry-run mode, lists) directories under "<root>/@hashed"
# that do not correspond to the repository of any project in +relation+.
#
# Environment:
#   GDK_REPOSITORY_ROOT - repository root to scan; defaults to '../repositories'.
#   FORCE               - when present, directories are actually deleted;
#                         otherwise the candidates are only printed.
#
# @param relation_name [String] label passed on to +listing+
# @param relation [Enumerable] projects whose repositories must be kept
def self.prune(relation_name, relation)
  root = ENV['GDK_REPOSITORY_ROOT'].presence || '../repositories'
  dry_run = ENV['FORCE'].blank?

  # Use the caller-supplied label here. A bare `name` would resolve to this
  # module's own name rather than the +relation_name+ argument.
  known_paths = Set.new
  listing(relation_name, relation) { |p| known_paths << "#{root}/#{p.repository.disk_path}" }

  marked_for_deletion = Set.new
  prefix_length = Pathname.new(root).ascend.count

  Find.find("#{root}/@hashed") do |path|
    path = Pathname.new(path)
    next unless path.directory?

    # Skip the whole subtree when this directory, or any ancestor, is a known
    # repository once a trailing ".git" / ".<word>.git" suffix is stripped.
    path.ascend do |p|
      base = p.to_s.gsub(/\.(\w+\.)?git$/, '')
      Find.prune if known_paths.include?(base)
    end

    # Directories exactly four levels below root (@hashed/xx/yy/<dir>) that
    # were not skipped above are unknown: mark them and do not descend.
    if path.ascend.count == prefix_length + 4
      marked_for_deletion << path
      Find.prune
    end
  end

  $stdout.puts "Dry run. We would have deleted:" if dry_run

  marked_for_deletion.each do |p|
    if dry_run
      $stdout.puts " - #{p}"
    else
      $stdout.puts "Removing #{p}"
      p.rmtree
    end
  end
end
end
end
end
require 'find'
namespace :gitlab do
namespace :storage do
desc 'GitLab | Storage | Migrate existing projects to Hashed Storage'
......@@ -116,6 +118,19 @@ namespace :gitlab do
helper.projects_list('projects using Hashed Storage', Project.with_storage_feature(:repository))
end
desc 'Gitlab | Storage | Prune projects using Hashed Storage. Remove all hashed directories that do not have a project associated'
task prune_hashed_projects: :environment do
  # Destructive task: refuse to run when the Rails environment is production.
  abort('This destructive action may only be run in development') if Rails.env.production?

  # Hand the label and the relation of projects with the :repository storage
  # feature to the helper's prune routine.
  Gitlab::HashedStorage::RakeHelper.prune(
    'projects using Hashed Storage',
    Project.with_storage_feature(:repository)
  )
end
desc 'Gitlab | Storage | Summary of project attachments using Legacy Storage'
task legacy_attachments: :environment do
helper = Gitlab::HashedStorage::RakeHelper
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment