Commit dabd91b2 authored by Toon Claes's avatar Toon Claes

Add rake task to clean orphan artifact files

This adds the rake task rake
gitlab:cleanup:orphan_job_artifact_files. This rake task cleans all
orphan job artifact files it can find on disk.

It performs a search on the complete folder of all artifacts on
disk. Then it filters out all the job artifact IDs for which it could
not find a record with a matching ID in the database. For these, the
file is deleted from disk.
parent 8df6508c
---
title: Add rake task to clean orphan artifact files
merge_request: 29681
author:
type: added
...@@ -504,6 +504,15 @@ To resolve this, run the following command: ...@@ -504,6 +504,15 @@ To resolve this, run the following command:
sudo gitlab-rake geo:db:refresh_foreign_tables sudo gitlab-rake geo:db:refresh_foreign_tables
``` ```
## Expired artifacts
If you notice that there are more artifacts on the Geo
secondary node than on the Geo primary node, you can use the rake task
to [clean up orphan artifact files](../../../raketasks/cleanup.md#remove-orphan-artifact-files).
On a Geo **secondary** node, this command will also clean up all Geo
registry records related to the orphan files on disk.
## Fixing common errors ## Fixing common errors
This section documents common errors reported in the Admin UI and how to fix them. This section documents common errors reported in the Admin UI and how to fix them.
......
...@@ -92,3 +92,48 @@ I, [2018-08-02T10:26:47.598424 #45087] INFO -- : Looking for orphaned remote up ...@@ -92,3 +92,48 @@ I, [2018-08-02T10:26:47.598424 #45087] INFO -- : Looking for orphaned remote up
I, [2018-08-02T10:26:47.753131 #45087] INFO -- : Moved to lost and found: @hashed/6b/DSC_6152.JPG -> lost_and_found/@hashed/6b/DSC_6152.JPG I, [2018-08-02T10:26:47.753131 #45087] INFO -- : Moved to lost and found: @hashed/6b/DSC_6152.JPG -> lost_and_found/@hashed/6b/DSC_6152.JPG
I, [2018-08-02T10:26:47.764356 #45087] INFO -- : Moved to lost and found: @hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg -> lost_and_found/@hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg I, [2018-08-02T10:26:47.764356 #45087] INFO -- : Moved to lost and found: @hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg -> lost_and_found/@hashed/79/02/7902699be42c8a8e46fbbb4501726517e86b22c56a189f7625a6da49081b2451/711491b29d3eb08837798c4909e2aa4d/DSC00314.jpg
``` ```
## Remove orphan artifact files
When you notice there are more job artifact files on disk than there
should be, you can run:
```shell
gitlab-rake gitlab:cleanup:orphan_job_artifact_files
```
This command:
- Scans through the entire artifacts folder.
- Checks which files still have a record in the database.
- If no database record is found, the file is deleted from disk.
By default, this task does not delete anything but shows what it can
delete. Run the command with `DRY_RUN=false` if you actually want to
delete the files:
```shell
gitlab-rake gitlab:cleanup:orphan_job_artifact_files DRY_RUN=false
```
You can also limit the number of files to delete with `LIMIT`:
```shell
gitlab-rake gitlab:cleanup:orphan_job_artifact_files LIMIT=100
```
When combined with `DRY_RUN=false`, this will only delete up to 100 files
from disk. You can use this to delete a small set for testing purposes.
If you provide `DEBUG=1`, you'll see the full path of every file that
is detected as being an orphan.
If `ionice` is installed, the task uses it to ensure the command is
not causing too much load on the disk. You can configure the niceness
level with `NICENESS`. Below are the valid levels, but consult
`man 1 ionice` to be sure.
- `0` or `None`
- `1` or `Realtime`
- `2` or `Best-effort` (default)
- `3` or `Idle`
# frozen_string_literal: true
module Gitlab
  module Cleanup
    # Scans the job artifact directory on disk and cleans up artifact
    # directories that have no matching record in the database.
    #
    # The scan is done with the external `find` command (prefixed with
    # `ionice` when that binary is available). Every directory found is added
    # to an OrphanJobArtifactFilesBatch; the batch decides which entries are
    # orphans and removes them unless +dry_run+ is set.
    class OrphanJobArtifactFiles
      include Gitlab::Utils::StrongMemoize

      ABSOLUTE_ARTIFACT_DIR = ::JobArtifactUploader.root.freeze
      # Not referenced anywhere within this class.
      LOST_AND_FOUND = File.join(ABSOLUTE_ARTIFACT_DIR, '-', 'lost+found').freeze
      # Upper bound on the number of paths collected before a batch is cleaned.
      BATCH_SIZE = 500
      DEFAULT_NICENESS = 'Best-effort'

      attr_accessor :batch, :total_found, :total_cleaned
      attr_reader :limit, :dry_run, :niceness, :logger

      # limit    - maximum number of orphans to clean, or nil for no maximum.
      # dry_run  - passed on to every batch; when true nothing is removed.
      # niceness - value for `ionice --class`; falls back to DEFAULT_NICENESS.
      # logger   - falls back to Rails.logger.
      def initialize(limit: nil, dry_run: true, niceness: nil, logger: nil)
        @limit = limit
        @dry_run = dry_run
        @niceness = niceness || DEFAULT_NICENESS
        @logger = logger || Rails.logger
        @total_found = @total_cleaned = 0

        new_batch!
      end

      # Runs the scan, cleaning every batch as soon as it is full, and logs a
      # summary at the end.
      def run!
        log_info('Looking for orphan job artifacts to clean up')

        # When the limit is already reached before anything was scanned (for
        # example a limit of 0), the current batch has no usable size and
        # adding to it would fail, so the scan is skipped altogether.
        unless limit_reached?
          find_artifacts do |artifact_file|
            batch << artifact_file

            clean_batch! if batch.full?
            break if limit_reached?
          end

          # Clean whatever is left in the last, partially filled batch.
          clean_batch!
        end

        log_info("Processed #{total_found} job artifacts to find and clean #{total_cleaned} orphans.")
      end

      private

      def new_batch!
        self.batch = ::Gitlab::Cleanup::OrphanJobArtifactFilesBatch
          .new(batch_size: batch_size, logger: logger, dry_run: dry_run)
      end

      # Cleans the current batch, adds its numbers to the totals and starts a
      # fresh batch.
      def clean_batch!
        batch.clean!

        update_stats!(batch)

        new_batch!
      end

      def update_stats!(batch)
        self.total_found += batch.artifact_files.count
        self.total_cleaned += batch.lost_and_found.count
      end

      def limit_reached?
        return false unless limit

        total_cleaned >= limit
      end

      # Size for the next batch: BATCH_SIZE, capped by what is left of the
      # limit. Returns nil once the limit is reached; run! never adds to a
      # batch in that state.
      def batch_size
        return BATCH_SIZE unless limit
        return if limit_reached?

        todo = limit - total_cleaned
        [BATCH_SIZE, todo].min
      end

      # Runs the find command and yields every path it prints. When the
      # command does not exit successfully its stderr output is logged as an
      # error.
      def find_artifacts
        Open3.popen3(*find_command) do |_stdin, stdout, stderr, status_thread|
          stdout.each_line do |line|
            # each_line keeps the line terminator. Strip it, otherwise the
            # path names a non-existent entry and the later removal silently
            # does nothing.
            yield line.chomp
          end

          log_error(stderr.read.color(:red)) unless status_thread.value.success?
        end
      end

      # Builds (once) the argument list for the scan.
      #
      # Raises ArgumentError when `ionice` is available and the niceness
      # contains characters other than word characters and dashes.
      def find_command
        strong_memoize(:find_command) do
          cmd = %W[find -L #{absolute_artifact_dir}]

          # Search for Job Artifact IDs, they are found 6 directory
          # levels deep. For example:
          # shared/artifacts/2c/62/2c...a3/2019_02_27/836/628/job.log
          #                  1  2  3       4          5   6
          #                  |  |  |       ^- date    |   ^- Job Artifact ID
          #                  |  |  |                  ^- Job ID
          #                  ^--+--+- components of hashed storage project path
          cmd += %w[-mindepth 6 -maxdepth 6]

          # Artifact directories are named on their ID
          cmd += %w[-type d]

          if ionice
            # \A and \z anchor the whole string; ^ and $ would accept a value
            # that merely contains one valid line.
            raise ArgumentError, 'Invalid niceness' unless niceness.match?(/\A\w[\w\-]*\z/)

            cmd.unshift(*%W[#{ionice} --class #{niceness}])
          end

          log_info("find command: '#{cmd.join(' ')}'")

          cmd
        end
      end

      def absolute_artifact_dir
        File.absolute_path(ABSOLUTE_ARTIFACT_DIR)
      end

      # Location of the `ionice` binary as reported by Gitlab::Utils.which,
      # looked up once.
      def ionice
        strong_memoize(:ionice) do
          Gitlab::Utils.which('ionice')
        end
      end

      def log_info(msg, params = {})
        logger.info("#{'[DRY RUN]' if dry_run} #{msg}")
      end

      def log_error(msg, params = {})
        logger.error(msg)
      end
    end
  end
end
# frozen_string_literal: true
module Gitlab
  module Cleanup
    # A size-bounded set of artifact paths found on disk. The batch looks up
    # which of them still have a Ci::JobArtifact record and removes the ones
    # that do not, unless it runs in dry-run mode.
    class OrphanJobArtifactFilesBatch
      # Raised when a path is added to a batch that is already full.
      class BatchFull < StandardError; end

      # One path collected by the batch.
      class ArtifactFile
        attr_accessor :path

        def initialize(path)
          @path = path
        end

        # Integer value of the last segment of the path.
        def artifact_id
          path.split('/')[-1].to_i
        end
      end

      include Gitlab::Utils::StrongMemoize

      attr_reader :batch_size, :dry_run
      attr_accessor :artifact_files

      # batch_size - number of paths at which the batch counts as full.
      # dry_run    - when true, orphans are only logged, never removed.
      # logger     - receives the debug/info output of the batch.
      def initialize(batch_size:, dry_run: true, logger: Rails.logger)
        @artifact_files = []
        @batch_size = batch_size
        @dry_run = dry_run
        @logger = logger
      end

      # Processes every orphan in the batch; does nothing for an empty batch.
      def clean!
        return if artifact_files.empty?

        lost_and_found.each { |orphan| clean_one!(orphan) }
      end

      def full?
        artifact_files.count >= batch_size
      end

      # Adds a path to the batch. Raises BatchFull when there is no room left.
      def <<(artifact_path)
        if full?
          raise BatchFull, "Batch full! Already contains #{artifact_files.count} artifacts"
        end

        artifact_files << ArtifactFile.new(artifact_path)
      end

      # The collected entries whose ID has no Ci::JobArtifact record. The
      # result is computed once and then reused.
      def lost_and_found
        strong_memoize(:lost_and_found) do
          ids_on_disk = artifact_files.map(&:artifact_id)
          ids_in_database = ::Ci::JobArtifact.id_in(ids_on_disk).pluck_primary_key

          artifact_files.reject do |candidate|
            ids_in_database.include?(candidate.artifact_id)
          end
        end
      end

      private

      # Logs the orphan and removes it, except in dry-run mode.
      def clean_one!(artifact_file)
        log_debug("Found orphan job artifact file @ #{artifact_file.path}")
        return if dry_run

        remove_file!(artifact_file)
      end

      def remove_file!(artifact_file)
        FileUtils.rm_rf(artifact_file.path)
      end

      def log_info(msg, params = {})
        @logger.info("#{'[DRY RUN]' if dry_run} #{msg}")
      end

      def log_debug(msg, params = {})
        @logger.debug(msg)
      end
    end
  end
end
...@@ -115,6 +115,18 @@ namespace :gitlab do ...@@ -115,6 +115,18 @@ namespace :gitlab do
end end
end end
desc 'GitLab | Cleanup | Clean orphan job artifact files'
task orphan_job_artifact_files: :gitlab_environment do
  warn_user_is_not_gitlab

  # Configure the cleaner from the task's helper methods and run it right away.
  Gitlab::Cleanup::OrphanJobArtifactFiles
    .new(limit: limit, dry_run: dry_run?, niceness: niceness, logger: logger)
    .run!

  # In a dry run nothing was removed, so tell the user how to do it for real.
  logger.info "To clean up these files run this command with DRY_RUN=false".color(:yellow) if dry_run?
end
def remove? def remove?
ENV['REMOVE'] == 'true' ENV['REMOVE'] == 'true'
end end
...@@ -123,12 +135,25 @@ namespace :gitlab do ...@@ -123,12 +135,25 @@ namespace :gitlab do
ENV['DRY_RUN'] != 'false' ENV['DRY_RUN'] != 'false'
end end
# True when the DEBUG environment variable holds a non-blank value.
def debug?
  !ENV['DEBUG'].blank?
end
# Maximum number of orphans to clean, read from the LIMIT environment
# variable.
#
# Returns nil when LIMIT is unset or blank, meaning "no limit".
# Raises ArgumentError when LIMIT is not a base-10 integer. String#to_i would
# turn such a value into 0, and the cleaner cannot work with that.
def limit
  raw = ENV['LIMIT'].to_s.strip
  return if raw.empty?

  Integer(raw, 10)
rescue ArgumentError
  raise ArgumentError, "LIMIT must be an integer, got #{raw.inspect}"
end
# Value of the NICENESS environment variable, or nil when it is unset or blank.
def niceness
  value = ENV['NICENESS']
  value if value.present?
end
def logger def logger
return @logger if defined?(@logger) return @logger if defined?(@logger)
@logger = if Rails.env.development? || Rails.env.production? @logger = if Rails.env.development? || Rails.env.production?
Logger.new(STDOUT).tap do |stdout_logger| Logger.new(STDOUT).tap do |stdout_logger|
stdout_logger.extend(ActiveSupport::Logger.broadcast(Rails.logger)) stdout_logger.extend(ActiveSupport::Logger.broadcast(Rails.logger))
stdout_logger.level = debug? ? Logger::DEBUG : Logger::INFO
end end
else else
Rails.logger Rails.logger
......
# frozen_string_literal: true
require 'spec_helper'
# Exercises a single batch: which entries it reports as orphans and whether
# it touches the disk, with and without dry run.
describe Gitlab::Cleanup::OrphanJobArtifactFilesBatch do
  let(:batch_size) { 10 }
  let(:dry_run) { true }

  subject(:batch) { described_class.new(batch_size: batch_size, dry_run: dry_run) }

  context 'no dry run' do
    let(:dry_run) { false }

    it 'deletes only orphan job artifacts from disk' do
      job_artifact = create(:ci_job_artifact, :archive)
      orphan_artifact = create(:ci_job_artifact, :archive)
      batch << artifact_path(job_artifact)
      batch << artifact_path(orphan_artifact)

      # Drop the record only; the examples rely on the file staying behind.
      orphan_artifact.delete

      batch.clean!

      expect(batch.artifact_files.count).to eq(2)
      expect(batch.lost_and_found.count).to eq(1)
      expect(batch.lost_and_found.first.artifact_id).to eq(orphan_artifact.id)

      # Verify the effect on disk as well, as the description promises.
      expect(File.exist?(job_artifact.file.path)).to be_truthy
      expect(File.exist?(orphan_artifact.file.path)).to be_falsey
    end

    it 'does not mix up job ID and artifact ID' do
      # take maximum ID of both tables to avoid any collision
      max_id = [Ci::Build.maximum(:id), Ci::JobArtifact.maximum(:id)].compact.max.to_i
      job_a = create(:ci_build, id: max_id + 1)
      job_b = create(:ci_build, id: max_id + 2)
      # reuse the build IDs for the job artifact IDs, but swap them
      job_artifact_b = create(:ci_job_artifact, :archive, job: job_b, id: max_id + 1)
      job_artifact_a = create(:ci_job_artifact, :archive, job: job_a, id: max_id + 2)

      batch << artifact_path(job_artifact_a)
      batch << artifact_path(job_artifact_b)

      job_artifact_b.delete

      batch.clean!

      expect(File.exist?(job_artifact_a.file.path)).to be_truthy
      expect(File.exist?(job_artifact_b.file.path)).to be_falsey
    end
  end

  context 'with dry run' do
    it 'does not remove files' do
      job_artifact = create(:ci_job_artifact, :archive)
      # Add the artifact directory, like the other examples do, not the file inside it.
      batch << artifact_path(job_artifact)
      job_artifact.delete

      expect(batch).not_to receive(:remove_file!)

      batch.clean!

      expect(File.exist?(job_artifact.file.path)).to be_truthy
    end
  end

  # Directory that holds the artifact's file; its last segment is what the
  # batch converts into the artifact ID.
  def artifact_path(job_artifact)
    Pathname.new(job_artifact.file.path).parent.to_s
  end
end
# frozen_string_literal: true
require 'spec_helper'
# Exercises the cleaner that drives the batches: option passing, the limit
# and the batching itself.
describe Gitlab::Cleanup::OrphanJobArtifactFiles do
  let(:null_logger) { Logger.new('/dev/null') }

  subject(:cleanup) { described_class.new(logger: null_logger) }

  before do
    allow(null_logger).to receive(:info)
  end

  it 'passes on dry_run' do
    expect(Gitlab::Cleanup::OrphanJobArtifactFilesBatch)
      .to receive(:new)
      .with(dry_run: false, batch_size: anything, logger: anything)
      .at_least(:once)
      .and_call_original

    described_class.new(dry_run: false).run!
  end

  it 'errors when invalid niceness is given' do
    # The niceness is only handed to `ionice`. Where that binary is missing
    # the value is never used and no error can be logged.
    skip 'ionice is not installed' unless Gitlab::Utils.which('ionice')

    cleanup = described_class.new(logger: null_logger, niceness: 'FooBar')

    expect(null_logger).to receive(:error).with(/FooBar/)

    cleanup.run!
  end

  it 'finds artifacts on disk' do
    artifact = create(:ci_job_artifact, :archive)

    expect(cleanup).to receive(:find_artifacts).and_yield(artifact.file.path)
    cleanup.run!
  end

  it 'stops when limit is reached' do
    cleanup = described_class.new(limit: 1)

    mock_artifacts_found(cleanup, 'tmp/foo/bar/1', 'tmp/foo/bar/2')

    cleanup.run!

    expect(cleanup.total_found).to eq(1)
  end

  it 'cleans even if batch is not full' do
    mock_artifacts_found(cleanup, 'tmp/foo/bar/1')

    expect(cleanup).to receive(:clean_batch!).and_call_original
    cleanup.run!
  end

  it 'cleans in batches' do
    stub_const("#{described_class.name}::BATCH_SIZE", 2)
    mock_artifacts_found(cleanup, 'tmp/foo/bar/1', 'tmp/foo/bar/2', 'tmp/foo/bar/3')

    # Once for the batch that filled up, once for the remainder.
    expect(cleanup).to receive(:clean_batch!).twice.and_call_original
    cleanup.run!
  end

  # Replaces the disk scan: find_artifacts yields the given paths in order.
  def mock_artifacts_found(cleanup, *files)
    mock = allow(cleanup).to receive(:find_artifacts)

    files.each { |file| mock.and_yield(file) }
  end
end
...@@ -156,4 +156,33 @@ describe 'gitlab:cleanup rake tasks' do ...@@ -156,4 +156,33 @@ describe 'gitlab:cleanup rake tasks' do
end end
end end
end end
# Covers the rake task wrapper only: that it builds the cleaner and forwards
# the DRY_RUN setting to it.
describe 'gitlab:cleanup:orphan_job_artifact_files' do
# Referencing rake_task is what actually runs the task.
subject(:rake_task) { run_rake_task('gitlab:cleanup:orphan_job_artifact_files') }
it 'runs the task without errors' do
# The expectation has to be in place before the task is invoked below.
expect(Gitlab::Cleanup::OrphanJobArtifactFiles)
.to receive(:new).and_call_original
expect { rake_task }.not_to raise_error
end
context 'with DRY_RUN set to false' do
before do
stub_env('DRY_RUN', 'false')
end
it 'passes dry_run correctly' do
# Only dry_run is pinned; the other keyword arguments may be anything.
expect(Gitlab::Cleanup::OrphanJobArtifactFiles)
.to receive(:new)
.with(limit: anything,
dry_run: false,
niceness: anything,
logger: anything)
.and_call_original
rake_task
end
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment