Commit 40dc82f8 authored by Kamil Trzciński

Merge branch 'add-background-migrations-for-not-archived-traces' into 'master'

Add background migrations to archive legacy job traces

Closes #46642

See merge request gitlab-org/gitlab-ce!19194
parents 049519e7 8f1f73d4
...@@ -55,6 +55,11 @@ module Ci ...@@ -55,6 +55,11 @@ module Ci
where('(artifacts_file IS NOT NULL AND artifacts_file <> ?) OR EXISTS (?)', where('(artifacts_file IS NOT NULL AND artifacts_file <> ?) OR EXISTS (?)',
'', Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').archive) '', Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').archive)
end end
# Builds for which no Ci::JobArtifact row selected by the `trace` scope
# references the build (correlated on ci_job_artifacts.job_id).
scope :without_archived_trace, -> do
  trace_artifacts = Ci::JobArtifact.select(1).where('ci_builds.id = ci_job_artifacts.job_id').trace

  where('NOT EXISTS (?)', trace_artifacts)
end
scope :with_artifacts_stored_locally, -> { with_artifacts_archive.where(artifacts_file_store: [nil, LegacyArtifactUploader::Store::LOCAL]) } scope :with_artifacts_stored_locally, -> { with_artifacts_archive.where(artifacts_file_store: [nil, LegacyArtifactUploader::Store::LOCAL]) }
scope :with_artifacts_not_expired, ->() { with_artifacts_archive.where('artifacts_expire_at IS NULL OR artifacts_expire_at > ?', Time.now) } scope :with_artifacts_not_expired, ->() { with_artifacts_archive.where('artifacts_expire_at IS NULL OR artifacts_expire_at > ?', Time.now) }
scope :with_expired_artifacts, ->() { with_artifacts_archive.where('artifacts_expire_at < ?', Time.now) } scope :with_expired_artifacts, ->() { with_artifacts_archive.where('artifacts_expire_at < ?', Time.now) }
......
---
title: Add background migrations for archiving legacy job traces
merge_request: 19194
author:
type: performance
# Schedules the `ArchiveLegacyTraces` background migration for every finished
# `Ci::Build` row in `ci_builds` that has no job artifact with `file_type = 3`.
class ScheduleToArchiveLegacyTraces < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  DOWNTIME = false
  BATCH_SIZE = 5000
  BACKGROUND_MIGRATION_CLASS = 'ArchiveLegacyTraces'

  disable_ddl_transaction!

  # Migration-local model for `ci_builds`, so the migration does not depend
  # on the application's Ci::Build class.
  class Build < ActiveRecord::Base
    include EachBatch

    self.table_name = 'ci_builds'
    self.inheritance_column = :_type_disabled # Disable STI

    # Rows whose `type` column is 'Ci::Build'.
    scope :type_build, -> { where(type: 'Ci::Build') }

    # Rows in one of the three terminal statuses.
    scope :finished, -> { where(status: %i[success failed canceled]) }

    # Rows with no matching ci_job_artifacts row of file_type 3
    # (presumably the "trace" artifact type -- confirm against Ci::JobArtifact).
    scope :without_archived_trace, -> {
      where('NOT EXISTS (SELECT 1 FROM ci_job_artifacts WHERE ci_builds.id = ci_job_artifacts.job_id AND ci_job_artifacts.file_type = 3)')
    }
  end

  # Queues background migration jobs over the matching builds in ranges of
  # BATCH_SIZE rows. The 5.minutes argument is passed to the helper as-is
  # (presumably the delay between consecutive batches -- confirm in
  # Gitlab::Database::MigrationHelpers).
  def up
    legacy_builds = Build.type_build.finished.without_archived_trace

    queue_background_migration_jobs_by_range_at_intervals(
      legacy_builds,
      BACKGROUND_MIGRATION_CLASS,
      5.minutes,
      batch_size: BATCH_SIZE
    )
  end

  # Nothing to undo here: `up` only schedules jobs.
  def down
    # noop
  end
end
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180529093006) do ActiveRecord::Schema.define(version: 20180529152628) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "plpgsql" enable_extension "plpgsql"
......
# frozen_string_literal: true
# rubocop:disable Metrics/AbcSize
# rubocop:disable Style/Documentation
module Gitlab
  module BackgroundMigration
    # Archives the traces of finished builds, within an id range, that do not
    # have an archived trace yet.
    class ArchiveLegacyTraces
      # Archives the trace of each matching build with an id in
      # start_id..stop_id (inclusive).
      #
      # @param start_id [Integer] first build id of the range
      # @param stop_id [Integer] last build id of the range
      def perform(start_id, stop_id)
        # This background migration refers directly to the ::Ci::Build model defined in application code.
        # In general, migration code should be isolated as much as possible in order to be idempotent.
        # However, the `archive!` method is too complicated to be replicated by copying its code here,
        # so ::Ci::Build is used directly and `archive!` is left unchanged until 11.1.
        legacy_builds = ::Ci::Build.finished.without_archived_trace.where(id: start_id..stop_id)

        legacy_builds.find_each { |build| archive_trace(build) }
      end

      private

      # Archives one build's trace. A StandardError is logged and swallowed so
      # that a single failing build does not stop the rest of the batch.
      def archive_trace(build)
        build.trace.archive!
      rescue => e
        Rails.logger.error "Failed to archive live trace. id: #{build.id} message: #{e.message}"
      end
    end
  end
end
...@@ -8,9 +8,7 @@ namespace :gitlab do ...@@ -8,9 +8,7 @@ namespace :gitlab do
logger = Logger.new(STDOUT) logger = Logger.new(STDOUT)
logger.info('Archiving legacy traces') logger.info('Archiving legacy traces')
Ci::Build.finished Ci::Build.finished.without_archived_trace
.where('NOT EXISTS (?)',
Ci::JobArtifact.select(1).trace.where('ci_builds.id = ci_job_artifacts.job_id'))
.order(id: :asc) .order(id: :asc)
.find_in_batches(batch_size: 1000) do |jobs| .find_in_batches(batch_size: 1000) do |jobs|
job_ids = jobs.map { |job| [job.id] } job_ids = jobs.map { |job| [job.id] }
......
require 'spec_helper'
# Exercises ArchiveLegacyTraces#perform on a single successful build (id 1)
# whose legacy trace lives in a file, is missing, or lives in the database.
describe Gitlab::BackgroundMigration::ArchiveLegacyTraces, :migration, schema: 20180529152628 do
  include TraceHelpers

  let(:namespaces) { table(:namespaces) }
  let(:projects) { table(:projects) }
  let(:builds) { table(:ci_builds) }
  let(:job_artifacts) { table(:ci_job_artifacts) }

  before do
    namespaces.create!(id: 123, name: 'gitlab1', path: 'gitlab1')
    projects.create!(id: 123, name: 'gitlab1', path: 'gitlab1', namespace_id: 123)
    @build = builds.create!(id: 1, project_id: 123, status: 'success', type: 'Ci::Build')
  end

  context 'when trace file exists at the right place' do
    before do
      create_legacy_trace(@build, 'trace in file')
    end

    it 'correctly archives legacy traces' do
      expect(job_artifacts.count).to eq(0)
      expect(File.exist?(legacy_trace_path(@build))).to be_truthy

      described_class.new.perform(1, 1)

      # The legacy file is gone and its content now lives in the artifact.
      expect(job_artifacts.count).to eq(1)
      expect(File.exist?(legacy_trace_path(@build))).to be_falsy
      expect(File.read(archived_trace_path(job_artifacts.first))).to eq('trace in file')
    end
  end

  context 'when trace file does not exist at the right place' do
    it 'does not raise errors nor create job artifact' do
      expect { described_class.new.perform(1, 1) }.not_to raise_error

      expect(job_artifacts.count).to eq(0)
    end
  end

  context 'when trace data exists in database' do
    before do
      create_legacy_trace_in_db(@build, 'trace in db')
    end

    it 'correctly archives legacy traces' do
      expect(job_artifacts.count).to eq(0)
      expect(@build.read_attribute(:trace)).not_to be_empty

      described_class.new.perform(1, 1)

      # Reload to observe the column as it is after the migration ran.
      @build.reload
      expect(job_artifacts.count).to eq(1)
      expect(@build.read_attribute(:trace)).to be_nil
      expect(File.read(archived_trace_path(job_artifacts.first))).to eq('trace in db')
    end
  end
end
require 'spec_helper'
require Rails.root.join('db', 'post_migrate', '20180529152628_schedule_to_archive_legacy_traces')
# Runs the scheduling migration against three finished builds and one running
# build, each with a legacy trace file, and checks that only the finished
# builds end up with an archived trace.
describe ScheduleToArchiveLegacyTraces, :migration do
  include TraceHelpers

  let(:namespaces) { table(:namespaces) }
  let(:projects) { table(:projects) }
  let(:builds) { table(:ci_builds) }
  let(:job_artifacts) { table(:ci_job_artifacts) }

  before do
    namespaces.create!(id: 123, name: 'gitlab1', path: 'gitlab1')
    projects.create!(id: 123, name: 'gitlab1', path: 'gitlab1', namespace_id: 123)

    @build_success = builds.create!(id: 1, project_id: 123, status: 'success', type: 'Ci::Build')
    @build_failed = builds.create!(id: 2, project_id: 123, status: 'failed', type: 'Ci::Build')
    @build_canceled = builds.create!(id: 3, project_id: 123, status: 'canceled', type: 'Ci::Build')
    @build_running = builds.create!(id: 4, project_id: 123, status: 'running', type: 'Ci::Build')

    [@build_success, @build_failed, @build_canceled].each do |finished_build|
      create_legacy_trace(finished_build, 'This job is done')
    end
    create_legacy_trace(@build_running, 'This job is not done yet')
  end

  it 'correctly archive legacy traces' do
    finished_builds = [@build_success, @build_failed, @build_canceled]

    # Before: no artifacts, and every build still has its legacy trace file.
    expect(job_artifacts.count).to eq(0)
    (finished_builds + [@build_running]).each do |any_build|
      expect(File.exist?(legacy_trace_path(any_build))).to be_truthy
    end

    migrate!

    # After: one artifact per finished build; the running build is untouched.
    expect(job_artifacts.count).to eq(3)
    finished_builds.each do |finished_build|
      expect(File.exist?(legacy_trace_path(finished_build))).to be_falsy
    end
    expect(File.exist?(legacy_trace_path(@build_running))).to be_truthy

    finished_builds.each do |finished_build|
      artifact = job_artifacts.where(job_id: finished_build.id).first
      expect(File.exist?(archived_trace_path(artifact))).to be_truthy
    end
    expect(job_artifacts.where(job_id: @build_running.id)).not_to be_exist
  end
end
# Spec helpers for placing legacy job traces and locating archived ones.
module TraceHelpers
  # Writes +content+ in binary mode to the build's legacy trace file.
  def create_legacy_trace(build, content)
    File.binwrite(legacy_trace_path(build), content)
  end

  # Stores +content+ in the build's `trace` column via `update_column`.
  def create_legacy_trace_in_db(build, content)
    build.update_column(:trace, content)
  end

  # Returns "<builds_path>/<YYYY_MM of created_at in UTC>/<project_id>/<id>.log".
  # NOTE: also creates the containing directory as a side effect.
  def legacy_trace_path(build)
    builds_root = Settings.gitlab_ci.builds_path
    month_segment = build.created_at.utc.strftime("%Y_%m")
    directory = File.join(builds_root, month_segment, build.project_id.to_s)

    FileUtils.mkdir_p(directory)

    File.join(directory, "#{build.id}.log")
  end

  # Returns the expected on-disk location of an archived trace:
  # "<artifacts path>/<h[0..1]>/<h[2..3]>/<h>/<YYYY_MM_DD>/<job_id>/<id>/job.log",
  # where h is the SHA2 hex digest of the artifact's project id.
  def archived_trace_path(job_artifact)
    project_hash = Digest::SHA2.hexdigest(job_artifact.project_id.to_s)
    hashed_segments = [project_hash[0..1], project_hash[2..3], project_hash]
    day_segment = job_artifact.created_at.utc.strftime('%Y_%m_%d')

    File.join(Gitlab.config.artifacts.path, *hashed_segments,
              day_segment, job_artifact.job_id.to_s, job_artifact.id.to_s, 'job.log')
  end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment