Commit 5a1ee0c3 authored by Shinya Maeda's avatar Shinya Maeda

Fix the query to select stale live traces

parent 2084e7ab
......@@ -66,6 +66,7 @@ module Ci
scope :last_month, ->() { where('created_at > ?', Date.today - 1.month) }
scope :manual_actions, ->() { where(when: :manual, status: COMPLETED_STATUSES + [:manual]) }
scope :ref_protected, -> { where(protected: true) }
scope :with_live_trace, -> { where('EXISTS (?)', Ci::BuildTraceChunk.where('ci_builds.id = ci_build_trace_chunks.build_id').select(1)) }
scope :matches_tag_ids, -> (tag_ids) do
matcher = ::ActsAsTaggableOn::Tagging
......@@ -583,21 +584,6 @@ module Ci
super(options).merge(when: read_attribute(:when))
end
# Find stale live traces and return their build ids
def self.find_builds_from_stale_live_traces
binding.pry
Ci::BuildTraceChunk
.include(EachBatch).select(:build_id).group(:build_id).joins(:build)
.merge(Ci::Build.finished).where('ci_builds.finished_at < ?', 1.hour.ago)
.each_batch(column: :build_id) do |chunks|
build_ids = chunks.map { |chunk| chunk.build_id }
binding.pry
yield where(id: build_ids)
end
end
private
def update_artifacts_size
......
......@@ -4,16 +4,14 @@ module Ci
include CronjobQueue
def perform
# Archive live traces which still resides in redis or database
# This could happen when sidekiq-jobs for archivements are lost by SIGKILL
# Issue: https://gitlab.com/gitlab-org/gitlab-ce/issues/36791
Ci::Build.find_builds_from_stale_live_traces do |builds|
builds.each do |build|
begin
build.trace.archive!
rescue => e
Rails.logger.error "Failed to archive stale live trace. id: #{build.id} message: #{e.message}"
end
# Archive stale live traces which still resides in redis or database
# This could happen when ArchiveTraceWorker sidekiq jobs were lost by receiving SIGKILL
# More details in https://gitlab.com/gitlab-org/gitlab-ce/issues/36791
Ci::Build.finished.with_live_trace.find_each(batch_size: 100) do |build|
begin
build.trace.archive!
rescue => e
Rails.logger.error "Failed to archive stale live trace. id: #{build.id} message: #{e.message}"
end
end
end
......
......@@ -109,31 +109,35 @@ module Gitlab
end
def archive!
try_obtain_lease do
unsafe_archive!
end
end
private
def unsafe_archive!
raise ArchiveError, 'Already archived' if trace_artifact
raise ArchiveError, 'Job is not finished yet' unless job.complete?
try_obtain_lease do
if job.trace_chunks.any?
Gitlab::Ci::Trace::ChunkedIO.new(job) do |stream|
archive_stream!(stream)
stream.destroy!
end
elsif current_path
File.open(current_path) do |stream|
archive_stream!(stream)
FileUtils.rm(current_path)
end
elsif old_trace
StringIO.new(old_trace, 'rb').tap do |stream|
archive_stream!(stream)
job.erase_old_trace!
end
if job.trace_chunks.any?
Gitlab::Ci::Trace::ChunkedIO.new(job) do |stream|
archive_stream!(stream)
stream.destroy!
end
elsif current_path
File.open(current_path) do |stream|
archive_stream!(stream)
FileUtils.rm(current_path)
end
elsif old_trace
StringIO.new(old_trace, 'rb').tap do |stream|
archive_stream!(stream)
job.erase_old_trace!
end
end
end
private
def archive_stream!(stream)
clone_file!(stream, JobArtifactUploader.workhorse_upload_path) do |clone_path|
create_build_trace!(job, clone_path)
......@@ -213,7 +217,7 @@ module Gitlab
job.job_artifacts_trace
end
# For ExclusiveLeaseGuard concerns
# For ExclusiveLeaseGuard concern
def lease_key
@lease_key ||= "trace:archive:#{job.id}"
end
......
......@@ -116,6 +116,26 @@ describe Ci::Build do
end
end
describe '.with_live_trace' do
subject { described_class.with_live_trace }
context 'when build has live trace' do
let!(:build) { create(:ci_build, :success, :trace_live) }
it 'selects the build' do
is_expected.to eq([build])
end
end
context 'when build does not have live trace' do
let!(:build) { create(:ci_build, :success, :trace_artifact) }
it 'selects the build' do
is_expected.to be_empty
end
end
end
describe '#actionize' do
context 'when build is a created' do
before do
......
......@@ -35,46 +35,6 @@ describe Ci::BuildTraceChunk, :clean_gitlab_redis_shared_state do
end
end
describe '.find_builds_from_stale_live_traces' do
subject { described_class.find_builds_from_stale_live_traces }
context 'when build status is finished' do
context 'when build finished 2 days ago' do
context 'when build has an archived trace' do
let!(:build) { create(:ci_build, :success, :trace_artifact, finished_at: 2.days.ago) }
it 'does not yield build id' do
expect { |b| described_class.find_builds_from_stale_live_traces(&b) }.not_to yield_control
end
end
context 'when build has a live trace' do
let!(:build) { create(:ci_build, :success, :trace_live, finished_at: 2.days.ago) }
it 'yields build id' do
expect { |b| described_class.find_builds_from_stale_live_traces(&b) }.to yield_with_args([build.id])
end
end
end
context 'when build finished 10 minutes ago' do
let!(:build) { create(:ci_build, :success, :trace_live, finished_at: 10.minutes.ago) }
it 'does not yield build id' do
expect { |b| described_class.find_builds_from_stale_live_traces(&b) }.not_to yield_control
end
end
end
context 'when build status is running' do
let!(:build) { create(:ci_build, :running, :trace_live) }
it 'does not yield build id' do
expect { |b| described_class.find_builds_from_stale_live_traces(&b) }.not_to yield_control
end
end
end
describe '#data' do
subject { build_trace_chunk.data }
......
......@@ -25,57 +25,32 @@ describe Ci::RescueStaleLiveTraceWorker do
end
end
context 'when a job was succeeded 2 hours ago' do
context 'when a job was succeeded' do
let!(:build) { create(:ci_build, :success, :trace_live) }
before do
build.update(finished_at: 2.hours.ago)
end
it_behaves_like 'archives trace'
# context 'when build has both archived trace and live trace' do
# let!(:build2) { create(:ci_build, :success, :trace_live, finished_at: 2.days.ago) }
context 'when archive raised an exception' do
let!(:build) { create(:ci_build, :success, :trace_artifact, :trace_live) }
let!(:build2) { create(:ci_build, :success, :trace_live) }
# it 'archives only available targets' do
# subject
# build.reload
# expect(build.job_artifacts_trace).to be_exist
# end
# end
end
it 'archives valid targets' do
expect(Rails.logger).to receive(:error).with("Failed to archive stale live trace. id: #{build.id} message: Already archived")
context 'when a job was failed 2 hours ago' do
let!(:build) { create(:ci_build, :failed, :trace_live) }
subject
before do
build.update(finished_at: 2.hours.ago)
build2.reload
expect(build2.job_artifacts_trace).to be_exist
end
end
it_behaves_like 'archives trace'
end
context 'when a job was cancelled 2 hours ago' do
context 'when a job was cancelled' do
let!(:build) { create(:ci_build, :canceled, :trace_live) }
before do
build.update(finished_at: 2.hours.ago)
end
it_behaves_like 'archives trace'
end
context 'when a job has been finished 10 minutes ago' do
let!(:build) { create(:ci_build, :success, :trace_live) }
before do
build.update(finished_at: 10.minutes.ago)
end
it_behaves_like 'does not archive trace'
end
context 'when a job is running' do
let!(:build) { create(:ci_build, :running, :trace_live) }
......
......@@ -134,8 +134,8 @@ describe StuckCiJobsWorker do
it 'cancels exclusive lease after worker perform' do
worker.perform
expect(Gitlab::ExclusiveLease.new(described_class::EXCLUSIVE_LEASE_KEY, timeout: 1.hour).exists?)
.to be_falsy
expect(Gitlab::ExclusiveLease.new(described_class::EXCLUSIVE_LEASE_KEY, timeout: 1.hour))
.not_to be_exists
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment