Commit 62a2b486 authored by Grzegorz Bizon's avatar Grzegorz Bizon

Add support for storing build pending state

Build pending state is a final build state that has been submitted by a
runner, but because of various tasks that still need to be performed we
have told the runner that it needs to retry and send the state again.
We still store the first state submitted in case of a runner failure or
a runner behaving incorrectly (for example - when it is submitting a
different state in subsequent requests).
parent 19d7626c
...@@ -38,8 +38,9 @@ module Ci ...@@ -38,8 +38,9 @@ module Ci
has_one :deployment, as: :deployable, class_name: 'Deployment' has_one :deployment, as: :deployable, class_name: 'Deployment'
has_one :resource, class_name: 'Ci::Resource', inverse_of: :build has_one :resource, class_name: 'Ci::Resource', inverse_of: :build
has_one :pending_state, class_name: 'Ci::BuildPendingState', inverse_of: :build
has_many :trace_sections, class_name: 'Ci::BuildTraceSection' has_many :trace_sections, class_name: 'Ci::BuildTraceSection'
has_many :trace_chunks, class_name: 'Ci::BuildTraceChunk', foreign_key: :build_id has_many :trace_chunks, class_name: 'Ci::BuildTraceChunk', foreign_key: :build_id, inverse_of: :build
has_many :report_results, class_name: 'Ci::BuildReportResult', inverse_of: :build has_many :report_results, class_name: 'Ci::BuildReportResult', inverse_of: :build
has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy, inverse_of: :job # rubocop:disable Cop/ActiveRecordDependent has_many :job_artifacts, class_name: 'Ci::JobArtifact', foreign_key: :job_id, dependent: :destroy, inverse_of: :job # rubocop:disable Cop/ActiveRecordDependent
......
...@@ -5,5 +5,8 @@ class Ci::BuildPendingState < ApplicationRecord ...@@ -5,5 +5,8 @@ class Ci::BuildPendingState < ApplicationRecord
belongs_to :build, class_name: 'Ci::Build', foreign_key: :build_id belongs_to :build, class_name: 'Ci::Build', foreign_key: :build_id
enum state: Ci::Stage.statuses
enum failure_reason: CommitStatus.failure_reasons
validates :build, presence: true validates :build, presence: true
end end
...@@ -26,45 +26,13 @@ module Ci ...@@ -26,45 +26,13 @@ module Ci
private private
def build_state def accept_build_state!
params.dig(:state).to_s if ACCEPT_TIMEOUT.ago > ensure_pending_state.created_at
end metrics.increment_trace_operation(operation: :discarded)
def has_trace?
params.dig(:trace).present?
end
def has_checksum?
params.dig(:checksum).present?
end
def has_chunks?
build.trace_chunks.any?
end
def live_chunks_pending?
build.trace_chunks.live.any?
end
def build_running?
build_state == 'running'
end
def accept_available?
!build_running? && has_checksum? && chunks_migration_enabled?
end
def accept_request?
accept_available? && live_chunks_pending?
end
def chunks_migration_enabled? return update_build_state!
Feature.enabled?(:ci_enable_live_trace, build.project)
end end
def accept_build_state!
# TODO, persist ci_build_state if not present (find or create)
build.trace_chunks.live.find_each do |chunk| build.trace_chunks.live.find_each do |chunk|
chunk.schedule_to_persist! chunk.schedule_to_persist!
end end
...@@ -77,11 +45,11 @@ module Ci ...@@ -77,11 +45,11 @@ module Ci
def overwrite_trace! def overwrite_trace!
metrics.increment_trace_operation(operation: :overwrite) metrics.increment_trace_operation(operation: :overwrite)
# TODO, disable in FF build.trace.set(params[:trace]) # TODO, disable by default using a new FF
build.trace.set(params[:trace])
end end
def update_build_state! def update_build_state!
# TODO, simplify this, doesn't work in case of timeout too
if accept_available? && has_chunks? if accept_available? && has_chunks?
metrics.increment_trace_operation(operation: :finalized) metrics.increment_trace_operation(operation: :finalized)
end end
...@@ -103,5 +71,54 @@ module Ci ...@@ -103,5 +71,54 @@ module Ci
Result.new(status: 400) Result.new(status: 400)
end end
end end
def accept_available?
!build_running? && has_checksum? && chunks_migration_enabled?
end
def accept_request?
accept_available? && live_chunks_pending?
end
def build_state
params.dig(:state).to_s
end
def has_trace?
params.dig(:trace).present?
end
def has_checksum?
params.dig(:checksum).present?
end
def has_chunks?
build.trace_chunks.any?
end
def live_chunks_pending?
build.trace_chunks.live.any?
end
def build_running?
build_state == 'running'
end
def ensure_pending_state
Ci::BuildPendingState.create_or_find_by!(
build_id: build.id,
state: params.fetch(:state),
trace_checksum: params.fetch(:checksum),
failure_reason: params.dig(:failure_reason)
)
rescue ActiveRecord::RecordNotFound
metrics.increment_trace_operation(operation: :flaky)
build.pending_state
end
def chunks_migration_enabled?
Feature.enabled?(:ci_enable_live_trace, build.project)
end
end end
end end
---
title: Migrate live traces before updating build state
merge_request: 41304
author:
type: added
# frozen_string_literal: true
class ChangeBuildPendingStateEnums < ActiveRecord::Migration[6.0]
DOWNTIME = false
def up
change_column :ci_build_pending_states, :state, :integer, limit: 2
change_column :ci_build_pending_states, :failure_reason, :integer, limit: 2
end
def down
change_column :ci_build_pending_states, :state, :integer
change_column :ci_build_pending_states, :failure_reason, :integer
end
end
c46bafefa5fd79a6644cbe259260b66aaded36aad4ae28a84ddd8bb072c2167d
\ No newline at end of file
...@@ -9831,8 +9831,8 @@ CREATE TABLE public.ci_build_pending_states ( ...@@ -9831,8 +9831,8 @@ CREATE TABLE public.ci_build_pending_states (
created_at timestamp with time zone NOT NULL, created_at timestamp with time zone NOT NULL,
updated_at timestamp with time zone NOT NULL, updated_at timestamp with time zone NOT NULL,
build_id bigint NOT NULL, build_id bigint NOT NULL,
state integer, state smallint,
failure_reason integer, failure_reason smallint,
trace_checksum bytea trace_checksum bytea
); );
......
...@@ -7,7 +7,7 @@ module Gitlab ...@@ -7,7 +7,7 @@ module Gitlab
extend Gitlab::Utils::StrongMemoize extend Gitlab::Utils::StrongMemoize
OPERATIONS = [:appended, :mutated, :overwrite, :accepted, OPERATIONS = [:appended, :mutated, :overwrite, :accepted,
:finalized].freeze :finalized, :discarded, :flaky].freeze
def increment_trace_operation(operation: :unknown) def increment_trace_operation(operation: :unknown)
unless OPERATIONS.include?(operation) unless OPERATIONS.include?(operation)
......
...@@ -50,7 +50,7 @@ RSpec.describe Ci::RetryBuildService do ...@@ -50,7 +50,7 @@ RSpec.describe Ci::RetryBuildService do
metadata runner_session trace_chunks upstream_pipeline_id metadata runner_session trace_chunks upstream_pipeline_id
artifacts_file artifacts_metadata artifacts_size commands artifacts_file artifacts_metadata artifacts_size commands
resource resource_group_id processed security_scans author resource resource_group_id processed security_scans author
pipeline_id report_results].freeze pipeline_id report_results pending_state].freeze
shared_examples 'build duplication' do shared_examples 'build duplication' do
let(:another_pipeline) { create(:ci_empty_pipeline, project: project) } let(:another_pipeline) { create(:ci_empty_pipeline, project: project) }
......
...@@ -74,7 +74,9 @@ RSpec.describe Ci::UpdateBuildStateService do ...@@ -74,7 +74,9 @@ RSpec.describe Ci::UpdateBuildStateService do
end end
context 'when build has a checksum' do context 'when build has a checksum' do
let(:params) { { checksum: 'crc32:12345678', state: 'success' } } let(:params) do
{ checksum: 'crc32:12345678', state: 'failed', failure_reason: 'script_failure' }
end
context 'when build trace has been migrated' do context 'when build trace has been migrated' do
before do before do
...@@ -84,7 +86,7 @@ RSpec.describe Ci::UpdateBuildStateService do ...@@ -84,7 +86,7 @@ RSpec.describe Ci::UpdateBuildStateService do
it 'updates a build state' do it 'updates a build state' do
subject.execute subject.execute
expect(build).to be_success expect(build).to be_failed
end end
it 'responds with 200 OK status' do it 'responds with 200 OK status' do
...@@ -135,6 +137,72 @@ RSpec.describe Ci::UpdateBuildStateService do ...@@ -135,6 +137,72 @@ RSpec.describe Ci::UpdateBuildStateService do
.with(operation: :accepted) .with(operation: :accepted)
end end
it 'creates a pending state record' do
subject.execute
build.pending_state.then do |status|
expect(status).to be_present
expect(status.state).to eq 'failed'
expect(status.trace_checksum).to eq 'crc32:12345678'
expect(status.failure_reason).to eq 'script_failure'
end
end
context 'when build pending state is outdated' do
before do
build.create_pending_state(
state: 'failed',
trace_checksum: 'crc32:12345678',
failure_reason: 'script_failure',
created_at: 10.minutes.ago
)
end
it 'responds with 200 OK' do
result = subject.execute
expect(result.status).to eq 200
end
it 'updates build state' do
subject.execute
expect(build.reload).to be_failed
expect(build.failure_reason).to eq 'script_failure'
end
it 'increments discarded traces metric' do
subject.execute
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :discarded)
end
end
context 'when build pending state has changes' do
before do
build.create_pending_state(
state: 'success',
created_at: 10.minutes.ago
)
end
it 'uses stored state and responds with 200 OK' do
result = subject.execute
expect(result.status).to eq 200
end
it 'increments flaky trace metric' do
subject.execute
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :flaky)
end
end
context 'when live traces are disabled' do context 'when live traces are disabled' do
before do before do
stub_feature_flags(ci_enable_live_trace: false) stub_feature_flags(ci_enable_live_trace: false)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment