Commit 92116078 authored by Sean McGivern's avatar Sean McGivern

Merge branch 'feature/gb/validate-build-traces' into 'master'

Validate build traces using CRC32 checksums

See merge request gitlab-org/gitlab!42829
parents 0243c86c fed2649a
......@@ -327,6 +327,8 @@ module Ci
after_transition any => [:success, :failed, :canceled] do |build|
build.run_after_commit do
build.run_status_commit_hooks!
BuildFinishedWorker.perform_async(id)
end
end
......@@ -963,8 +965,24 @@ module Ci
pending_state.try(:delete)
end
def run_on_status_commit(&block)
status_commit_hooks.push(block)
end
protected
def run_status_commit_hooks!
status_commit_hooks.reverse_each do |hook|
instance_eval(&hook)
end
end
private
def status_commit_hooks
@status_commit_hooks ||= []
end
def auto_retry
strong_memoize(:auto_retry) do
Gitlab::Ci::Build::AutoRetry.new(self)
......
......@@ -3,6 +3,7 @@
module Ci
class BuildTraceChunk < ApplicationRecord
extend ::Gitlab::Ci::Model
include ::Comparable
include ::FastDestroyAll
include ::Checksummable
include ::Gitlab::ExclusiveLeaseHelpers
......@@ -29,6 +30,7 @@ module Ci
}
scope :live, -> { redis }
scope :persisted, -> { not_redis.order(:chunk_index) }
class << self
def all_stores
......@@ -63,12 +65,24 @@ module Ci
get_store_class(store).delete_keys(value)
end
end
##
# Sometimes we do not want to read raw data. This method makes it easier
# to find attributes that are just metadata excluding raw data.
#
def metadata_attributes
attribute_names - %w[raw_data]
end
end
def data
@data ||= get_data.to_s
end
def crc32
checksum.to_i
end
def truncate(offset = 0)
raise ArgumentError, 'Offset is out of range' if offset > size || offset < 0
return if offset == size # Skip the following process as it doesn't affect anything
......@@ -130,6 +144,12 @@ module Ci
build.trace_chunks.maximum(:chunk_index).to_i == chunk_index
end
def <=>(other)
return unless self.build_id == other.build_id
self.chunk_index <=> other.chunk_index
end
private
def get_data
......@@ -150,7 +170,7 @@ module Ci
self.raw_data = nil
self.data_store = new_store
self.checksum = crc32(current_data)
self.checksum = self.class.crc32(current_data)
##
# We need to so persist data then save a new store identifier before we
......
......@@ -17,6 +17,8 @@ module Ci
def data(model)
model.raw_data
rescue ActiveModel::MissingAttributeError
model.reset.raw_data
end
def set_data(model, new_data)
......
......@@ -3,11 +3,11 @@
module Checksummable
extend ActiveSupport::Concern
def crc32(data)
Zlib.crc32(data)
end
class_methods do
def crc32(data)
Zlib.crc32(data)
end
def hexdigest(path)
::Digest::SHA256.file(path).hexdigest
end
......
......@@ -2,6 +2,9 @@
module Ci
class UpdateBuildStateService
include ::Gitlab::Utils::StrongMemoize
include ::Gitlab::ExclusiveLeaseHelpers
Result = Struct.new(:status, :backoff, keyword_init: true)
ACCEPT_TIMEOUT = 5.minutes.freeze
......@@ -17,48 +20,63 @@ module Ci
def execute
overwrite_trace! if has_trace?
if accept_request?
accept_build_state!
else
check_migration_state
update_build_state!
unless accept_available?
return update_build_state!
end
ensure_pending_state!
in_build_trace_lock do
process_build_state!
end
end
private
def accept_build_state!
state_created = ensure_pending_state.created_at
def overwrite_trace!
metrics.increment_trace_operation(operation: :overwrite)
build.trace.set(params[:trace]) if Gitlab::Ci::Features.trace_overwrite?
end
if Time.current - state_created > ACCEPT_TIMEOUT
metrics.increment_trace_operation(operation: :discarded)
def ensure_pending_state!
pending_state.created_at
end
return update_build_state!
def process_build_state!
if live_chunks_pending?
if pending_state_outdated?
discard_build_trace!
update_build_state!
else
accept_build_state!
end
else
validate_build_trace!
update_build_state!
end
end
def accept_build_state!
build.trace_chunks.live.find_each do |chunk|
chunk.schedule_to_persist!
end
metrics.increment_trace_operation(operation: :accepted)
::Gitlab::Ci::Runner::Backoff.new(state_created).then do |backoff|
::Gitlab::Ci::Runner::Backoff.new(pending_state.created_at).then do |backoff|
Result.new(status: 202, backoff: backoff.to_seconds)
end
end
def overwrite_trace!
metrics.increment_trace_operation(operation: :overwrite)
build.trace.set(params[:trace]) if Gitlab::Ci::Features.trace_overwrite?
end
def check_migration_state
return unless accept_available?
if has_chunks? && !live_chunks_pending?
def validate_build_trace!
if chunks_persisted?
metrics.increment_trace_operation(operation: :finalized)
end
unless ::Gitlab::Ci::Trace::Checksum.new(build).valid?
metrics.increment_trace_operation(operation: :invalid)
end
end
def update_build_state!
......@@ -80,12 +98,24 @@ module Ci
end
end
def discard_build_trace!
metrics.increment_trace_operation(operation: :discarded)
end
def accept_available?
!build_running? && has_checksum? && chunks_migration_enabled?
end
def accept_request?
accept_available? && live_chunks_pending?
def live_chunks_pending?
build.trace_chunks.live.any?
end
def chunks_persisted?
build.trace_chunks.any? && !live_chunks_pending?
end
def pending_state_outdated?
Time.current - pending_state.created_at > ACCEPT_TIMEOUT
end
def build_state
......@@ -100,18 +130,14 @@ module Ci
params.dig(:checksum).present?
end
def has_chunks?
build.trace_chunks.any?
end
def live_chunks_pending?
build.trace_chunks.live.any?
end
def build_running?
build_state == 'running'
end
def pending_state
strong_memoize(:pending_state) { ensure_pending_state }
end
def ensure_pending_state
Ci::BuildPendingState.create_or_find_by!(
build_id: build.id,
......@@ -125,6 +151,32 @@ module Ci
build.pending_state
end
##
# This method is releasing an exclusive lock on a build trace the moment we
# conclude that build status has been written and the build state update
# has been committed to the database.
#
# Because a build state machine schedules a bunch of workers to run after
# build status transition to complete, we do not want to keep the lease
# until all the workers are scheduled because it opens a possibility of
# race conditions happening.
#
# Instead of keeping the lease until the transition is fully done and
# workers are scheduled, we immediately release the lock after the database
# commit happens.
#
def in_build_trace_lock(&block)
build.trace.lock do |_, lease| # rubocop:disable CodeReuse/ActiveRecord
build.run_on_status_commit { lease.cancel }
yield
end
rescue ::Gitlab::Ci::Trace::LockedError
metrics.increment_trace_operation(operation: :locked)
accept_build_state!
end
def chunks_migration_enabled?
::Gitlab::Ci::Features.accept_trace?(build.project)
end
......
---
title: Validate build traces using CRC32 checksums
merge_request: 42829
author:
type: added
......@@ -16,6 +16,7 @@ module Gitlab
ArchiveError = Class.new(StandardError)
AlreadyArchivedError = Class.new(StandardError)
LockedError = Class.new(StandardError)
attr_reader :job
......@@ -130,6 +131,12 @@ module Gitlab
end
end
def lock(&block)
in_write_lock(&block)
rescue FailedToObtainLockError
raise LockedError, "build trace `#{job.id}` is locked"
end
private
def read_stream
......
# frozen_string_literal: true
module Gitlab
module Ci
class Trace
##
# Trace::Checksum class is responsible for calculating a CRC32 checksum
# of an entire build trace using partial build trace chunks stored in a
# database.
#
# CRC32 checksum can be easily calculated by combining partial checksums
# in a right order.
#
# Then we compare CRC32 checksum provided by a GitLab Runner and expect
# it to be the same as the CRC32 checksum derived from partial chunks.
#
class Checksum
include Gitlab::Utils::StrongMemoize
attr_reader :build
def initialize(build)
@build = build
end
def valid?
return false unless state_crc32 > 0
state_crc32 == chunks_crc32
end
def state_crc32
strong_memoize(:crc32) do
build.pending_state&.trace_checksum.then do |checksum|
checksum.to_s.split('crc32:').last.to_i
end
end
end
def chunks_crc32
trace_chunks.reduce(0) do |crc32, chunk|
Zlib.crc32_combine(crc32, chunk.crc32, chunk_size(chunk))
end
end
def last_chunk
strong_memoize(:last_chunk) { trace_chunks.max }
end
##
# Trace chunks will be persisted in a database if an object store is
# not configured - in that case we do not want to load entire raw data
# of all the chunks into memory.
#
# We ignore `raw_data` attribute instead, and rely on internal build
# trace chunk database adapter to handle
# `ActiveModel::MissingAttributeError` exception.
#
# Alternative solution would be separating chunk data from chunk
# metadata on the database level too.
#
def trace_chunks
strong_memoize(:trace_chunks) do
build.trace_chunks.persisted
.select(::Ci::BuildTraceChunk.metadata_attributes)
end
end
private
def chunk_size(chunk)
if chunk == last_chunk
chunk.size
else
::Ci::BuildTraceChunk::CHUNK_SIZE
end
end
end
end
end
end
......@@ -7,7 +7,8 @@ module Gitlab
extend Gitlab::Utils::StrongMemoize
OPERATIONS = [:appended, :streamed, :chunked, :mutated, :overwrite,
:accepted, :finalized, :discarded, :conflict].freeze
:accepted, :finalized, :discarded, :conflict, :locked,
:invalid].freeze
def increment_trace_operation(operation: :unknown)
unless OPERATIONS.include?(operation)
......
......@@ -35,7 +35,7 @@ module Gitlab
lease.obtain(1 + retries)
yield(lease.retried?)
yield(lease.retried?, lease)
ensure
lease&.cancel
end
......
......@@ -53,5 +53,18 @@ FactoryBot.define do
trait :fog_without_data do
data_store { :fog }
end
trait :persisted do
data_store { :database}
transient do
initial_data { 'test data' }
end
after(:build) do |chunk, evaluator|
Ci::BuildTraceChunks::Database.new.set_data(chunk, evaluator.initial_data)
chunk.checksum = chunk.class.crc32(evaluator.initial_data)
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Ci::Trace::Checksum do
let(:build) { create(:ci_build, :running) }
subject { described_class.new(build) }
context 'when build pending state exists' do
before do
create(:ci_build_pending_state, build: build, trace_checksum: 'crc32:3564598592')
end
context 'when matching persisted trace chunks exist' do
before do
create_chunk(index: 0, data: 'a' * 128.kilobytes)
create_chunk(index: 1, data: 'b' * 128.kilobytes)
create_chunk(index: 2, data: 'ccccccccccccccccc')
end
it 'calculates combined trace chunks CRC32 correctly' do
expect(subject.chunks_crc32).to eq 3564598592
expect(subject).to be_valid
end
end
context 'when trace chunks were persisted in a wrong order' do
before do
create_chunk(index: 0, data: 'b' * 128.kilobytes)
create_chunk(index: 1, data: 'a' * 128.kilobytes)
create_chunk(index: 2, data: 'ccccccccccccccccc')
end
it 'makes trace checksum invalid' do
expect(subject).not_to be_valid
end
end
context 'when one of the trace chunks is missing' do
before do
create_chunk(index: 0, data: 'a' * 128.kilobytes)
create_chunk(index: 2, data: 'ccccccccccccccccc')
end
it 'makes trace checksum invalid' do
expect(subject).not_to be_valid
end
end
context 'when checksums of persisted trace chunks do not match' do
before do
create_chunk(index: 0, data: 'a' * 128.kilobytes)
create_chunk(index: 1, data: 'X' * 128.kilobytes)
create_chunk(index: 2, data: 'ccccccccccccccccc')
end
it 'makes trace checksum invalid' do
expect(subject).not_to be_valid
end
end
context 'when persisted trace chunks are missing' do
it 'makes trace checksum invalid' do
expect(subject.state_crc32).to eq 3564598592
expect(subject).not_to be_valid
end
end
end
context 'when build pending state is missing' do
describe '#state_crc32' do
it 'returns zero' do
expect(subject.state_crc32).to be_zero
end
end
describe '#valid?' do
it { is_expected.not_to be_valid }
end
end
describe '#trace_chunks' do
before do
create_chunk(index: 0, data: 'abcdefg')
end
it 'does not load raw_data from a database store' do
subject.trace_chunks.first.then do |chunk|
expect(chunk).to be_database
expect { chunk.raw_data }
.to raise_error ActiveModel::MissingAttributeError
end
end
end
describe '#last_chunk' do
context 'when there are no chunks' do
it 'returns nil' do
expect(subject.last_chunk).to be_nil
end
end
context 'when there are multiple chunks' do
before do
create_chunk(index: 1, data: '1234')
create_chunk(index: 0, data: 'abcd')
end
it 'returns chunk with the highest index' do
expect(subject.last_chunk.chunk_index).to eq 1
end
end
end
def create_chunk(index:, data:)
create(:ci_build_trace_chunk, :persisted, build: build,
chunk_index: index,
initial_data: data)
end
end
......@@ -111,4 +111,13 @@ RSpec.describe Gitlab::Ci::Trace, :clean_gitlab_redis_shared_state do
end
end
end
describe '#lock' do
it 'acquires an exclusive lease on the trace' do
trace.lock do
expect { trace.lock }
.to raise_error described_class::LockedError
end
end
end
end
......@@ -25,13 +25,17 @@ RSpec.describe Gitlab::ExclusiveLeaseHelpers, :clean_gitlab_redis_shared_state d
let!(:lease) { stub_exclusive_lease(unique_key, 'uuid') }
it 'calls the given block' do
expect { |b| class_instance.in_lock(unique_key, &b) }.to yield_with_args(false)
expect { |b| class_instance.in_lock(unique_key, &b) }
.to yield_with_args(false, an_instance_of(described_class::SleepingLock))
end
it 'calls the given block continuously' do
expect { |b| class_instance.in_lock(unique_key, &b) }.to yield_with_args(false)
expect { |b| class_instance.in_lock(unique_key, &b) }.to yield_with_args(false)
expect { |b| class_instance.in_lock(unique_key, &b) }.to yield_with_args(false)
expect { |b| class_instance.in_lock(unique_key, &b) }
.to yield_with_args(false, an_instance_of(described_class::SleepingLock))
expect { |b| class_instance.in_lock(unique_key, &b) }
.to yield_with_args(false, an_instance_of(described_class::SleepingLock))
expect { |b| class_instance.in_lock(unique_key, &b) }
.to yield_with_args(false, an_instance_of(described_class::SleepingLock))
end
it 'cancels the exclusive lease after the block' do
......@@ -74,7 +78,8 @@ RSpec.describe Gitlab::ExclusiveLeaseHelpers, :clean_gitlab_redis_shared_state d
expect(lease).to receive(:try_obtain).exactly(3).times { nil }
expect(lease).to receive(:try_obtain).once { unique_key }
expect { |b| class_instance.in_lock(unique_key, &b) }.to yield_with_args(true)
expect { |b| class_instance.in_lock(unique_key, &b) }
.to yield_with_args(true, an_instance_of(described_class::SleepingLock))
end
end
end
......
......@@ -4652,4 +4652,24 @@ RSpec.describe Ci::Build do
it { is_expected.to be_nil }
end
end
describe '#run_on_status_commit' do
it 'runs provided hook after status commit' do
action = spy('action')
build.run_on_status_commit { action.perform! }
build.success!
expect(action).to have_received(:perform!).once
end
it 'does not run hooks when status has not changed' do
action = spy('action')
build.run_on_status_commit { action.perform! }
build.save!
expect(action).not_to have_received(:perform!)
end
end
end
......@@ -779,4 +779,62 @@ RSpec.describe Ci::BuildTraceChunk, :clean_gitlab_redis_shared_state do
it_behaves_like 'deletes all build_trace_chunk and data in redis'
end
end
describe 'comparable build trace chunks' do
describe '#<=>' do
context 'when chunks are associated with different builds' do
let(:first) { create(:ci_build_trace_chunk, build: build, chunk_index: 1) }
let(:second) { create(:ci_build_trace_chunk, chunk_index: 1) }
it 'returns nil' do
expect(first <=> second).to be_nil
end
end
context 'when there are two chunks with different indexes' do
let(:first) { create(:ci_build_trace_chunk, build: build, chunk_index: 1) }
let(:second) { create(:ci_build_trace_chunk, build: build, chunk_index: 0) }
it 'indicates the the first one is greater than then second' do
expect(first <=> second).to eq 1
end
end
context 'when there are two chunks with the same index within the same build' do
let(:chunk) { create(:ci_build_trace_chunk) }
it 'indicates the these are equal' do
expect(chunk <=> chunk).to be_zero # rubocop:disable Lint/UselessComparison
end
end
end
describe '#==' do
context 'when chunks have the same index' do
let(:chunk) { create(:ci_build_trace_chunk) }
it 'indicates that the chunks are equal' do
expect(chunk).to eq chunk
end
end
context 'when chunks have different indexes' do
let(:first) { create(:ci_build_trace_chunk, build: build, chunk_index: 1) }
let(:second) { create(:ci_build_trace_chunk, build: build, chunk_index: 0) }
it 'indicates that the chunks are not equal' do
expect(first).not_to eq second
end
end
context 'when chunks are associated with different builds' do
let(:first) { create(:ci_build_trace_chunk, build: build, chunk_index: 1) }
let(:second) { create(:ci_build_trace_chunk, chunk_index: 1) }
it 'indicates that the chunks are not equal' do
expect(first).not_to eq second
end
end
end
end
end
......@@ -3,17 +3,21 @@
require 'spec_helper'
RSpec.describe Checksummable do
describe ".hexdigest" do
let(:fake_class) do
Class.new do
include Checksummable
end
subject do
Class.new { include Checksummable }
end
describe ".crc32" do
it 'returns the CRC32 of data' do
expect(subject.crc32('abcd')).to eq 3984772369
end
end
describe ".hexdigest" do
it 'returns the SHA256 sum of the file' do
expected = Digest::SHA256.file(__FILE__).hexdigest
expect(fake_class.hexdigest(__FILE__)).to eq(expected)
expect(subject.hexdigest(__FILE__)).to eq(expected)
end
end
end
......@@ -120,19 +120,15 @@ RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do
context 'when runner retries request after receiving 202' do
it 'responds with 202 and then with 200', :sidekiq_inline do
perform_enqueued_jobs do
update_job(state: 'success', checksum: 'crc32:12345678')
end
update_job(state: 'success', checksum: 'crc32:12345678')
expect(job.reload.pending_state).to be_present
expect(response).to have_gitlab_http_status(:accepted)
expect(job.reload.pending_state).to be_present
perform_enqueued_jobs do
update_job(state: 'success', checksum: 'crc32:12345678')
end
update_job(state: 'success', checksum: 'crc32:12345678')
expect(job.reload.pending_state).not_to be_present
expect(response).to have_gitlab_http_status(:ok)
expect(job.reload.pending_state).not_to be_present
end
end
......@@ -145,7 +141,7 @@ RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do
update_job(state: 'success', checksum: 'crc:12345678')
expect(job.reload).to be_success
expect(job.pending_state).not_to be_present
expect(job.pending_state).to be_present
expect(response).to have_gitlab_http_status(:ok)
expect(response.header).not_to have_key('X-GitLab-Trace-Update-Interval')
end
......
......@@ -85,7 +85,7 @@ RSpec.describe Ci::UpdateBuildStateService do
context 'when build trace has been migrated' do
before do
create(:ci_build_trace_chunk, :database_with_data, build: build)
create(:ci_build_trace_chunk, :persisted, build: build, initial_data: 'abcd')
end
it 'updates a build state' do
......@@ -113,6 +113,48 @@ RSpec.describe Ci::UpdateBuildStateService do
.to have_received(:increment_trace_operation)
.with(operation: :finalized)
end
context 'when trace checksum is not valid' do
it 'increments invalid trace metric' do
execute_with_stubbed_metrics!
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :invalid)
end
end
context 'when trace checksum is valid' do
let(:params) { { checksum: 'crc32:3984772369', state: 'success' } }
it 'does not increment invalid trace metric' do
execute_with_stubbed_metrics!
expect(metrics)
.not_to have_received(:increment_trace_operation)
.with(operation: :invalid)
end
end
context 'when failed to acquire a build trace lock' do
it 'accepts a state update request' do
build.trace.lock do
result = subject.execute
expect(result.status).to eq 202
end
end
it 'increment locked trace metric' do
build.trace.lock do
execute_with_stubbed_metrics!
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :locked)
end
end
end
end
context 'when build trace has not been migrated yet' do
......@@ -146,14 +188,6 @@ RSpec.describe Ci::UpdateBuildStateService do
subject.execute
end
it 'increments trace accepted operation metric' do
execute_with_stubbed_metrics!
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :accepted)
end
it 'creates a pending state record' do
subject.execute
......@@ -165,6 +199,22 @@ RSpec.describe Ci::UpdateBuildStateService do
end
end
it 'increments trace accepted operation metric' do
execute_with_stubbed_metrics!
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :accepted)
end
it 'does not increment invalid trace metric' do
execute_with_stubbed_metrics!
expect(metrics)
.not_to have_received(:increment_trace_operation)
.with(operation: :invalid)
end
context 'when build pending state is outdated' do
before do
build.create_pending_state(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment