Commit 91fb7752 authored by Grzegorz Bizon's avatar Grzegorz Bizon

Validate build traces using CRC32 checksums

This commit adds a mechanism for validating correctness of build logs
using CRC32 checksum algorithm.
parent 8f3c5803
...@@ -29,6 +29,7 @@ module Ci ...@@ -29,6 +29,7 @@ module Ci
} }
scope :live, -> { redis } scope :live, -> { redis }
scope :persisted, -> { not_redis.order(:chunk_index) }
class << self class << self
def all_stores def all_stores
...@@ -63,12 +64,24 @@ module Ci ...@@ -63,12 +64,24 @@ module Ci
get_store_class(store).delete_keys(value) get_store_class(store).delete_keys(value)
end end
end end
##
# Sometimes we do not want to read raw data. This method makes it easier
# to find attributes that are just metadata excluding raw data.
#
def metadata_attributes
attribute_names - %w[raw_data]
end
end end
def data def data
@data ||= get_data.to_s @data ||= get_data.to_s
end end
def crc32
checksum.to_i
end
def truncate(offset = 0) def truncate(offset = 0)
raise ArgumentError, 'Offset is out of range' if offset > size || offset < 0 raise ArgumentError, 'Offset is out of range' if offset > size || offset < 0
return if offset == size # Skip the following process as it doesn't affect anything return if offset == size # Skip the following process as it doesn't affect anything
...@@ -150,7 +163,7 @@ module Ci ...@@ -150,7 +163,7 @@ module Ci
self.raw_data = nil self.raw_data = nil
self.data_store = new_store self.data_store = new_store
self.checksum = crc32(current_data) self.checksum = self.class.crc32(current_data)
## ##
# We need to so persist data then save a new store identifier before we # We need to so persist data then save a new store identifier before we
......
...@@ -17,6 +17,8 @@ module Ci ...@@ -17,6 +17,8 @@ module Ci
def data(model) def data(model)
model.raw_data model.raw_data
rescue ActiveModel::MissingAttributeError
model.reset.raw_data
end end
def set_data(model, new_data) def set_data(model, new_data)
......
...@@ -3,11 +3,11 @@ ...@@ -3,11 +3,11 @@
module Checksummable module Checksummable
extend ActiveSupport::Concern extend ActiveSupport::Concern
class_methods do
def crc32(data) def crc32(data)
Zlib.crc32(data) Zlib.crc32(data)
end end
class_methods do
def hexdigest(path) def hexdigest(path)
::Digest::SHA256.file(path).hexdigest ::Digest::SHA256.file(path).hexdigest
end end
......
...@@ -2,6 +2,8 @@ ...@@ -2,6 +2,8 @@
module Ci module Ci
class UpdateBuildStateService class UpdateBuildStateService
include Gitlab::Utils::StrongMemoize
Result = Struct.new(:status, :backoff, keyword_init: true) Result = Struct.new(:status, :backoff, keyword_init: true)
ACCEPT_TIMEOUT = 5.minutes.freeze ACCEPT_TIMEOUT = 5.minutes.freeze
...@@ -16,11 +18,12 @@ module Ci ...@@ -16,11 +18,12 @@ module Ci
def execute def execute
overwrite_trace! if has_trace? overwrite_trace! if has_trace?
create_pending_state! if accept_available?
if accept_request? if accept_request?
accept_build_state! accept_build_state!
else else
check_migration_state validate_build_trace!
update_build_state! update_build_state!
end end
end end
...@@ -28,9 +31,7 @@ module Ci ...@@ -28,9 +31,7 @@ module Ci
private private
def accept_build_state! def accept_build_state!
state_created = ensure_pending_state.created_at if Time.current - pending_state.created_at > ACCEPT_TIMEOUT
if Time.current - state_created > ACCEPT_TIMEOUT
metrics.increment_trace_operation(operation: :discarded) metrics.increment_trace_operation(operation: :discarded)
return update_build_state! return update_build_state!
...@@ -42,7 +43,7 @@ module Ci ...@@ -42,7 +43,7 @@ module Ci
metrics.increment_trace_operation(operation: :accepted) metrics.increment_trace_operation(operation: :accepted)
::Gitlab::Ci::Runner::Backoff.new(state_created).then do |backoff| ::Gitlab::Ci::Runner::Backoff.new(pending_state.created_at).then do |backoff|
Result.new(status: 202, backoff: backoff.to_seconds) Result.new(status: 202, backoff: backoff.to_seconds)
end end
end end
...@@ -53,12 +54,20 @@ module Ci ...@@ -53,12 +54,20 @@ module Ci
build.trace.set(params[:trace]) if Gitlab::Ci::Features.trace_overwrite? build.trace.set(params[:trace]) if Gitlab::Ci::Features.trace_overwrite?
end end
def check_migration_state def create_pending_state!
pending_state.created_at
end
def validate_build_trace!
return unless accept_available? return unless accept_available?
if has_chunks? && !live_chunks_pending? unless ::Gitlab::Ci::Trace::Checksum.new(build).valid?
metrics.increment_trace_operation(operation: :finalized) metrics.increment_trace_operation(operation: :invalid)
end end
return unless chunks_persisted?
metrics.increment_trace_operation(operation: :finalized)
end end
def update_build_state! def update_build_state!
...@@ -100,18 +109,22 @@ module Ci ...@@ -100,18 +109,22 @@ module Ci
params.dig(:checksum).present? params.dig(:checksum).present?
end end
def has_chunks?
build.trace_chunks.any?
end
def live_chunks_pending? def live_chunks_pending?
build.trace_chunks.live.any? build.trace_chunks.live.any?
end end
def chunks_persisted?
build.trace_chunks.any? && !live_chunks_pending?
end
def build_running? def build_running?
build_state == 'running' build_state == 'running'
end end
def pending_state
strong_memoize(:pending_state) { ensure_pending_state }
end
def ensure_pending_state def ensure_pending_state
Ci::BuildPendingState.create_or_find_by!( Ci::BuildPendingState.create_or_find_by!(
build_id: build.id, build_id: build.id,
......
---
title: Validate build traces using CRC32 checksums
merge_request: 42829
author:
type: added
# frozen_string_literal: true
module Gitlab
module Ci
class Trace
##
# Trace::Checksum class is responsible for calculating a CRC32 checksum
# of an entire build trace using partial build trace chunks stored in a
# database.
#
# CRC32 checksum can be easily calculated by combining partial checksums
# in a right order.
#
# Then we compare CRC32 checksum provided by a GitLab Runner and expect
# it to be the same as the CRC32 checksum derived from partial chunks.
#
class Checksum
include Gitlab::Utils::StrongMemoize
attr_reader :build
def initialize(build)
@build = build
end
def valid?
return false unless state_crc32 > 0
state_crc32 == chunks_crc32
end
def state_crc32
strong_memoize(:crc32) do
build.pending_state&.trace_checksum.then do |checksum|
checksum.to_s.split('crc32:').last.to_i
end
end
end
def chunks_crc32
trace_chunks.reduce(0) do |crc32, chunk|
Zlib.crc32_combine(crc32, chunk.crc32, chunk_size(chunk))
end
end
def chunks_count
strong_memoize(:chunks_count) do
build.trace_chunks.maximum(:chunk_index)
end
end
##
# Trace chunks will be persisted in a database if an object store is
# not configured - in that case we do not want to load entire raw data
# of all the chunks into memory.
#
# We ignore `raw_data` attribute instead, and rely on internal build
# trace chunk database adapter to handle
# `ActiveModel::MissingAttributeError` exception.
#
# Alternative solution would be separating chunk data from chunk
# metadata on the database level too.
#
def trace_chunks
build.trace_chunks.persisted
.select(::Ci::BuildTraceChunk.metadata_attributes)
end
private
def chunk_size(chunk)
if chunk.chunk_index == chunks_count
chunk.size
else
::Ci::BuildTraceChunk::CHUNK_SIZE
end
end
end
end
end
end
...@@ -7,7 +7,7 @@ module Gitlab ...@@ -7,7 +7,7 @@ module Gitlab
extend Gitlab::Utils::StrongMemoize extend Gitlab::Utils::StrongMemoize
OPERATIONS = [:appended, :streamed, :chunked, :mutated, :overwrite, OPERATIONS = [:appended, :streamed, :chunked, :mutated, :overwrite,
:accepted, :finalized, :discarded, :conflict].freeze :accepted, :finalized, :discarded, :conflict, :invalid].freeze
def increment_trace_operation(operation: :unknown) def increment_trace_operation(operation: :unknown)
unless OPERATIONS.include?(operation) unless OPERATIONS.include?(operation)
......
...@@ -53,5 +53,18 @@ FactoryBot.define do ...@@ -53,5 +53,18 @@ FactoryBot.define do
trait :fog_without_data do trait :fog_without_data do
data_store { :fog } data_store { :fog }
end end
trait :persisted do
data_store { :database}
transient do
initial_data { 'test data' }
end
after(:build) do |chunk, evaluator|
Ci::BuildTraceChunks::Database.new.set_data(chunk, evaluator.initial_data)
chunk.checksum = chunk.class.crc32(evaluator.initial_data)
end
end
end end
end end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Ci::Trace::Checksum do
let(:build) { create(:ci_build, :running) }
subject { described_class.new(build) }
context 'when build pending state exits' do
before do
create(:ci_build_pending_state, build: build, trace_checksum: 'crc32:3564598592')
end
context 'when matching persisted trace chunks exist' do
before do
create_chunk(index: 0, data: 'a' * 128.kilobytes)
create_chunk(index: 1, data: 'b' * 128.kilobytes)
create_chunk(index: 2, data: 'ccccccccccccccccc')
end
it 'calculates combined trace chunks CRC32 correctly' do
expect(subject.chunks_crc32).to eq 3564598592
expect(subject).to be_valid
end
end
context 'when trace chunks were persisted in a wrong order' do
before do
create_chunk(index: 0, data: 'b' * 128.kilobytes)
create_chunk(index: 1, data: 'a' * 128.kilobytes)
create_chunk(index: 2, data: 'ccccccccccccccccc')
end
it 'makes trace checksum invalid' do
expect(subject).not_to be_valid
end
end
context 'when one of the trace chunks is missing' do
before do
create_chunk(index: 0, data: 'a' * 128.kilobytes)
create_chunk(index: 2, data: 'ccccccccccccccccc')
end
it 'makes trace checksum invalid' do
expect(subject).not_to be_valid
end
end
context 'when checksums of persisted trace chunks do not match' do
before do
create_chunk(index: 0, data: 'a' * 128.kilobytes)
create_chunk(index: 1, data: 'X' * 128.kilobytes)
create_chunk(index: 2, data: 'ccccccccccccccccc')
end
it 'makes trace checksum invalid' do
expect(subject).not_to be_valid
end
end
context 'when persisted trace chunks are missing' do
it 'makes trace checksum invalid' do
expect(subject.state_crc32).to eq 3564598592
expect(subject).not_to be_valid
end
end
end
context 'when build pending state is missing' do
describe '#state_crc32' do
it 'returns zero' do
expect(subject.state_crc32).to be_zero
end
end
describe '#valid?' do
it { is_expected.not_to be_valid }
end
end
describe '#trace_chunks' do
before do
create_chunk(index: 0, data: 'abcdefg')
end
it 'does not load raw_data from a database store' do
subject.trace_chunks.first.then do |chunk|
expect(chunk).to be_database
expect { chunk.raw_data }
.to raise_error ActiveModel::MissingAttributeError
end
end
end
def create_chunk(index:, data:)
create(:ci_build_trace_chunk, :persisted, build: build,
chunk_index: index,
initial_data: data)
end
end
...@@ -3,17 +3,21 @@ ...@@ -3,17 +3,21 @@
require 'spec_helper' require 'spec_helper'
RSpec.describe Checksummable do RSpec.describe Checksummable do
describe ".hexdigest" do subject do
let(:fake_class) do Class.new { include Checksummable }
Class.new do end
include Checksummable
describe ".crc32" do
it 'returns the CRC32 of data' do
expect(subject.crc32('abcd')).to eq 3984772369
end end
end end
describe ".hexdigest" do
it 'returns the SHA256 sum of the file' do it 'returns the SHA256 sum of the file' do
expected = Digest::SHA256.file(__FILE__).hexdigest expected = Digest::SHA256.file(__FILE__).hexdigest
expect(fake_class.hexdigest(__FILE__)).to eq(expected) expect(subject.hexdigest(__FILE__)).to eq(expected)
end end
end end
end end
...@@ -145,7 +145,7 @@ RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do ...@@ -145,7 +145,7 @@ RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do
update_job(state: 'success', checksum: 'crc:12345678') update_job(state: 'success', checksum: 'crc:12345678')
expect(job.reload).to be_success expect(job.reload).to be_success
expect(job.pending_state).not_to be_present expect(job.pending_state).to be_present
expect(response).to have_gitlab_http_status(:ok) expect(response).to have_gitlab_http_status(:ok)
expect(response.header).not_to have_key('X-GitLab-Trace-Update-Interval') expect(response.header).not_to have_key('X-GitLab-Trace-Update-Interval')
end end
......
...@@ -85,7 +85,7 @@ RSpec.describe Ci::UpdateBuildStateService do ...@@ -85,7 +85,7 @@ RSpec.describe Ci::UpdateBuildStateService do
context 'when build trace has been migrated' do context 'when build trace has been migrated' do
before do before do
create(:ci_build_trace_chunk, :database_with_data, build: build) create(:ci_build_trace_chunk, :persisted, build: build, initial_data: 'abcd')
end end
it 'updates a build state' do it 'updates a build state' do
...@@ -113,6 +113,28 @@ RSpec.describe Ci::UpdateBuildStateService do ...@@ -113,6 +113,28 @@ RSpec.describe Ci::UpdateBuildStateService do
.to have_received(:increment_trace_operation) .to have_received(:increment_trace_operation)
.with(operation: :finalized) .with(operation: :finalized)
end end
context 'when trace checksum is not valid' do
it 'increments invalid trace metric' do
execute_with_stubbed_metrics!
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :invalid)
end
end
context 'when trace checksum is valid' do
let(:params) { { checksum: 'crc32:3984772369', state: 'success' } }
it 'does not increment invalid trace metric' do
execute_with_stubbed_metrics!
expect(metrics)
.not_to have_received(:increment_trace_operation)
.with(operation: :invalid)
end
end
end end
context 'when build trace has not been migrated yet' do context 'when build trace has not been migrated yet' do
...@@ -146,14 +168,6 @@ RSpec.describe Ci::UpdateBuildStateService do ...@@ -146,14 +168,6 @@ RSpec.describe Ci::UpdateBuildStateService do
subject.execute subject.execute
end end
it 'increments trace accepted operation metric' do
execute_with_stubbed_metrics!
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :accepted)
end
it 'creates a pending state record' do it 'creates a pending state record' do
subject.execute subject.execute
...@@ -165,6 +179,22 @@ RSpec.describe Ci::UpdateBuildStateService do ...@@ -165,6 +179,22 @@ RSpec.describe Ci::UpdateBuildStateService do
end end
end end
it 'increments trace accepted operation metric' do
execute_with_stubbed_metrics!
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :accepted)
end
it 'does not increment invalid trace metric' do
execute_with_stubbed_metrics!
expect(metrics)
.not_to have_received(:increment_trace_operation)
.with(operation: :invalid)
end
context 'when build pending state is outdated' do context 'when build pending state is outdated' do
before do before do
build.create_pending_state( build.create_pending_state(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment