Commit 91fb7752 authored by Grzegorz Bizon's avatar Grzegorz Bizon

Validate build traces using CRC32 checksums

This commit adds a mechanism for validating correctness of build logs
using CRC32 checksum algorithm.
parent 8f3c5803
......@@ -29,6 +29,7 @@ module Ci
}
scope :live, -> { redis }
scope :persisted, -> { not_redis.order(:chunk_index) }
class << self
def all_stores
......@@ -63,12 +64,24 @@ module Ci
get_store_class(store).delete_keys(value)
end
end
##
# Sometimes we do not want to read raw data. This method makes it easier
# to find attributes that are just metadata excluding raw data.
#
def metadata_attributes
attribute_names - %w[raw_data]
end
end
def data
@data ||= get_data.to_s
end
def crc32
checksum.to_i
end
def truncate(offset = 0)
raise ArgumentError, 'Offset is out of range' if offset > size || offset < 0
return if offset == size # Skip the following process as it doesn't affect anything
......@@ -150,7 +163,7 @@ module Ci
self.raw_data = nil
self.data_store = new_store
self.checksum = crc32(current_data)
self.checksum = self.class.crc32(current_data)
##
# We need to so persist data then save a new store identifier before we
......
......@@ -17,6 +17,8 @@ module Ci
def data(model)
model.raw_data
rescue ActiveModel::MissingAttributeError
model.reset.raw_data
end
def set_data(model, new_data)
......
......@@ -3,11 +3,11 @@
module Checksummable
extend ActiveSupport::Concern
def crc32(data)
Zlib.crc32(data)
end
class_methods do
def crc32(data)
Zlib.crc32(data)
end
def hexdigest(path)
::Digest::SHA256.file(path).hexdigest
end
......
......@@ -2,6 +2,8 @@
module Ci
class UpdateBuildStateService
include Gitlab::Utils::StrongMemoize
Result = Struct.new(:status, :backoff, keyword_init: true)
ACCEPT_TIMEOUT = 5.minutes.freeze
......@@ -16,11 +18,12 @@ module Ci
def execute
overwrite_trace! if has_trace?
create_pending_state! if accept_available?
if accept_request?
accept_build_state!
else
check_migration_state
validate_build_trace!
update_build_state!
end
end
......@@ -28,9 +31,7 @@ module Ci
private
def accept_build_state!
state_created = ensure_pending_state.created_at
if Time.current - state_created > ACCEPT_TIMEOUT
if Time.current - pending_state.created_at > ACCEPT_TIMEOUT
metrics.increment_trace_operation(operation: :discarded)
return update_build_state!
......@@ -42,7 +43,7 @@ module Ci
metrics.increment_trace_operation(operation: :accepted)
::Gitlab::Ci::Runner::Backoff.new(state_created).then do |backoff|
::Gitlab::Ci::Runner::Backoff.new(pending_state.created_at).then do |backoff|
Result.new(status: 202, backoff: backoff.to_seconds)
end
end
......@@ -53,12 +54,20 @@ module Ci
build.trace.set(params[:trace]) if Gitlab::Ci::Features.trace_overwrite?
end
def check_migration_state
def create_pending_state!
pending_state.created_at
end
def validate_build_trace!
return unless accept_available?
if has_chunks? && !live_chunks_pending?
metrics.increment_trace_operation(operation: :finalized)
unless ::Gitlab::Ci::Trace::Checksum.new(build).valid?
metrics.increment_trace_operation(operation: :invalid)
end
return unless chunks_persisted?
metrics.increment_trace_operation(operation: :finalized)
end
def update_build_state!
......@@ -100,18 +109,22 @@ module Ci
params.dig(:checksum).present?
end
def has_chunks?
build.trace_chunks.any?
end
def live_chunks_pending?
build.trace_chunks.live.any?
end
def chunks_persisted?
build.trace_chunks.any? && !live_chunks_pending?
end
def build_running?
build_state == 'running'
end
def pending_state
strong_memoize(:pending_state) { ensure_pending_state }
end
def ensure_pending_state
Ci::BuildPendingState.create_or_find_by!(
build_id: build.id,
......
---
title: Validate build traces using CRC32 checksums
merge_request: 42829
author:
type: added
# frozen_string_literal: true
module Gitlab
module Ci
class Trace
##
# Trace::Checksum class is responsible for calculating a CRC32 checksum
# of an entire build trace using partial build trace chunks stored in a
# database.
#
# CRC32 checksum can be easily calculated by combining partial checksums
# in a right order.
#
# Then we compare CRC32 checksum provided by a GitLab Runner and expect
# it to be the same as the CRC32 checksum derived from partial chunks.
#
class Checksum
include Gitlab::Utils::StrongMemoize
attr_reader :build
def initialize(build)
@build = build
end
def valid?
return false unless state_crc32 > 0
state_crc32 == chunks_crc32
end
def state_crc32
strong_memoize(:crc32) do
build.pending_state&.trace_checksum.then do |checksum|
checksum.to_s.split('crc32:').last.to_i
end
end
end
def chunks_crc32
trace_chunks.reduce(0) do |crc32, chunk|
Zlib.crc32_combine(crc32, chunk.crc32, chunk_size(chunk))
end
end
def chunks_count
strong_memoize(:chunks_count) do
build.trace_chunks.maximum(:chunk_index)
end
end
##
# Trace chunks will be persisted in a database if an object store is
# not configured - in that case we do not want to load entire raw data
# of all the chunks into memory.
#
# We ignore `raw_data` attribute instead, and rely on internal build
# trace chunk database adapter to handle
# `ActiveModel::MissingAttributeError` exception.
#
# Alternative solution would be separating chunk data from chunk
# metadata on the database level too.
#
def trace_chunks
build.trace_chunks.persisted
.select(::Ci::BuildTraceChunk.metadata_attributes)
end
private
def chunk_size(chunk)
if chunk.chunk_index == chunks_count
chunk.size
else
::Ci::BuildTraceChunk::CHUNK_SIZE
end
end
end
end
end
end
......@@ -7,7 +7,7 @@ module Gitlab
extend Gitlab::Utils::StrongMemoize
OPERATIONS = [:appended, :streamed, :chunked, :mutated, :overwrite,
:accepted, :finalized, :discarded, :conflict].freeze
:accepted, :finalized, :discarded, :conflict, :invalid].freeze
def increment_trace_operation(operation: :unknown)
unless OPERATIONS.include?(operation)
......
......@@ -53,5 +53,18 @@ FactoryBot.define do
trait :fog_without_data do
data_store { :fog }
end
trait :persisted do
data_store { :database}
transient do
initial_data { 'test data' }
end
after(:build) do |chunk, evaluator|
Ci::BuildTraceChunks::Database.new.set_data(chunk, evaluator.initial_data)
chunk.checksum = chunk.class.crc32(evaluator.initial_data)
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
RSpec.describe Gitlab::Ci::Trace::Checksum do
let(:build) { create(:ci_build, :running) }
subject { described_class.new(build) }
context 'when build pending state exits' do
before do
create(:ci_build_pending_state, build: build, trace_checksum: 'crc32:3564598592')
end
context 'when matching persisted trace chunks exist' do
before do
create_chunk(index: 0, data: 'a' * 128.kilobytes)
create_chunk(index: 1, data: 'b' * 128.kilobytes)
create_chunk(index: 2, data: 'ccccccccccccccccc')
end
it 'calculates combined trace chunks CRC32 correctly' do
expect(subject.chunks_crc32).to eq 3564598592
expect(subject).to be_valid
end
end
context 'when trace chunks were persisted in a wrong order' do
before do
create_chunk(index: 0, data: 'b' * 128.kilobytes)
create_chunk(index: 1, data: 'a' * 128.kilobytes)
create_chunk(index: 2, data: 'ccccccccccccccccc')
end
it 'makes trace checksum invalid' do
expect(subject).not_to be_valid
end
end
context 'when one of the trace chunks is missing' do
before do
create_chunk(index: 0, data: 'a' * 128.kilobytes)
create_chunk(index: 2, data: 'ccccccccccccccccc')
end
it 'makes trace checksum invalid' do
expect(subject).not_to be_valid
end
end
context 'when checksums of persisted trace chunks do not match' do
before do
create_chunk(index: 0, data: 'a' * 128.kilobytes)
create_chunk(index: 1, data: 'X' * 128.kilobytes)
create_chunk(index: 2, data: 'ccccccccccccccccc')
end
it 'makes trace checksum invalid' do
expect(subject).not_to be_valid
end
end
context 'when persisted trace chunks are missing' do
it 'makes trace checksum invalid' do
expect(subject.state_crc32).to eq 3564598592
expect(subject).not_to be_valid
end
end
end
context 'when build pending state is missing' do
describe '#state_crc32' do
it 'returns zero' do
expect(subject.state_crc32).to be_zero
end
end
describe '#valid?' do
it { is_expected.not_to be_valid }
end
end
describe '#trace_chunks' do
before do
create_chunk(index: 0, data: 'abcdefg')
end
it 'does not load raw_data from a database store' do
subject.trace_chunks.first.then do |chunk|
expect(chunk).to be_database
expect { chunk.raw_data }
.to raise_error ActiveModel::MissingAttributeError
end
end
end
def create_chunk(index:, data:)
create(:ci_build_trace_chunk, :persisted, build: build,
chunk_index: index,
initial_data: data)
end
end
......@@ -3,17 +3,21 @@
require 'spec_helper'
RSpec.describe Checksummable do
describe ".hexdigest" do
let(:fake_class) do
Class.new do
include Checksummable
end
subject do
Class.new { include Checksummable }
end
describe ".crc32" do
it 'returns the CRC32 of data' do
expect(subject.crc32('abcd')).to eq 3984772369
end
end
describe ".hexdigest" do
it 'returns the SHA256 sum of the file' do
expected = Digest::SHA256.file(__FILE__).hexdigest
expect(fake_class.hexdigest(__FILE__)).to eq(expected)
expect(subject.hexdigest(__FILE__)).to eq(expected)
end
end
end
......@@ -145,7 +145,7 @@ RSpec.describe API::Ci::Runner, :clean_gitlab_redis_shared_state do
update_job(state: 'success', checksum: 'crc:12345678')
expect(job.reload).to be_success
expect(job.pending_state).not_to be_present
expect(job.pending_state).to be_present
expect(response).to have_gitlab_http_status(:ok)
expect(response.header).not_to have_key('X-GitLab-Trace-Update-Interval')
end
......
......@@ -85,7 +85,7 @@ RSpec.describe Ci::UpdateBuildStateService do
context 'when build trace has been migrated' do
before do
create(:ci_build_trace_chunk, :database_with_data, build: build)
create(:ci_build_trace_chunk, :persisted, build: build, initial_data: 'abcd')
end
it 'updates a build state' do
......@@ -113,6 +113,28 @@ RSpec.describe Ci::UpdateBuildStateService do
.to have_received(:increment_trace_operation)
.with(operation: :finalized)
end
context 'when trace checksum is not valid' do
it 'increments invalid trace metric' do
execute_with_stubbed_metrics!
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :invalid)
end
end
context 'when trace checksum is valid' do
let(:params) { { checksum: 'crc32:3984772369', state: 'success' } }
it 'does not increment invalid trace metric' do
execute_with_stubbed_metrics!
expect(metrics)
.not_to have_received(:increment_trace_operation)
.with(operation: :invalid)
end
end
end
context 'when build trace has not been migrated yet' do
......@@ -146,14 +168,6 @@ RSpec.describe Ci::UpdateBuildStateService do
subject.execute
end
it 'increments trace accepted operation metric' do
execute_with_stubbed_metrics!
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :accepted)
end
it 'creates a pending state record' do
subject.execute
......@@ -165,6 +179,22 @@ RSpec.describe Ci::UpdateBuildStateService do
end
end
it 'increments trace accepted operation metric' do
execute_with_stubbed_metrics!
expect(metrics)
.to have_received(:increment_trace_operation)
.with(operation: :accepted)
end
it 'does not increment invalid trace metric' do
execute_with_stubbed_metrics!
expect(metrics)
.not_to have_received(:increment_trace_operation)
.with(operation: :invalid)
end
context 'when build pending state is outdated' do
before do
build.create_pending_state(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment