Commit f0d3c690 authored by Stan Hu's avatar Stan Hu

Force ASCII-8BIT encodings in CI job trace

This commit adds a feature flag, `ci_job_trace_force_encode`, that will
ensure that CI job traces that contain UTF-8 can be appended with
ASCII-8BIT data.

Previously we saw encoding errors when persisted job logs that contained
UTF-8 characters were appended to ASCII-8BIT data from the HTTP
request. This would cause a 500 error, but the runner would retry
indefinitely with identical PATCH requests. This commit should make the
append work and allow the job to finish.

This is the sequence of events that causes the append to be called:

1. Runner sends a `PUT /api/v4/jobs/:id` to indicate the job is canceled
or finished.

2. `UpdateBuildStateService#accept_build_state!` persists all live job
logs to object storage.

3. `UpdateBuildStateService#accept_build_state!` returns a 202 to the
runner.

4. The runner will continue to send PATCH requests with job logs until
all logs have been sent and received.

5. If the last PATCH request arrives after the job log has been
persisted, we retrieve the data from object storage to append the
remaining lines.

Relates to https://gitlab.com/gitlab-org/gitlab/-/issues/333424

Changelog: fixed
parent 28a19b15
......@@ -121,7 +121,7 @@ module Ci
raise ArgumentError, 'Offset is out of range' if offset > size || offset < 0
return if offset == size # Skip the following process as it doesn't affect anything
self.append("", offset)
self.append(+"", offset)
end
def append(new_data, offset)
......
......@@ -25,10 +25,18 @@ module Ci
files.create(create_attributes(model, new_data))
end
# This is the sequence that causes append_data to be called:
#
# 1. Runner sends a PUT /api/v4/jobs/:id to indicate the job is canceled or finished.
# 2. UpdateBuildStateService#accept_build_state! persists all live job logs to object storage (or filesystem).
# 3. UpdateBuildStateService#accept_build_state! returns a 202 to the runner.
# 4. The runner continues to send PATCH requests with job logs until all logs have been sent and received.
# 5. If the last PATCH request arrives after the job log has been persisted, we
# retrieve the data from object storage to append the remaining lines.
def append_data(model, new_data, offset)
if offset > 0
truncated_data = data(model).to_s.byteslice(0, offset)
new_data = truncated_data + new_data
new_data = append_strings(truncated_data, new_data)
end
set_data(model, new_data)
......@@ -71,6 +79,17 @@ module Ci
private
def append_strings(old_data, new_data)
if Feature.enabled?(:ci_job_trace_force_encode, default_enabled: :yaml)
# When object storage is in use, old_data may be retrieved in UTF-8.
old_data = old_data.force_encoding('ASCII-8BIT')
# new_data should already be in ASCII-8BIT, but just in case it isn't, do this.
new_data = new_data.force_encoding('ASCII-8BIT')
end
old_data + new_data
end
def key(model)
key_raw(model.build_id, model.chunk_index)
end
......
---
name: ci_job_trace_force_encode
introduced_by_url: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/64631
rollout_issue_url: https://gitlab.com/gitlab-org/gitlab/-/issues/333452
milestone: '14.1'
type: development
group: group::verify
default_enabled: false
......@@ -103,19 +103,53 @@ RSpec.describe Ci::BuildTraceChunks::Fog do
end
describe '#append_data' do
let(:model) { create(:ci_build_trace_chunk, :fog_with_data, initial_data: (+'😺').force_encoding('ASCII-8BIT')) }
let(:initial_data) { (+'😺').force_encoding('ASCII-8BIT') }
let(:model) { create(:ci_build_trace_chunk, :fog_with_data, initial_data: initial_data) }
let(:data) { data_store.data(model) }
it 'appends ASCII data' do
data_store.append_data(model, 'hello world', 4)
context 'when ci_job_trace_force_encode is enabled' do
it 'appends ASCII data' do
data_store.append_data(model, +'hello world', 4)
expect(data.encoding).to eq(Encoding.find('ASCII-8BIT'))
expect(data.force_encoding('UTF-8')).to eq('😺hello world')
expect(data.encoding).to eq(Encoding.find('ASCII-8BIT'))
expect(data.force_encoding('UTF-8')).to eq('😺hello world')
end
it 'appends UTF-8 data' do
data_store.append_data(model, +'Résumé', 4)
expect(data.encoding).to eq(Encoding.find('ASCII-8BIT'))
expect(data.force_encoding('UTF-8')).to eq("😺Résumé")
end
context 'when initial data is UTF-8' do
let(:initial_data) { +'😺' }
it 'appends ASCII data' do
data_store.append_data(model, +'hello world', 4)
expect(data.encoding).to eq(Encoding.find('ASCII-8BIT'))
expect(data.force_encoding('UTF-8')).to eq('😺hello world')
end
end
end
it 'throws an exception when appending UTF-8 data' do
expect(Gitlab::ErrorTracking).to receive(:track_and_raise_exception).and_call_original
expect { data_store.append_data(model, 'Résumé', 4) }.to raise_exception(Encoding::CompatibilityError)
context 'when ci_job_trace_force_encode is disabled' do
before do
stub_feature_flags(ci_job_trace_force_encode: false)
end
it 'appends ASCII data' do
data_store.append_data(model, +'hello world', 4)
expect(data.encoding).to eq(Encoding.find('ASCII-8BIT'))
expect(data.force_encoding('UTF-8')).to eq('😺hello world')
end
it 'throws an exception when appending UTF-8 data' do
expect(Gitlab::ErrorTracking).to receive(:track_and_raise_exception).and_call_original
expect { data_store.append_data(model, +'Résumé', 4) }.to raise_exception(Encoding::CompatibilityError)
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment