Commit 6b3f528f authored by rpereira2

Set encoding of logs to UTF-8

* Before the logs can be returned in the API response, the log strings
should be converted to our default encoding, which is UTF-8.

There is a comment on the following issue with more details:
https://gitlab.com/gitlab-org/gitlab/issues/34965#note_292261879
parent 88a7dc21
---
title: Fix 500 error caused by Kubernetes logs not being encoded in UTF-8
merge_request: 25999
author:
type: fixed
......@@ -3,6 +3,9 @@
module PodLogs
class KubernetesService < BaseService
LOGS_LIMIT = 500.freeze
REPLACEMENT_CHAR = "\u{FFFD}"
EncodingHelperError = Class.new(StandardError)
steps :check_arguments,
:check_param_lengths,
......@@ -11,6 +14,8 @@ module PodLogs
:check_pod_name,
:check_container_name,
:pod_logs,
:encode_logs_to_utf8,
:split_logs,
:filter_return_keys
self.reactive_cache_worker_finder = ->(id, _cache_key, namespace, params) { new(::Clusters::Cluster.find(id), namespace, params: params) }
......@@ -18,7 +23,7 @@ module PodLogs
private
def pod_logs(result)
logs = cluster.kubeclient.get_pod_log(
result[:logs] = cluster.kubeclient.get_pod_log(
result[:pod_name],
namespace,
container: result[:container_name],
......@@ -26,7 +31,32 @@ module PodLogs
timestamps: true
).body
result[:logs] = logs.strip.lines(chomp: true).map do |line|
success(result)
rescue Kubeclient::ResourceNotFoundError
error(_('Pod not found'))
rescue Kubeclient::HttpError => e
::Gitlab::ErrorTracking.track_exception(e)
error(_('Kubernetes API returned status code: %{error_code}') % {
error_code: e.error_code
})
end
# Pipeline step that makes sure result[:logs] is a valid UTF-8 string before
# the later steps process it.
#
# Check https://gitlab.com/gitlab-org/gitlab/issues/34965#note_292261879
# for more details on why this is necessary.
#
# The conversion is skipped only when there are no logs at all, or when the
# logs are tagged as UTF-8 *and* contain no invalid byte sequences. A string
# can carry the UTF-8 tag while still holding malformed bytes; checking the
# tag alone would let such a string through untouched.
#
# @param result [Hash] step result; result[:logs] is read and may be replaced
# @return the value of `success(result)`, or of `error(...)` when the
#   conversion raises EncodingHelperError
def encode_logs_to_utf8(result)
  logs = result[:logs]

  return success(result) if logs.nil?
  return success(result) if logs.encoding == Encoding::UTF_8 && logs.valid_encoding?

  result[:logs] = encode_utf8(logs)

  success(result)
rescue EncodingHelperError
  error(_('Unable to convert Kubernetes logs encoding to UTF-8'))
end
def split_logs(result)
result[:logs] = result[:logs].strip.lines(chomp: true).map do |line|
# message contains a RFC3339Nano timestamp, then a space, then the log line.
# resolution of the nanoseconds can vary, so we split on the first space
values = line.split(' ', 2)
......@@ -37,14 +67,22 @@ module PodLogs
end
success(result)
rescue Kubeclient::ResourceNotFoundError
error(_('Pod not found'))
rescue Kubeclient::HttpError => e
::Gitlab::ErrorTracking.track_exception(e)
end
error(_('Kubernetes API returned status code: %{error_code}') % {
error_code: e.error_code
})
# Converts the raw log string to UTF-8 through Gitlab::EncodingHelper,
# substituting REPLACEMENT_CHAR where the helper has to replace characters.
#
# A copy of `logs` is handed to the helper, so the caller's string is left
# as it was.
#
# @param logs [String] raw logs in any encoding
# @return [String] the logs encoded as UTF-8
# @raise [EncodingHelperError] when the helper yields a blank/nil value for
#   non-blank input, or a value that is not tagged as UTF-8
def encode_utf8(logs)
  converted = Gitlab::EncodingHelper.encode_utf8(logs.dup, replace: REPLACEMENT_CHAR)

  # Gitlab::EncodingHelper.encode_utf8 can hand back '' or nil when encoding
  # goes wrong. Surfacing an error is preferable to showing blank logs.
  content_lost = logs.present? && converted.blank?
  wrong_encoding = converted&.encoding != Encoding::UTF_8

  raise EncodingHelperError, 'Could not convert Kubernetes logs to UTF-8' if content_lost || wrong_encoding

  converted
end
end
end
......@@ -11,12 +11,10 @@ describe ::PodLogs::KubernetesService do
let(:pod_name) { 'pod-1' }
let(:container_name) { 'container-1' }
let(:params) { {} }
let(:expected_logs) do
[
{ message: "Log 1", timestamp: "2019-12-13T14:04:22.123456Z" },
{ message: "Log 2", timestamp: "2019-12-13T14:04:23.123456Z" },
{ message: "Log 3", timestamp: "2019-12-13T14:04:24.123456Z" }
]
let(:raw_logs) do
"2019-12-13T14:04:22.123456Z Log 1\n2019-12-13T14:04:23.123456Z Log 2\n" \
"2019-12-13T14:04:24.123456Z Log 3"
end
subject { described_class.new(cluster, namespace, params: params) }
......@@ -28,6 +26,8 @@ describe ::PodLogs::KubernetesService do
container_name: container_name
}
end
let(:expected_logs) { raw_logs }
let(:service) { create(:cluster_platform_kubernetes, :configured) }
it 'returns the logs' do
......@@ -63,4 +63,104 @@ describe ::PodLogs::KubernetesService do
expect(result[:message]).to eq('Kubernetes API returned status code: 500')
end
end
# Exercises the private #encode_logs_to_utf8 step directly (through `send`),
# feeding it a result hash whose :logs entry is `raw_logs`.
describe '#encode_logs_to_utf8', :aggregate_failures do
let(:service) { create(:cluster_platform_kubernetes, :configured) }
# Log line containing a multi-byte character.
let(:expected_logs) { '2019-12-13T14:04:22.123456Z ✔ Started logging errors to Sentry' }
# Same bytes as expected_logs, but tagged as ASCII-8BIT (binary).
let(:raw_logs) { expected_logs.dup.force_encoding(Encoding::ASCII_8BIT) }
let(:result) { subject.send(:encode_logs_to_utf8, result_arg) }
let(:result_arg) do
{
pod_name: pod_name,
container_name: container_name,
logs: raw_logs
}
end
it 'converts logs to utf-8' do
expect(result[:status]).to eq(:success)
expect(result[:logs]).to eq(expected_logs)
end
# The next three examples stub Gitlab::EncodingHelper.encode_utf8 so that it
# returns an unusable value for the non-empty raw_logs, and expect the step
# to report an error.
it 'returns error if output of encoding helper is blank' do
allow(Gitlab::EncodingHelper).to receive(:encode_utf8).and_return('')
expect(result[:status]).to eq(:error)
expect(result[:message]).to eq('Unable to convert Kubernetes logs encoding to UTF-8')
end
it 'returns error if output of encoding helper is nil' do
allow(Gitlab::EncodingHelper).to receive(:encode_utf8).and_return(nil)
expect(result[:status]).to eq(:error)
expect(result[:message]).to eq('Unable to convert Kubernetes logs encoding to UTF-8')
end
it 'returns error if output of encoding helper is not UTF-8' do
allow(Gitlab::EncodingHelper).to receive(:encode_utf8)
.and_return(expected_logs.encode(Encoding::UTF_16BE))
expect(result[:status]).to eq(:error)
expect(result[:message]).to eq('Unable to convert Kubernetes logs encoding to UTF-8')
end
# nil logs are expected to pass through as nil with a success status.
context 'when logs are nil' do
let(:raw_logs) { nil }
let(:expected_logs) { nil }
it 'returns nil' do
expect(result[:status]).to eq(:success)
expect(result[:logs]).to eq(expected_logs)
end
end
# An empty binary-tagged string is expected to come back as '' with success.
context 'when logs are blank' do
let(:raw_logs) { (+'').force_encoding(Encoding::ASCII_8BIT) }
let(:expected_logs) { '' }
it 'returns blank string' do
expect(result[:status]).to eq(:success)
expect(result[:logs]).to eq(expected_logs)
end
end
# Input that is already UTF-8 is expected to come back unchanged.
context 'when logs are already in utf-8' do
let(:raw_logs) { expected_logs }
it 'does not fail' do
expect(result[:status]).to eq(:success)
expect(result[:logs]).to eq(expected_logs)
end
end
end
# Exercises the private #split_logs step directly (through `send`) and expects
# the raw log text to be turned into an array of { message:, timestamp: } hashes.
# NOTE(review): `raw_logs` is not defined in this block; presumably it is the
# three-line `let(:raw_logs)` of the enclosing spec — confirm.
describe '#split_logs' do
let(:service) { create(:cluster_platform_kubernetes, :configured) }
let(:expected_logs) do
[
{ message: "Log 1", timestamp: "2019-12-13T14:04:22.123456Z" },
{ message: "Log 2", timestamp: "2019-12-13T14:04:23.123456Z" },
{ message: "Log 3", timestamp: "2019-12-13T14:04:24.123456Z" }
]
end
let(:result_arg) do
{
pod_name: pod_name,
container_name: container_name,
logs: raw_logs
}
end
it 'returns the logs' do
result = subject.send(:split_logs, result_arg)
# aggregate_failures reports both expectations even if the first one fails.
aggregate_failures do
expect(result[:status]).to eq(:success)
expect(result[:logs]).to eq(expected_logs)
end
end
end
end
......@@ -50,7 +50,7 @@ module Gitlab
detect && detect[:type] == :binary
end
def encode_utf8(message)
def encode_utf8(message, replace: "")
message = force_encode_utf8(message)
return message if message.valid_encoding?
......@@ -64,7 +64,7 @@ module Gitlab
''
end
else
clean(message)
clean(message, replace: replace)
end
rescue ArgumentError
nil
......@@ -94,8 +94,13 @@ module Gitlab
message.force_encoding("UTF-8")
end
def clean(message)
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "".encode("UTF-16BE"))
# Round-trips `message` through UTF-16BE and back to UTF-8, substituting
# `replace` for every invalid or undefined character, then strips NUL
# characters from the result.
#
# @param message [String] text to sanitise
# @param replace [String] substitute for characters that cannot be converted
#   (defaults to the empty string, i.e. such characters are dropped)
# @return [String] the cleaned text encoded as UTF-8
def clean(message, replace: "")
  utf16_replacement = replace.encode("UTF-16BE")
  as_utf16 = message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: utf16_replacement)
  as_utf8 = as_utf16.encode("UTF-8")

  as_utf8.gsub("\0".encode("UTF-8"), "")
end
......
......@@ -20930,6 +20930,9 @@ msgstr ""
msgid "Unable to connect to server: %{error}"
msgstr ""
msgid "Unable to convert Kubernetes logs encoding to UTF-8"
msgstr ""
msgid "Unable to fetch unscanned projects"
msgstr ""
......
......@@ -128,6 +128,12 @@ describe Gitlab::EncodingHelper do
expect { ext_class.encode_utf8('') }.not_to raise_error
end
# Builds a binary-tagged string from the UTF-16LE bytes of 'hællo' and expects
# encode_utf8, given U+FFFD as the `replace` argument, to return "h�llo".
it 'replaces invalid and undefined chars with the replace argument' do
str = 'hællo'.encode(Encoding::UTF_16LE).force_encoding(Encoding::ASCII_8BIT)
expect(ext_class.encode_utf8(str, replace: "\u{FFFD}")).to eq("h�llo")
end
context 'with strings that can be forcefully encoded into utf8' do
let(:test_string) do
"refs/heads/FixSymbolsTitleDropdown".encode("ASCII-8BIT")
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment