Commit 6b3f528f authored by rpereira2's avatar rpereira2

Set encoding of logs to UTF-8

* Before the logs can be returned in the API response, the log strings
should be converted to our default encoding, which is UTF-8.

There is a comment on the following issue with more details:
https://gitlab.com/gitlab-org/gitlab/issues/34965#note_292261879
parent 88a7dc21
---
title: Fix 500 error caused by Kubernetes logs not being encoded in UTF-8
merge_request: 25999
author:
type: fixed
...@@ -3,6 +3,9 @@ ...@@ -3,6 +3,9 @@
module PodLogs module PodLogs
class KubernetesService < BaseService class KubernetesService < BaseService
LOGS_LIMIT = 500.freeze LOGS_LIMIT = 500.freeze
REPLACEMENT_CHAR = "\u{FFFD}"
EncodingHelperError = Class.new(StandardError)
steps :check_arguments, steps :check_arguments,
:check_param_lengths, :check_param_lengths,
...@@ -11,6 +14,8 @@ module PodLogs ...@@ -11,6 +14,8 @@ module PodLogs
:check_pod_name, :check_pod_name,
:check_container_name, :check_container_name,
:pod_logs, :pod_logs,
:encode_logs_to_utf8,
:split_logs,
:filter_return_keys :filter_return_keys
self.reactive_cache_worker_finder = ->(id, _cache_key, namespace, params) { new(::Clusters::Cluster.find(id), namespace, params: params) } self.reactive_cache_worker_finder = ->(id, _cache_key, namespace, params) { new(::Clusters::Cluster.find(id), namespace, params: params) }
...@@ -18,7 +23,7 @@ module PodLogs ...@@ -18,7 +23,7 @@ module PodLogs
private private
def pod_logs(result) def pod_logs(result)
logs = cluster.kubeclient.get_pod_log( result[:logs] = cluster.kubeclient.get_pod_log(
result[:pod_name], result[:pod_name],
namespace, namespace,
container: result[:container_name], container: result[:container_name],
...@@ -26,7 +31,32 @@ module PodLogs ...@@ -26,7 +31,32 @@ module PodLogs
timestamps: true timestamps: true
).body ).body
result[:logs] = logs.strip.lines(chomp: true).map do |line| success(result)
rescue Kubeclient::ResourceNotFoundError
error(_('Pod not found'))
rescue Kubeclient::HttpError => e
::Gitlab::ErrorTracking.track_exception(e)
error(_('Kubernetes API returned status code: %{error_code}') % {
error_code: e.error_code
})
end
# Check https://gitlab.com/gitlab-org/gitlab/issues/34965#note_292261879
# for more details on why this is necessary.
def encode_logs_to_utf8(result)
return success(result) if result[:logs].nil?
return success(result) if result[:logs].encoding == Encoding::UTF_8
result[:logs] = encode_utf8(result[:logs])
success(result)
rescue EncodingHelperError
error(_('Unable to convert Kubernetes logs encoding to UTF-8'))
end
def split_logs(result)
result[:logs] = result[:logs].strip.lines(chomp: true).map do |line|
# message contains a RFC3339Nano timestamp, then a space, then the log line. # message contains a RFC3339Nano timestamp, then a space, then the log line.
# resolution of the nanoseconds can vary, so we split on the first space # resolution of the nanoseconds can vary, so we split on the first space
values = line.split(' ', 2) values = line.split(' ', 2)
...@@ -37,14 +67,22 @@ module PodLogs ...@@ -37,14 +67,22 @@ module PodLogs
end end
success(result) success(result)
rescue Kubeclient::ResourceNotFoundError end
error(_('Pod not found'))
rescue Kubeclient::HttpError => e
::Gitlab::ErrorTracking.track_exception(e)
error(_('Kubernetes API returned status code: %{error_code}') % { def encode_utf8(logs)
error_code: e.error_code utf8_logs = Gitlab::EncodingHelper.encode_utf8(logs.dup, replace: REPLACEMENT_CHAR)
})
# Gitlab::EncodingHelper.encode_utf8 can return '' or nil if an exception
# is raised while encoding. We prefer to return an error rather than wrongly
# display blank logs.
no_utf8_logs = logs.present? && utf8_logs.blank?
unexpected_encoding = utf8_logs&.encoding != Encoding::UTF_8
if no_utf8_logs || unexpected_encoding
raise EncodingHelperError, 'Could not convert Kubernetes logs to UTF-8'
end
utf8_logs
end end
end end
end end
...@@ -11,12 +11,10 @@ describe ::PodLogs::KubernetesService do ...@@ -11,12 +11,10 @@ describe ::PodLogs::KubernetesService do
let(:pod_name) { 'pod-1' } let(:pod_name) { 'pod-1' }
let(:container_name) { 'container-1' } let(:container_name) { 'container-1' }
let(:params) { {} } let(:params) { {} }
let(:expected_logs) do
[ let(:raw_logs) do
{ message: "Log 1", timestamp: "2019-12-13T14:04:22.123456Z" }, "2019-12-13T14:04:22.123456Z Log 1\n2019-12-13T14:04:23.123456Z Log 2\n" \
{ message: "Log 2", timestamp: "2019-12-13T14:04:23.123456Z" }, "2019-12-13T14:04:24.123456Z Log 3"
{ message: "Log 3", timestamp: "2019-12-13T14:04:24.123456Z" }
]
end end
subject { described_class.new(cluster, namespace, params: params) } subject { described_class.new(cluster, namespace, params: params) }
...@@ -28,6 +26,8 @@ describe ::PodLogs::KubernetesService do ...@@ -28,6 +26,8 @@ describe ::PodLogs::KubernetesService do
container_name: container_name container_name: container_name
} }
end end
let(:expected_logs) { raw_logs }
let(:service) { create(:cluster_platform_kubernetes, :configured) } let(:service) { create(:cluster_platform_kubernetes, :configured) }
it 'returns the logs' do it 'returns the logs' do
...@@ -63,4 +63,104 @@ describe ::PodLogs::KubernetesService do ...@@ -63,4 +63,104 @@ describe ::PodLogs::KubernetesService do
expect(result[:message]).to eq('Kubernetes API returned status code: 500') expect(result[:message]).to eq('Kubernetes API returned status code: 500')
end end
end end
describe '#encode_logs_to_utf8', :aggregate_failures do
let(:service) { create(:cluster_platform_kubernetes, :configured) }
let(:expected_logs) { '2019-12-13T14:04:22.123456Z ✔ Started logging errors to Sentry' }
let(:raw_logs) { expected_logs.dup.force_encoding(Encoding::ASCII_8BIT) }
let(:result) { subject.send(:encode_logs_to_utf8, result_arg) }
let(:result_arg) do
{
pod_name: pod_name,
container_name: container_name,
logs: raw_logs
}
end
it 'converts logs to utf-8' do
expect(result[:status]).to eq(:success)
expect(result[:logs]).to eq(expected_logs)
end
it 'returns error if output of encoding helper is blank' do
allow(Gitlab::EncodingHelper).to receive(:encode_utf8).and_return('')
expect(result[:status]).to eq(:error)
expect(result[:message]).to eq('Unable to convert Kubernetes logs encoding to UTF-8')
end
it 'returns error if output of encoding helper is nil' do
allow(Gitlab::EncodingHelper).to receive(:encode_utf8).and_return(nil)
expect(result[:status]).to eq(:error)
expect(result[:message]).to eq('Unable to convert Kubernetes logs encoding to UTF-8')
end
it 'returns error if output of encoding helper is not UTF-8' do
allow(Gitlab::EncodingHelper).to receive(:encode_utf8)
.and_return(expected_logs.encode(Encoding::UTF_16BE))
expect(result[:status]).to eq(:error)
expect(result[:message]).to eq('Unable to convert Kubernetes logs encoding to UTF-8')
end
context 'when logs are nil' do
let(:raw_logs) { nil }
let(:expected_logs) { nil }
it 'returns nil' do
expect(result[:status]).to eq(:success)
expect(result[:logs]).to eq(expected_logs)
end
end
context 'when logs are blank' do
let(:raw_logs) { (+'').force_encoding(Encoding::ASCII_8BIT) }
let(:expected_logs) { '' }
it 'returns blank string' do
expect(result[:status]).to eq(:success)
expect(result[:logs]).to eq(expected_logs)
end
end
context 'when logs are already in utf-8' do
let(:raw_logs) { expected_logs }
it 'does not fail' do
expect(result[:status]).to eq(:success)
expect(result[:logs]).to eq(expected_logs)
end
end
end
describe '#split_logs' do
let(:service) { create(:cluster_platform_kubernetes, :configured) }
let(:expected_logs) do
[
{ message: "Log 1", timestamp: "2019-12-13T14:04:22.123456Z" },
{ message: "Log 2", timestamp: "2019-12-13T14:04:23.123456Z" },
{ message: "Log 3", timestamp: "2019-12-13T14:04:24.123456Z" }
]
end
let(:result_arg) do
{
pod_name: pod_name,
container_name: container_name,
logs: raw_logs
}
end
it 'returns the logs' do
result = subject.send(:split_logs, result_arg)
aggregate_failures do
expect(result[:status]).to eq(:success)
expect(result[:logs]).to eq(expected_logs)
end
end
end
end end
...@@ -50,7 +50,7 @@ module Gitlab ...@@ -50,7 +50,7 @@ module Gitlab
detect && detect[:type] == :binary detect && detect[:type] == :binary
end end
def encode_utf8(message) def encode_utf8(message, replace: "")
message = force_encode_utf8(message) message = force_encode_utf8(message)
return message if message.valid_encoding? return message if message.valid_encoding?
...@@ -64,7 +64,7 @@ module Gitlab ...@@ -64,7 +64,7 @@ module Gitlab
'' ''
end end
else else
clean(message) clean(message, replace: replace)
end end
rescue ArgumentError rescue ArgumentError
nil nil
...@@ -94,8 +94,13 @@ module Gitlab ...@@ -94,8 +94,13 @@ module Gitlab
message.force_encoding("UTF-8") message.force_encoding("UTF-8")
end end
def clean(message) def clean(message, replace: "")
message.encode("UTF-16BE", undef: :replace, invalid: :replace, replace: "".encode("UTF-16BE")) message.encode(
"UTF-16BE",
undef: :replace,
invalid: :replace,
replace: replace.encode("UTF-16BE")
)
.encode("UTF-8") .encode("UTF-8")
.gsub("\0".encode("UTF-8"), "") .gsub("\0".encode("UTF-8"), "")
end end
......
...@@ -20930,6 +20930,9 @@ msgstr "" ...@@ -20930,6 +20930,9 @@ msgstr ""
msgid "Unable to connect to server: %{error}" msgid "Unable to connect to server: %{error}"
msgstr "" msgstr ""
msgid "Unable to convert Kubernetes logs encoding to UTF-8"
msgstr ""
msgid "Unable to fetch unscanned projects" msgid "Unable to fetch unscanned projects"
msgstr "" msgstr ""
......
...@@ -128,6 +128,12 @@ describe Gitlab::EncodingHelper do ...@@ -128,6 +128,12 @@ describe Gitlab::EncodingHelper do
expect { ext_class.encode_utf8('') }.not_to raise_error expect { ext_class.encode_utf8('') }.not_to raise_error
end end
it 'replaces invalid and undefined chars with the replace argument' do
str = 'hællo'.encode(Encoding::UTF_16LE).force_encoding(Encoding::ASCII_8BIT)
expect(ext_class.encode_utf8(str, replace: "\u{FFFD}")).to eq("h�llo")
end
context 'with strings that can be forcefully encoded into utf8' do context 'with strings that can be forcefully encoded into utf8' do
let(:test_string) do let(:test_string) do
"refs/heads/FixSymbolsTitleDropdown".encode("ASCII-8BIT") "refs/heads/FixSymbolsTitleDropdown".encode("ASCII-8BIT")
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment