Commit 85fbb47b authored by Alejandro Rodríguez's avatar Alejandro Rodríguez Committed by Winnie Hellmann

Avoind unnecesary `force_encoding` operations

They're costly. This will also avoid some edge cases where
charlock_holmes assigns a weird encoding to a perfectly valid UTF-8
string.
parent 8e5a12ab
......@@ -14,9 +14,9 @@ module Gitlab
ENCODING_CONFIDENCE_THRESHOLD = 50
def encode!(message)
return nil unless message.respond_to? :force_encoding
return nil unless message.respond_to?(:force_encoding)
return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
# if message is utf-8 encoding, just return it
message.force_encoding("UTF-8")
return message if message.valid_encoding?
......@@ -50,6 +50,9 @@ module Gitlab
end
def encode_utf8(message)
return nil unless message.is_a?(String)
return message if message.encoding == Encoding::UTF_8 && message.valid_encoding?
detect = CharlockHolmes::EncodingDetector.detect(message)
if detect && detect[:encoding]
begin
......
......@@ -6,6 +6,9 @@ describe Gitlab::EncodingHelper do
describe '#encode!' do
[
["nil", nil, nil],
["empty string", "".encode("ASCII-8BIT"), "".encode("UTF-8")],
["invalid utf-8 encoded string", "my bad string\xE5".force_encoding("UTF-8"), "my bad string"],
[
'leaves ascii only string as is',
'ascii only string',
......@@ -81,6 +84,9 @@ describe Gitlab::EncodingHelper do
describe '#encode_utf8' do
[
["nil", nil, nil],
["empty string", "".encode("ASCII-8BIT"), "".encode("UTF-8")],
["invalid utf-8 encoded string", "my bad string\xE5".force_encoding("UTF-8"), "my bad stringå"],
[
"encodes valid utf8 encoded string to utf8",
"λ, λ, λ".encode("UTF-8"),
......@@ -95,12 +101,18 @@ describe Gitlab::EncodingHelper do
"encodes valid ISO-8859-1 encoded string to utf8",
"Rüby ist eine Programmiersprache. Wir verlängern den text damit ICU die Sprache erkennen kann.".encode("ISO-8859-1", "UTF-8"),
"Rüby ist eine Programmiersprache. Wir verlängern den text damit ICU die Sprache erkennen kann.".encode("UTF-8")
],
[
# Test case from https://gitlab.com/gitlab-org/gitlab-ce/issues/39227
"Equifax branch name",
"refs/heads/Equifax".encode("UTF-8"),
"refs/heads/Equifax".encode("UTF-8")
]
].each do |description, test_string, xpect|
it description do
r = ext_class.encode_utf8(test_string.force_encoding('UTF-8'))
r = ext_class.encode_utf8(test_string)
expect(r).to eq(xpect)
expect(r.encoding.name).to eq('UTF-8')
expect(r.encoding.name).to eq('UTF-8') if xpect
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment