Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
573ec5c4
Commit
573ec5c4
authored
May 06, 2021
by
Robert May
Committed by
Nick Thomas
May 06, 2021
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Cached binary detection [RUN ALL RSPEC] [RUN AS-IF-FOSS]
parent
047ed0b5
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
107 additions
and
20 deletions
+107
-20
config/feature_flags/development/cached_encoding_detection.yml
...g/feature_flags/development/cached_encoding_detection.yml
+8
-0
lib/gitlab/blob_helper.rb
lib/gitlab/blob_helper.rb
+9
-7
lib/gitlab/encoding_helper.rb
lib/gitlab/encoding_helper.rb
+23
-8
lib/gitlab/git/blob.rb
lib/gitlab/git/blob.rb
+2
-2
lib/gitlab/gitaly_client/blobs_stitcher.rb
lib/gitlab/gitaly_client/blobs_stitcher.rb
+1
-1
spec/features/projects/diffs/diff_show_spec.rb
spec/features/projects/diffs/diff_show_spec.rb
+1
-1
spec/lib/gitlab/encoding_helper_spec.rb
spec/lib/gitlab/encoding_helper_spec.rb
+59
-0
spec/lib/gitlab/git/blame_spec.rb
spec/lib/gitlab/git/blame_spec.rb
+4
-1
No files found.
config/feature_flags/development/cached_encoding_detection.yml
0 → 100644
View file @
573ec5c4
---
name
:
cached_encoding_detection
introduced_by_url
:
https://gitlab.com/gitlab-org/gitlab/-/merge_requests/60128
rollout_issue_url
:
https://gitlab.com/gitlab-org/gitlab/-/issues/328819
milestone
:
'
13.12'
type
:
development
group
:
group::source code
default_enabled
:
false
lib/gitlab/blob_helper.rb
View file @
573ec5c4
...
@@ -38,7 +38,7 @@ module Gitlab
...
@@ -38,7 +38,7 @@ module Gitlab
# If Charlock says its binary
# If Charlock says its binary
else
else
detect
_encoding
[
:type
]
==
:binary
find
_encoding
[
:type
]
==
:binary
end
end
end
end
...
@@ -137,23 +137,25 @@ module Gitlab
...
@@ -137,23 +137,25 @@ module Gitlab
end
end
def
ruby_encoding
def
ruby_encoding
if
hash
=
detect
_encoding
if
hash
=
find
_encoding
hash
[
:ruby_encoding
]
hash
[
:ruby_encoding
]
end
end
end
end
def
encoding
def
encoding
if
hash
=
detect
_encoding
if
hash
=
find
_encoding
hash
[
:encoding
]
hash
[
:encoding
]
end
end
end
end
def
detect_encoding
@detect_encoding
||=
CharlockHolmes
::
EncodingDetector
.
new
.
detect
(
data
)
if
data
# rubocop:disable Gitlab/ModuleWithInstanceVariables
end
def
empty?
def
empty?
data
.
nil?
||
data
==
""
data
.
nil?
||
data
==
""
end
end
private
def
find_encoding
@find_encoding
||=
Gitlab
::
EncodingHelper
.
detect_encoding
(
data
)
if
data
# rubocop:disable Gitlab/ModuleWithInstanceVariables
end
end
end
end
end
lib/gitlab/encoding_helper.rb
View file @
573ec5c4
...
@@ -20,7 +20,7 @@ module Gitlab
...
@@ -20,7 +20,7 @@ module Gitlab
return
message
if
message
.
valid_encoding?
return
message
if
message
.
valid_encoding?
# return message if message type is binary
# return message if message type is binary
detect
=
CharlockHolmes
::
EncodingDetector
.
detect
(
message
)
detect
=
detect_encoding
(
message
)
return
message
.
force_encoding
(
"BINARY"
)
if
detect_binary?
(
message
,
detect
)
return
message
.
force_encoding
(
"BINARY"
)
if
detect_binary?
(
message
,
detect
)
if
detect
&&
detect
[
:encoding
]
&&
detect
[
:confidence
]
>
ENCODING_CONFIDENCE_THRESHOLD
if
detect
&&
detect
[
:encoding
]
&&
detect
[
:confidence
]
>
ENCODING_CONFIDENCE_THRESHOLD
...
@@ -37,16 +37,30 @@ module Gitlab
...
@@ -37,16 +37,30 @@ module Gitlab
"--broken encoding:
#{
encoding
}
"
"--broken encoding:
#{
encoding
}
"
end
end
def
detect_encoding
(
data
,
limit:
CharlockHolmes
::
EncodingDetector
::
DEFAULT_BINARY_SCAN_LEN
,
cache_key:
nil
)
return
if
data
.
nil?
if
Feature
.
enabled?
(
:cached_encoding_detection
,
type: :development
,
default_enabled: :yaml
)
return
CharlockHolmes
::
EncodingDetector
.
new
(
limit
).
detect
(
data
)
unless
cache_key
.
present?
Rails
.
cache
.
fetch
([
:detect_binary
,
CharlockHolmes
::
VERSION
,
cache_key
],
expires_in:
1
.
week
)
do
CharlockHolmes
::
EncodingDetector
.
new
(
limit
).
detect
(
data
)
end
else
CharlockHolmes
::
EncodingDetector
.
new
(
limit
).
detect
(
data
)
end
end
def
detect_binary?
(
data
,
detect
=
nil
)
def
detect_binary?
(
data
,
detect
=
nil
)
detect
||=
CharlockHolmes
::
EncodingDetector
.
detect
(
data
)
detect
||=
detect_encoding
(
data
)
detect
&&
detect
[
:type
]
==
:binary
&&
detect
[
:confidence
]
==
100
detect
&&
detect
[
:type
]
==
:binary
&&
detect
[
:confidence
]
==
100
end
end
def
detect_libgit2_binary?
(
data
)
# EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
# EncodingDetector checks the first 1024 * 1024 bytes for NUL byte, libgit2 checks
# only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
# only the first 8000 (https://github.com/libgit2/libgit2/blob/2ed855a9e8f9af211e7274021c2264e600c0f86b/src/filter.h#L15),
# which is what we use below to keep a consistent behavior.
# which is what we use below to keep a consistent behavior.
def
detect_libgit2_binary?
(
data
,
cache_key:
nil
)
detect
=
CharlockHolmes
::
EncodingDetector
.
new
(
8000
).
detect
(
data
)
detect
=
detect_encoding
(
data
,
limit:
8000
,
cache_key:
cache_key
)
detect
&&
detect
[
:type
]
==
:binary
detect
&&
detect
[
:type
]
==
:binary
end
end
...
@@ -54,7 +68,8 @@ module Gitlab
...
@@ -54,7 +68,8 @@ module Gitlab
message
=
force_encode_utf8
(
message
)
message
=
force_encode_utf8
(
message
)
return
message
if
message
.
valid_encoding?
return
message
if
message
.
valid_encoding?
detect
=
CharlockHolmes
::
EncodingDetector
.
detect
(
message
)
detect
=
detect_encoding
(
message
)
if
detect
&&
detect
[
:encoding
]
if
detect
&&
detect
[
:encoding
]
begin
begin
CharlockHolmes
::
Converter
.
convert
(
message
,
detect
[
:encoding
],
'UTF-8'
)
CharlockHolmes
::
Converter
.
convert
(
message
,
detect
[
:encoding
],
'UTF-8'
)
...
...
lib/gitlab/git/blob.rb
View file @
573ec5c4
...
@@ -110,8 +110,8 @@ module Gitlab
...
@@ -110,8 +110,8 @@ module Gitlab
end
end
end
end
def
binary?
(
data
)
def
binary?
(
data
,
cache_key:
nil
)
EncodingHelper
.
detect_libgit2_binary?
(
data
)
EncodingHelper
.
detect_libgit2_binary?
(
data
,
cache_key:
cache_key
)
end
end
def
size_could_be_lfs?
(
size
)
def
size_could_be_lfs?
(
size
)
...
...
lib/gitlab/gitaly_client/blobs_stitcher.rb
View file @
573ec5c4
...
@@ -41,7 +41,7 @@ module Gitlab
...
@@ -41,7 +41,7 @@ module Gitlab
size:
blob_data
[
:size
],
size:
blob_data
[
:size
],
commit_id:
blob_data
[
:revision
],
commit_id:
blob_data
[
:revision
],
data:
data
,
data:
data
,
binary:
Gitlab
::
Git
::
Blob
.
binary?
(
data
)
binary:
Gitlab
::
Git
::
Blob
.
binary?
(
data
,
cache_key:
blob_data
[
:oid
]
)
)
)
end
end
end
end
...
...
spec/features/projects/diffs/diff_show_spec.rb
View file @
573ec5c4
...
@@ -2,7 +2,7 @@
...
@@ -2,7 +2,7 @@
require
'spec_helper'
require
'spec_helper'
RSpec
.
describe
'Diff file viewer'
,
:js
do
RSpec
.
describe
'Diff file viewer'
,
:js
,
:with_clean_rails_cache
do
let
(
:project
)
{
create
(
:project
,
:public
,
:repository
)
}
let
(
:project
)
{
create
(
:project
,
:public
,
:repository
)
}
def
visit_commit
(
sha
,
anchor:
nil
)
def
visit_commit
(
sha
,
anchor:
nil
)
...
...
spec/lib/gitlab/encoding_helper_spec.rb
View file @
573ec5c4
...
@@ -216,4 +216,63 @@ RSpec.describe Gitlab::EncodingHelper do
...
@@ -216,4 +216,63 @@ RSpec.describe Gitlab::EncodingHelper do
expect
(
test
).
not_to
eq
(
io_stream
)
expect
(
test
).
not_to
eq
(
io_stream
)
end
end
end
end
describe
'#detect_encoding'
do
subject
{
ext_class
.
detect_encoding
(
data
,
**
kwargs
)
}
let
(
:data
)
{
binary_string
}
let
(
:kwargs
)
{
{}
}
shared_examples
'detects encoding'
do
it
{
is_expected
.
to
be_a
(
Hash
)
}
it
'correctly detects the binary'
do
expect
(
subject
[
:type
]).
to
eq
(
:binary
)
end
context
'data is nil'
do
let
(
:data
)
{
nil
}
it
{
is_expected
.
to
be_nil
}
end
context
'limit is provided'
do
let
(
:kwargs
)
do
{
limit:
10
}
end
it
'correctly detects the binary'
do
expect
(
subject
[
:type
]).
to
eq
(
:binary
)
end
end
end
context
'cached_encoding_detection is enabled'
do
before
do
stub_feature_flags
(
cached_encoding_detection:
true
)
end
it_behaves_like
'detects encoding'
context
'cache_key is provided'
do
let
(
:kwargs
)
do
{
cache_key:
%w(foo bar)
}
end
it
'uses that cache_key to serve from the cache'
do
expect
(
Rails
.
cache
).
to
receive
(
:fetch
).
with
([
:detect_binary
,
CharlockHolmes
::
VERSION
,
%w(foo bar)
],
expires_in:
1
.
week
).
and_call_original
expect
(
subject
[
:type
]).
to
eq
(
:binary
)
end
end
end
context
'cached_encoding_detection is disabled'
do
before
do
stub_feature_flags
(
cached_encoding_detection:
false
)
end
it_behaves_like
'detects encoding'
end
end
end
end
spec/lib/gitlab/git/blame_spec.rb
View file @
573ec5c4
...
@@ -53,7 +53,10 @@ RSpec.describe Gitlab::Git::Blame, :seed_helper do
...
@@ -53,7 +53,10 @@ RSpec.describe Gitlab::Git::Blame, :seed_helper do
end
end
it
'converts to UTF-8'
do
it
'converts to UTF-8'
do
expect
(
CharlockHolmes
::
EncodingDetector
).
to
receive
(
:detect
).
and_return
(
nil
)
expect_next_instance_of
(
CharlockHolmes
::
EncodingDetector
)
do
|
detector
|
expect
(
detector
).
to
receive
(
:detect
).
and_return
(
nil
)
end
data
=
[]
data
=
[]
blame
.
each
do
|
commit
,
line
|
blame
.
each
do
|
commit
,
line
|
data
<<
{
data
<<
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment