Commit 0683d31a authored by James Edwards-Jones's avatar James Edwards-Jones

Can parse root .gitattributes file for a ref

parent be623ef3
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
# repository-wide language statistics: # repository-wide language statistics:
# <https://github.com/github/linguist/blob/v4.7.0/lib/linguist/lazy_blob.rb#L33-L36> # <https://github.com/github/linguist/blob/v4.7.0/lib/linguist/lazy_blob.rb#L33-L36>
# #
# The options passed by Linguist are those assumed by Gitlab::Git::Attributes # The options passed by Linguist are those assumed by Gitlab::Git::InfoAttributes
# anyway, and there is no great efficiency gain from just fetching the listed # anyway, and there is no great efficiency gain from just fetching the listed
# attributes with our implementation, so we ignore the additional arguments. # attributes with our implementation, so we ignore the additional arguments.
# #
...@@ -19,7 +19,7 @@ module Rugged ...@@ -19,7 +19,7 @@ module Rugged
end end
def attributes def attributes
@attributes ||= Gitlab::Git::Attributes.new(path) @attributes ||= Gitlab::Git::InfoAttributes.new(path)
end end
end end
......
module Gitlab
  module Git
    # Reads the root `.gitattributes` file as it exists at a given ref and
    # exposes attribute lookups on it.
    #
    # Only the blob at the repository root is consulted. Lookups are forwarded
    # to an AttributesParser built from that blob's contents.
    class AttributesAtRefParser
      delegate :attributes, to: :@parser

      # repository - An object responding to `blob_at(ref, path)`.
      # ref        - The ref at which `.gitattributes` is read.
      #
      # When no blob is found, the parser is built from `nil`.
      def initialize(repository, ref)
        gitattributes_blob = repository.blob_at(ref, '.gitattributes')
        contents = gitattributes_blob.data unless gitattributes_blob.nil?

        @parser = AttributesParser.new(contents)
      end
    end
  end
end
# Gitaly note: JV: not sure what to make of this class. Why does it use
# the full disk path of the repository to look up attributes? This is
# problematic in Gitaly, because Gitaly hides the full disk path to the
# repository from gitlab-ce.
module Gitlab module Gitlab
module Git module Git
# Class for parsing Git attribute files and extracting the attributes for # Class for parsing Git attribute files and extracting the attributes for
# file patterns. # file patterns.
# class AttributesParser
# Unlike Rugged this parser only needs a single IO call (a call to `open`), def initialize(attributes_data)
# vastly reducing the time spent in extracting attributes. @data = attributes_data || ""
#
# This class _only_ supports parsing the attributes file located at if @data.is_a?(File)
# `$GIT_DIR/info/attributes` as GitLab doesn't use any other files @patterns = parse_file
# (`.gitattributes` is copied to this particular path). end
#
# Basic usage:
#
# attributes = Gitlab::Git::Attributes.new(some_repo.path)
#
# attributes.attributes('README.md') # => { "eol" => "lf }
class Attributes
# path - The path to the Git repository.
def initialize(path)
@path = File.expand_path(path)
@patterns = nil
end end
# Returns all the Git attributes for the given path. # Returns all the Git attributes for the given path.
# #
# path - A path to a file for which to get the attributes. # file_path - A path to a file for which to get the attributes.
# #
# Returns a Hash. # Returns a Hash.
def attributes(path) def attributes(file_path)
full_path = File.join(@path, path) absolute_path = File.join('/', file_path)
patterns.each do |pattern, attrs| patterns.each do |pattern, attrs|
return attrs if File.fnmatch?(pattern, full_path) return attrs if File.fnmatch?(pattern, absolute_path)
end end
{} {}
...@@ -98,16 +82,10 @@ module Gitlab ...@@ -98,16 +82,10 @@ module Gitlab
# Iterates over every line in the attributes file. # Iterates over every line in the attributes file.
def each_line def each_line
full_path = File.join(@path, 'info/attributes') @data.each_line do |line|
break unless line.valid_encoding?
return unless File.exist?(full_path) yield line.strip
File.open(full_path, 'r') do |handle|
handle.each_line do |line|
break unless line.valid_encoding?
yield line.strip
end
end end
end end
...@@ -125,7 +103,8 @@ module Gitlab ...@@ -125,7 +103,8 @@ module Gitlab
parsed = attrs ? parse_attributes(attrs) : {} parsed = attrs ? parse_attributes(attrs) : {}
pairs << [File.join(@path, pattern), parsed] absolute_pattern = File.join('/', pattern)
pairs << [absolute_pattern, parsed]
end end
# Newer entries take precedence over older entries. # Newer entries take precedence over older entries.
......
# Gitaly note: JV: not sure what to make of this class. Why does it use
# the full disk path of the repository to look up attributes? This is
# problematic in Gitaly, because Gitaly hides the full disk path to the
# repository from gitlab-ce.
module Gitlab
  module Git
    # Looks up Git attributes stored in `$GIT_DIR/info/attributes`.
    #
    # Compared to Rugged, reading the attributes this way takes a single IO
    # call (one `open`), which greatly cuts the time spent extracting
    # attributes.
    #
    # Only `$GIT_DIR/info/attributes` is read; GitLab does not use any other
    # attributes file (`.gitattributes` is copied to that location).
    #
    # Example:
    #
    #     info = Gitlab::Git::InfoAttributes.new(some_repo.path)
    #
    #     info.attributes('README.md') # => { "eol" => "lf" }
    class InfoAttributes
      delegate :attributes, :patterns, to: :parser

      # path - The path to the Git repository.
      def initialize(path)
        @repo_path = File.expand_path(path)
      end

      # Returns the memoized AttributesParser for this repository, building
      # it on first use.
      def parser
        @parser ||= build_parser
      end

      private

      # Builds a parser from the attributes file, or from an empty String
      # when that file does not exist.
      #
      # The parser is constructed inside the `File.open` block, so it is
      # expected to consume the handle before the block closes it.
      def build_parser
        return AttributesParser.new("") unless File.exist?(attributes_path)

        File.open(attributes_path, 'r') { |io| AttributesParser.new(io) }
      end

      # Absolute path of `info/attributes` inside the repository.
      def attributes_path
        @attributes_path ||= File.join(@repo_path, 'info/attributes')
      end
    end
  end
end
...@@ -102,7 +102,7 @@ module Gitlab ...@@ -102,7 +102,7 @@ module Gitlab
) )
@path = File.join(storage_path, @relative_path) @path = File.join(storage_path, @relative_path)
@name = @relative_path.split("/").last @name = @relative_path.split("/").last
@attributes = Gitlab::Git::Attributes.new(path) @attributes = Gitlab::Git::InfoAttributes.new(path)
end end
def ==(other) def ==(other)
...@@ -1011,6 +1011,18 @@ module Gitlab ...@@ -1011,6 +1011,18 @@ module Gitlab
attributes(path)[name] attributes(path)[name]
end end
# Looks up the attributes of `file_path` in the `.gitattributes` file as it
# exists at `ref`.
#
# Only the root `.gitattributes` file is consulted; `.gitattributes` files
# in subfolders are not traversed.
#
# This is roughly 30 times slower than `attributes`, which reads
# `$GIT_DIR/info/attributes` instead.
#
# ref       - The ref at which `.gitattributes` is read.
# file_path - The path of the file whose attributes are wanted.
def attributes_at(ref, file_path)
  AttributesAtRefParser.new(self, ref).attributes(file_path)
end
def languages(ref = nil) def languages(ref = nil)
Gitlab::GitalyClient.migrate(:commit_languages) do |is_enabled| Gitlab::GitalyClient.migrate(:commit_languages) do |is_enabled|
if is_enabled if is_enabled
......
require 'spec_helper'
# Specs for Gitlab::Git::AttributesAtRefParser: building the parser from the
# `.gitattributes` blob at a ref and looking up attributes through it.
describe Gitlab::Git::AttributesAtRefParser, seed_helper: true do
let(:project) { create(:project, :repository) }
let(:repository) { project.repository }
# The parser under test reads `.gitattributes` at the 'lfs' ref.
subject { described_class.new(repository, 'lfs') }
# Verifies that the blob contents (a String containing the LFS filter line)
# are what gets handed to AttributesParser.new.
it 'loads .gitattributes blob' do
repository.raw # Initialize repository in advance since this also checks attributes
expected_filter = 'filter=lfs diff=lfs merge=lfs'
receive_blob = receive(:new).with(a_string_including(expected_filter))
expect(Gitlab::Git::AttributesParser).to receive_blob.and_call_original
# Referencing `subject` instantiates the parser, which triggers the
# expectation set up above.
subject
end
# Construction must not raise when the ref cannot be resolved to a blob.
it 'handles missing blobs' do
expect { described_class.new(repository, 'non-existant-branch') }.not_to raise_error
end
describe '#attributes' do
# A `.lfs` path at the 'lfs' ref is expected to carry `filter=lfs`.
it 'returns the attributes as a Hash' do
expect(subject.attributes('test.lfs')['filter']).to eq('lfs')
end
end
end
require 'spec_helper' require 'spec_helper'
describe Gitlab::Git::Attributes, seed_helper: true do describe Gitlab::Git::AttributesParser, seed_helper: true do
let(:path) do let(:attributes_path) { File.join(SEED_STORAGE_PATH, 'with-git-attributes.git', 'info', 'attributes') }
File.join(SEED_STORAGE_PATH, 'with-git-attributes.git') let(:data) { File.read(attributes_path) }
end
subject { described_class.new(path) } subject { described_class.new(data) }
describe '#attributes' do describe '#attributes' do
context 'using a path with attributes' do context 'using a path with attributes' do
...@@ -66,6 +65,26 @@ describe Gitlab::Git::Attributes, seed_helper: true do ...@@ -66,6 +65,26 @@ describe Gitlab::Git::Attributes, seed_helper: true do
expect(subject.attributes('test.foo')).to eq({}) expect(subject.attributes('test.foo')).to eq({})
end end
end end
context 'when attributes data is a file handle' do
subject do
File.open(attributes_path, 'r') do |file_handle|
described_class.new(file_handle)
end
end
it 'returns the attributes as a Hash' do
expect(subject.attributes('test.txt')).to eq({ 'text' => true })
end
end
context 'when attributes data is nil' do
let(:data) { nil }
it 'returns an empty Hash' do
expect(subject.attributes('test.foo')).to eq({})
end
end
end end
describe '#patterns' do describe '#patterns' do
...@@ -74,14 +93,14 @@ describe Gitlab::Git::Attributes, seed_helper: true do ...@@ -74,14 +93,14 @@ describe Gitlab::Git::Attributes, seed_helper: true do
end end
it 'parses an entry that uses a tab to separate the pattern and attributes' do it 'parses an entry that uses a tab to separate the pattern and attributes' do
expect(subject.patterns[File.join(path, '*.md')]) expect(subject.patterns[File.join('/', '*.md')])
.to eq({ 'gitlab-language' => 'markdown' }) .to eq({ 'gitlab-language' => 'markdown' })
end end
it 'stores patterns in reverse order' do it 'stores patterns in reverse order' do
first = subject.patterns.to_a[0] first = subject.patterns.to_a[0]
expect(first[0]).to eq(File.join(path, 'bla/bla.txt')) expect(first[0]).to eq(File.join('/', 'bla/bla.txt'))
end end
# It's a bit hard to test for something _not_ being processed. As such we'll # It's a bit hard to test for something _not_ being processed. As such we'll
...@@ -89,14 +108,6 @@ describe Gitlab::Git::Attributes, seed_helper: true do ...@@ -89,14 +108,6 @@ describe Gitlab::Git::Attributes, seed_helper: true do
it 'ignores any comments and empty lines' do it 'ignores any comments and empty lines' do
expect(subject.patterns.length).to eq(10) expect(subject.patterns.length).to eq(10)
end end
it 'does not parse anything when the attributes file does not exist' do
expect(File).to receive(:exist?)
.with(File.join(path, 'info/attributes'))
.and_return(false)
expect(subject.patterns).to eq({})
end
end end
describe '#parse_attributes' do describe '#parse_attributes' do
...@@ -132,17 +143,9 @@ describe Gitlab::Git::Attributes, seed_helper: true do ...@@ -132,17 +143,9 @@ describe Gitlab::Git::Attributes, seed_helper: true do
expect { |b| subject.each_line(&b) }.to yield_successive_args(*args) expect { |b| subject.each_line(&b) }.to yield_successive_args(*args)
end end
it 'does not yield when the attributes file does not exist' do
expect(File).to receive(:exist?)
.with(File.join(path, 'info/attributes'))
.and_return(false)
expect { |b| subject.each_line(&b) }.not_to yield_control
end
it 'does not yield when the attributes file has an unsupported encoding' do it 'does not yield when the attributes file has an unsupported encoding' do
path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git') path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git', 'info', 'attributes')
attrs = described_class.new(path) attrs = described_class.new(File.read(path))
expect { |b| attrs.each_line(&b) }.not_to yield_control expect { |b| attrs.each_line(&b) }.not_to yield_control
end end
......
require 'spec_helper'
# Specs for Gitlab::Git::InfoAttributes: attribute lookups and parser
# construction from a repository's `info/attributes` file.
describe Gitlab::Git::InfoAttributes, seed_helper: true do
# Path of the seed repository whose `info/attributes` file is read.
let(:path) do
File.join(SEED_STORAGE_PATH, 'with-git-attributes.git')
end
subject { described_class.new(path) }
describe '#attributes' do
context 'using a path with attributes' do
it 'returns the attributes as a Hash' do
expect(subject.attributes('test.txt')).to eq({ 'text' => true })
end
# A path that is listed in the attributes file but has no attributes
# assigned yields an empty Hash.
it 'returns an empty Hash for a defined path without attributes' do
expect(subject.attributes('bla/bla.txt')).to eq({})
end
end
end
describe '#parser' do
# Patterns are keyed by the pattern joined onto '/', hence "/*.txt".
it 'parses a file with entries' do
expect(subject.patterns).to be_an_instance_of(Hash)
expect(subject.patterns["/*.txt"]).to eq({ 'text' => true })
end
# Simulates a missing attributes file by stubbing File.exist? for the
# exact `info/attributes` path.
it 'does not parse anything when the attributes file does not exist' do
expect(File).to receive(:exist?)
.with(File.join(path, 'info/attributes'))
.and_return(false)
expect(subject.patterns).to eq({})
end
# The local `path` and `subject` below intentionally shadow the outer
# `let`/`subject` so this example uses the invalid-encoding seed repository.
it 'does not parse attributes files with unsupported encoding' do
path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git')
subject = described_class.new(path)
expect(subject.patterns).to eq({})
end
end
end
...@@ -20,7 +20,7 @@ module TestEnv ...@@ -20,7 +20,7 @@ module TestEnv
'improve/awesome' => '5937ac0', 'improve/awesome' => '5937ac0',
'merged-target' => '21751bf', 'merged-target' => '21751bf',
'markdown' => '0ed8c6c', 'markdown' => '0ed8c6c',
'lfs' => 'be93687', 'lfs' => '55bc176',
'master' => 'b83d6e3', 'master' => 'b83d6e3',
'merge-test' => '5937ac0', 'merge-test' => '5937ac0',
"'test'" => 'e56497b', "'test'" => 'e56497b',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment