Commit 0683d31a authored by James Edwards-Jones's avatar James Edwards-Jones

Can parse root .gitattributes file for a ref

parent be623ef3
......@@ -7,7 +7,7 @@
# repository-wide language statistics:
# <https://github.com/github/linguist/blob/v4.7.0/lib/linguist/lazy_blob.rb#L33-L36>
#
# The options passed by Linguist are those assumed by Gitlab::Git::Attributes
# The options passed by Linguist are those assumed by Gitlab::Git::InfoAttributes
# anyway, and there is no great efficiency gain from just fetching the listed
# attributes with our implementation, so we ignore the additional arguments.
#
......@@ -19,7 +19,7 @@ module Rugged
end
def attributes
@attributes ||= Gitlab::Git::Attributes.new(path)
@attributes ||= Gitlab::Git::InfoAttributes.new(path)
end
end
......
module Gitlab
  module Git
    # Looks up Git attributes using the `.gitattributes` blob stored at the
    # root of the repository for one specific ref.
    #
    # When no blob is found, nil is handed to the parser instead of raising.
    class AttributesAtRefParser
      # `attributes` calls are forwarded to the wrapped AttributesParser.
      delegate :attributes, to: :@parser

      # repository - The repository; must respond to `blob_at(ref, path)`.
      # ref        - The ref at which `.gitattributes` is read.
      def initialize(repository, ref)
        gitattributes = repository.blob_at(ref, '.gitattributes')
        contents = gitattributes&.data

        @parser = AttributesParser.new(contents)
      end
    end
  end
end
# Gitaly note: JV: not sure what to make of this class. Why does it use
# the full disk path of the repository to look up attributes? This is
# problematic in Gitaly, because Gitaly hides the full disk path to the
# repository from gitlab-ce.
module Gitlab
module Git
# Class for parsing Git attribute files and extracting the attributes for
# file patterns.
#
# Unlike Rugged this parser only needs a single IO call (a call to `open`),
# vastly reducing the time spent in extracting attributes.
#
# This class _only_ supports parsing the attributes file located at
# `$GIT_DIR/info/attributes` as GitLab doesn't use any other files
# (`.gitattributes` is copied to this particular path).
#
# Basic usage:
#
# attributes = Gitlab::Git::Attributes.new(some_repo.path)
#
# attributes.attributes('README.md') # => { "eol" => "lf" }
class Attributes
# path - The path to the Git repository.
def initialize(path)
@path = File.expand_path(path)
@patterns = nil
class AttributesParser
def initialize(attributes_data)
@data = attributes_data || ""
if @data.is_a?(File)
@patterns = parse_file
end
end
# Returns all the Git attributes for the given path.
#
# path - A path to a file for which to get the attributes.
# file_path - A path to a file for which to get the attributes.
#
# Returns a Hash.
def attributes(path)
full_path = File.join(@path, path)
def attributes(file_path)
absolute_path = File.join('/', file_path)
patterns.each do |pattern, attrs|
return attrs if File.fnmatch?(pattern, full_path)
return attrs if File.fnmatch?(pattern, absolute_path)
end
{}
......@@ -98,16 +82,10 @@ module Gitlab
# Iterates over every line in the attributes file.
def each_line
full_path = File.join(@path, 'info/attributes')
@data.each_line do |line|
break unless line.valid_encoding?
return unless File.exist?(full_path)
File.open(full_path, 'r') do |handle|
handle.each_line do |line|
break unless line.valid_encoding?
yield line.strip
end
yield line.strip
end
end
......@@ -125,7 +103,8 @@ module Gitlab
parsed = attrs ? parse_attributes(attrs) : {}
pairs << [File.join(@path, pattern), parsed]
absolute_pattern = File.join('/', pattern)
pairs << [absolute_pattern, parsed]
end
# Newer entries take precedence over older entries.
......
# Gitaly note: JV: not sure what to make of this class. Why does it use
# the full disk path of the repository to look up attributes? This is
# problematic in Gitaly, because Gitaly hides the full disk path to the
# repository from gitlab-ce.
module Gitlab
  module Git
    # Reads Git attributes from `$GIT_DIR/info/attributes`.
    #
    # Unlike Rugged, only a single IO call (a call to `open`) is needed,
    # vastly reducing the time spent in extracting attributes.
    #
    # Only the attributes file located at `$GIT_DIR/info/attributes` is
    # supported, as GitLab doesn't use any other files (`.gitattributes` is
    # copied to this particular path).
    #
    # Basic usage:
    #
    #     attributes = Gitlab::Git::InfoAttributes.new(some_repo.path)
    #
    #     attributes.attributes('README.md') # => { "eol" => "lf" }
    class InfoAttributes
      # `attributes` and `patterns` are answered by the memoized parser.
      delegate :attributes, :patterns, to: :parser

      # path - The path to the Git repository.
      def initialize(path)
        @repo_path = File.expand_path(path)
      end

      # Returns the AttributesParser for this repository, building it on
      # first use and reusing it afterwards.
      def parser
        @parser ||= build_parser
      end

      private

      # Builds a parser from the attributes file, or from an empty string
      # when that file does not exist.
      def build_parser
        return AttributesParser.new("") unless File.exist?(attributes_path)

        # The parser is constructed inside the block, while the handle is
        # still open.
        File.open(attributes_path, 'r') { |io| AttributesParser.new(io) }
      end

      # Full path of `info/attributes` inside the repository directory.
      def attributes_path
        @attributes_path ||= File.join(@repo_path, 'info/attributes')
      end
    end
  end
end
......@@ -102,7 +102,7 @@ module Gitlab
)
@path = File.join(storage_path, @relative_path)
@name = @relative_path.split("/").last
@attributes = Gitlab::Git::Attributes.new(path)
@attributes = Gitlab::Git::InfoAttributes.new(path)
end
def ==(other)
......@@ -1011,6 +1011,18 @@ module Gitlab
attributes(path)[name]
end
# Check .gitattributes for a given ref
#
# This only checks the root .gitattributes file,
# it does not traverse subfolders to find additional .gitattributes files
#
# This method is around 30 times slower than `attributes`,
# which uses `$GIT_DIR/info/attributes`
def attributes_at(ref, file_path)
  # A new parser is built on every call; nothing is cached between calls.
  AttributesAtRefParser.new(self, ref).attributes(file_path)
end
def languages(ref = nil)
Gitlab::GitalyClient.migrate(:commit_languages) do |is_enabled|
if is_enabled
......
require 'spec_helper'
describe Gitlab::Git::AttributesAtRefParser, seed_helper: true do
  let(:project) { create(:project, :repository) }
  let(:repository) { project.repository }

  subject { described_class.new(repository, 'lfs') }

  it 'loads .gitattributes blob' do
    # Initialize repository in advance since this also checks attributes
    repository.raw

    expected_filter = 'filter=lfs diff=lfs merge=lfs'

    # The parser must be constructed with the blob contents of the ref.
    expect(Gitlab::Git::AttributesParser)
      .to receive(:new).with(a_string_including(expected_filter)).and_call_original

    subject
  end

  it 'handles missing blobs' do
    expect { described_class.new(repository, 'non-existant-branch') }.not_to raise_error
  end

  describe '#attributes' do
    it 'returns the attributes as a Hash' do
      filter = subject.attributes('test.lfs')['filter']

      expect(filter).to eq('lfs')
    end
  end
end
require 'spec_helper'
describe Gitlab::Git::Attributes, seed_helper: true do
let(:path) do
File.join(SEED_STORAGE_PATH, 'with-git-attributes.git')
end
describe Gitlab::Git::AttributesParser, seed_helper: true do
let(:attributes_path) { File.join(SEED_STORAGE_PATH, 'with-git-attributes.git', 'info', 'attributes') }
let(:data) { File.read(attributes_path) }
subject { described_class.new(path) }
subject { described_class.new(data) }
describe '#attributes' do
context 'using a path with attributes' do
......@@ -66,6 +65,26 @@ describe Gitlab::Git::Attributes, seed_helper: true do
expect(subject.attributes('test.foo')).to eq({})
end
end
context 'when attributes data is a file handle' do
subject do
File.open(attributes_path, 'r') do |file_handle|
described_class.new(file_handle)
end
end
it 'returns the attributes as a Hash' do
expect(subject.attributes('test.txt')).to eq({ 'text' => true })
end
end
context 'when attributes data is nil' do
let(:data) { nil }
it 'returns an empty Hash' do
expect(subject.attributes('test.foo')).to eq({})
end
end
end
describe '#patterns' do
......@@ -74,14 +93,14 @@ describe Gitlab::Git::Attributes, seed_helper: true do
end
it 'parses an entry that uses a tab to separate the pattern and attributes' do
expect(subject.patterns[File.join(path, '*.md')])
expect(subject.patterns[File.join('/', '*.md')])
.to eq({ 'gitlab-language' => 'markdown' })
end
it 'stores patterns in reverse order' do
first = subject.patterns.to_a[0]
expect(first[0]).to eq(File.join(path, 'bla/bla.txt'))
expect(first[0]).to eq(File.join('/', 'bla/bla.txt'))
end
# It's a bit hard to test for something _not_ being processed. As such we'll
......@@ -89,14 +108,6 @@ describe Gitlab::Git::Attributes, seed_helper: true do
it 'ignores any comments and empty lines' do
expect(subject.patterns.length).to eq(10)
end
it 'does not parse anything when the attributes file does not exist' do
expect(File).to receive(:exist?)
.with(File.join(path, 'info/attributes'))
.and_return(false)
expect(subject.patterns).to eq({})
end
end
describe '#parse_attributes' do
......@@ -132,17 +143,9 @@ describe Gitlab::Git::Attributes, seed_helper: true do
expect { |b| subject.each_line(&b) }.to yield_successive_args(*args)
end
it 'does not yield when the attributes file does not exist' do
expect(File).to receive(:exist?)
.with(File.join(path, 'info/attributes'))
.and_return(false)
expect { |b| subject.each_line(&b) }.not_to yield_control
end
it 'does not yield when the attributes file has an unsupported encoding' do
path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git')
attrs = described_class.new(path)
path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git', 'info', 'attributes')
attrs = described_class.new(File.read(path))
expect { |b| attrs.each_line(&b) }.not_to yield_control
end
......
require 'spec_helper'
describe Gitlab::Git::InfoAttributes, seed_helper: true do
  let(:path) { File.join(SEED_STORAGE_PATH, 'with-git-attributes.git') }

  subject { described_class.new(path) }

  describe '#attributes' do
    context 'using a path with attributes' do
      it 'returns the attributes as a Hash' do
        expect(subject.attributes('test.txt')).to eq({ 'text' => true })
      end

      it 'returns an empty Hash for a defined path without attributes' do
        expect(subject.attributes('bla/bla.txt')).to eq({})
      end
    end
  end

  describe '#parser' do
    it 'parses a file with entries' do
      patterns = subject.patterns

      expect(patterns).to be_an_instance_of(Hash)
      expect(patterns["/*.txt"]).to eq({ 'text' => true })
    end

    it 'does not parse anything when the attributes file does not exist' do
      attributes_file = File.join(path, 'info/attributes')

      # Pretend the attributes file is absent.
      expect(File).to receive(:exist?).with(attributes_file).and_return(false)

      expect(subject.patterns).to eq({})
    end

    it 'does not parse attributes files with unsupported encoding' do
      invalid_repo_path = File.join(SEED_STORAGE_PATH, 'with-invalid-git-attributes.git')
      info_attributes = described_class.new(invalid_repo_path)

      expect(info_attributes.patterns).to eq({})
    end
  end
end
......@@ -20,7 +20,7 @@ module TestEnv
'improve/awesome' => '5937ac0',
'merged-target' => '21751bf',
'markdown' => '0ed8c6c',
'lfs' => 'be93687',
'lfs' => '55bc176',
'master' => 'b83d6e3',
'merge-test' => '5937ac0',
"'test'" => 'e56497b',
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment