Commit b95f5ab5 authored by Sean McGivern's avatar Sean McGivern

Add banzai filter for processing upload links

This takes upload link processing away from RepositoryLinkFilter,
allowing the latter to perform Gitaly optimisations without worrying
about whether or not they make sense for upload links.

This filter also makes RepositoryLinkFilter a no-op if the only relative
links in a document are to uploads.
parent f525eb85
......@@ -280,7 +280,7 @@ module MarkupHelper
context.reverse_merge!(
current_user: (current_user if defined?(current_user)),
# RepositoryLinkFilter
# RepositoryLinkFilter and UploadLinkFilter
commit: @commit,
project_wiki: @project_wiki,
ref: @ref,
......
......@@ -65,7 +65,7 @@ module Banzai
el.attribute('href')
end
attrs += doc.search('img, video, audio').flat_map do |el|
attrs += doc.search('img:not(.gfm), video:not(.gfm), audio:not(.gfm)').flat_map do |el|
[el.attribute('src'), el.attribute('data-src')]
end
......
# frozen_string_literal: true
require 'uri'
module Banzai
module Filter
# HTML filter that "fixes" links to uploads.
#
# Context options:
# :group
# :only_path
# :project
# :system_note
class UploadLinkFilter < HTML::Pipeline::Filter
include Gitlab::Utils::StrongMemoize
def call
return doc if context[:system_note]
linkable_attributes.each do |attr|
if attr.value.start_with?('/uploads/')
process_link_to_upload_attr(attr)
attr.parent.add_class('gfm')
end
end
doc
end
protected
def linkable_attributes
strong_memoize(:linkable_attributes) do
attrs = []
attrs += doc.search('a:not(.gfm)').map do |el|
el.attribute('href')
end
attrs += doc.search('img:not(.gfm), video:not(.gfm), audio:not(.gfm)').flat_map do |el|
[el.attribute('src'), el.attribute('data-src')]
end
attrs.reject do |attr|
attr.blank? || attr.value.start_with?('//')
end
end
end
def process_link_to_upload_attr(html_attr)
path_parts = [unescape_and_scrub_uri(html_attr.value)]
if project
path_parts.unshift(relative_url_root, project.full_path)
elsif group
path_parts.unshift(relative_url_root, 'groups', group.full_path, '-')
else
path_parts.unshift(relative_url_root)
end
begin
path = Addressable::URI.escape(File.join(*path_parts))
rescue Addressable::URI::InvalidURIError
return
end
html_attr.value =
if context[:only_path]
path
else
Addressable::URI.join(Gitlab.config.gitlab.base_url, path).to_s
end
end
def relative_url_root
Gitlab.config.gitlab.relative_url_root.presence || '/'
end
def group
context[:group]
end
def project
context[:project]
end
private
def unescape_and_scrub_uri(uri)
Addressable::URI.unescape(uri).scrub
end
end
end
end
......@@ -16,6 +16,9 @@ module Banzai
[
Filter::ReferenceRedactorFilter,
Filter::InlineMetricsRedactorFilter,
# UploadLinkFilter must come before RepositoryLinkFilter to
# prevent unnecessary Gitaly calls from being made.
Filter::UploadLinkFilter,
Filter::RepositoryLinkFilter,
Filter::IssuableStateFilter,
Filter::SuggestionFilter
......
......@@ -208,6 +208,8 @@ describe 'GitLab Markdown', :aggregate_failures do
@group = @feat.group
end
let(:project) { @feat.project } # Shadow this so matchers can use it
context 'default pipeline' do
before do
@html = markdown(@feat.raw_markdown)
......@@ -216,6 +218,10 @@ describe 'GitLab Markdown', :aggregate_failures do
it_behaves_like 'all pipelines'
it 'includes custom filters' do
aggregate_failures 'UploadLinkFilter' do
expect(doc).to parse_upload_links
end
aggregate_failures 'RepositoryLinkFilter' do
expect(doc).to parse_repository_links
end
......@@ -277,6 +283,10 @@ describe 'GitLab Markdown', :aggregate_failures do
it_behaves_like 'all pipelines'
it 'includes custom filters' do
aggregate_failures 'UploadLinkFilter' do
expect(doc).to parse_upload_links
end
aggregate_failures 'RepositoryLinkFilter' do
expect(doc).not_to parse_repository_links
end
......
......@@ -111,6 +111,12 @@ Markdown should be usable inside a link. Let's try!
- [**text**](#link-strong)
- [`text`](#link-code)
### UploadLinkFilter
Linking to an upload in this project should work:
[Relative Upload Link](/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg)
![Relative Upload Image](/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg)
### RepositoryLinkFilter
Linking to a file relative to this project's repository should work.
......
......@@ -128,11 +128,6 @@ describe Banzai::Filter::RepositoryLinkFilter do
expect { filter(act) }.not_to raise_error
end
it 'does not raise an exception on URIs containing invalid utf-8 byte sequences in uploads' do
act = link("/uploads/%FF")
expect { filter(act) }.not_to raise_error
end
it 'does not raise an exception on URIs containing invalid utf-8 byte sequences in context requested path' do
expect { filter(link("files/test.md"), requested_path: '%FF') }.not_to raise_error
end
......@@ -147,11 +142,6 @@ describe Banzai::Filter::RepositoryLinkFilter do
expect { filter(act) }.not_to raise_error
end
it 'does not raise an exception with a space in the path' do
act = link("/uploads/d18213acd3732630991986120e167e3d/Landscape_8.jpg \nBut here's some more unexpected text :smile:)")
expect { filter(act) }.not_to raise_error
end
it 'ignores ref if commit is passed' do
doc = filter(link('non/existent.file'), commit: project.commit('empty-branch') )
expect(doc.at_css('a')['href'])
......@@ -350,166 +340,4 @@ describe Banzai::Filter::RepositoryLinkFilter do
include_examples :valid_repository
end
context 'with a /upload/ URL' do
# not needed
let(:commit) { nil }
let(:ref) { nil }
let(:requested_path) { nil }
let(:upload_path) { '/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg' }
let(:relative_path) { "/#{project.full_path}#{upload_path}" }
context 'to a project upload' do
shared_examples 'rewrite project uploads' do
context 'with an absolute URL' do
let(:absolute_path) { Gitlab.config.gitlab.url + relative_path }
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(link(upload_path))
expect(doc.at_css('a')['href']).to eq(absolute_path)
end
end
it 'rebuilds relative URL for a link' do
doc = filter(link(upload_path))
expect(doc.at_css('a')['href']).to eq(relative_path)
doc = filter(nested(link(upload_path)))
expect(doc.at_css('a')['href']).to eq(relative_path)
end
it 'rebuilds relative URL for an image' do
doc = filter(image(upload_path))
expect(doc.at_css('img')['src']).to eq(relative_path)
doc = filter(nested(image(upload_path)))
expect(doc.at_css('img')['src']).to eq(relative_path)
end
it 'does not modify absolute URL' do
doc = filter(link('http://example.com'))
expect(doc.at_css('a')['href']).to eq 'http://example.com'
end
it 'supports unescaped Unicode filenames' do
path = '/uploads/한글.png'
doc = filter(link(path))
expect(doc.at_css('a')['href']).to eq("/#{project.full_path}/uploads/%ED%95%9C%EA%B8%80.png")
end
it 'supports escaped Unicode filenames' do
path = '/uploads/한글.png'
escaped = Addressable::URI.escape(path)
doc = filter(image(escaped))
expect(doc.at_css('img')['src']).to eq("/#{project.full_path}/uploads/%ED%95%9C%EA%B8%80.png")
end
end
context 'without project repository access' do
let(:project) { create(:project, :repository, repository_access_level: ProjectFeature::PRIVATE) }
it_behaves_like 'rewrite project uploads'
end
context 'with project repository access' do
it_behaves_like 'rewrite project uploads'
end
end
context 'to a group upload' do
let(:upload_link) { link('/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg') }
let(:group) { create(:group) }
let(:project) { nil }
let(:relative_path) { "/groups/#{group.full_path}/-/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg" }
context 'with an absolute URL' do
let(:absolute_path) { Gitlab.config.gitlab.url + relative_path }
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(upload_link)
expect(doc.at_css('a')['href']).to eq(absolute_path)
end
end
it 'rewrites the link correctly' do
doc = filter(upload_link)
expect(doc.at_css('a')['href']).to eq(relative_path)
end
it 'rewrites the link correctly for subgroup' do
group.update!(parent: create(:group))
doc = filter(upload_link)
expect(doc.at_css('a')['href']).to eq(relative_path)
end
it 'does not modify absolute URL' do
doc = filter(link('http://example.com'))
expect(doc.at_css('a')['href']).to eq 'http://example.com'
end
end
context 'to a personal snippet' do
let(:group) { nil }
let(:project) { nil }
let(:relative_path) { '/uploads/-/system/personal_snippet/6/674e4f07fbf0a7736c3439212896e51a/example.tar.gz' }
context 'with an absolute URL' do
let(:absolute_path) { Gitlab.config.gitlab.url + relative_path }
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(absolute_path)
end
end
context 'with a relative URL root' do
let(:gitlab_root) { '/gitlab' }
let(:absolute_path) { Gitlab.config.gitlab.url + gitlab_root + relative_path }
before do
stub_config_setting(relative_url_root: gitlab_root)
end
context 'with an absolute URL' do
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(absolute_path)
end
end
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(gitlab_root + relative_path)
end
end
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(relative_path)
end
it 'does not modify absolute URL' do
doc = filter(link('http://example.com'))
expect(doc.at_css('a')['href']).to eq 'http://example.com'
end
end
end
end
# frozen_string_literal: true
require 'spec_helper'
describe Banzai::Filter::UploadLinkFilter do
def filter(doc, contexts = {})
contexts.reverse_merge!(
project: project,
group: group,
only_path: only_path
)
described_class.call(doc, contexts)
end
def image(path)
%(<img src="#{path}" />)
end
def video(path)
%(<video src="#{path}"></video>)
end
def audio(path)
%(<audio src="#{path}"></audio>)
end
def link(path)
%(<a href="#{path}">#{path}</a>)
end
def nested(element)
%(<div>#{element}</div>)
end
let_it_be(:project) { create(:project, :public) }
let_it_be(:user) { create(:user) }
let(:group) { nil }
let(:project_path) { project.full_path }
let(:only_path) { true }
let(:upload_path) { '/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg' }
let(:relative_path) { "/#{project.full_path}#{upload_path}" }
context 'to a project upload' do
context 'with an absolute URL' do
let(:absolute_path) { Gitlab.config.gitlab.url + relative_path }
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(link(upload_path))
expect(doc.at_css('a')['href']).to eq(absolute_path)
expect(doc.at_css('a').classes).to include('gfm')
end
end
it 'rebuilds relative URL for a link' do
doc = filter(link(upload_path))
expect(doc.at_css('a')['href']).to eq(relative_path)
expect(doc.at_css('a').classes).to include('gfm')
doc = filter(nested(link(upload_path)))
expect(doc.at_css('a')['href']).to eq(relative_path)
expect(doc.at_css('a').classes).to include('gfm')
end
it 'rebuilds relative URL for an image' do
doc = filter(image(upload_path))
expect(doc.at_css('img')['src']).to eq(relative_path)
expect(doc.at_css('img').classes).to include('gfm')
doc = filter(nested(image(upload_path)))
expect(doc.at_css('img')['src']).to eq(relative_path)
expect(doc.at_css('img').classes).to include('gfm')
end
it 'does not modify absolute URL' do
doc = filter(link('http://example.com'))
expect(doc.at_css('a')['href']).to eq 'http://example.com'
expect(doc.at_css('a').classes).not_to include('gfm')
end
it 'supports unescaped Unicode filenames' do
path = '/uploads/한글.png'
doc = filter(link(path))
expect(doc.at_css('a')['href']).to eq("/#{project.full_path}/uploads/%ED%95%9C%EA%B8%80.png")
expect(doc.at_css('a').classes).to include('gfm')
end
it 'supports escaped Unicode filenames' do
path = '/uploads/한글.png'
escaped = Addressable::URI.escape(path)
doc = filter(image(escaped))
expect(doc.at_css('img')['src']).to eq("/#{project.full_path}/uploads/%ED%95%9C%EA%B8%80.png")
expect(doc.at_css('img').classes).to include('gfm')
end
end
context 'to a group upload' do
let(:upload_link) { link('/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg') }
let_it_be(:group) { create(:group) }
let(:project) { nil }
let(:relative_path) { "/groups/#{group.full_path}/-/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg" }
context 'with an absolute URL' do
let(:absolute_path) { Gitlab.config.gitlab.url + relative_path }
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(upload_link)
expect(doc.at_css('a')['href']).to eq(absolute_path)
expect(doc.at_css('a').classes).to include('gfm')
end
end
it 'rewrites the link correctly' do
doc = filter(upload_link)
expect(doc.at_css('a')['href']).to eq(relative_path)
expect(doc.at_css('a').classes).to include('gfm')
end
it 'rewrites the link correctly for subgroup' do
group.update!(parent: create(:group))
doc = filter(upload_link)
expect(doc.at_css('a')['href']).to eq(relative_path)
expect(doc.at_css('a').classes).to include('gfm')
end
it 'does not modify absolute URL' do
doc = filter(link('http://example.com'))
expect(doc.at_css('a')['href']).to eq 'http://example.com'
expect(doc.at_css('a').classes).not_to include('gfm')
end
end
context 'to a personal snippet' do
let(:group) { nil }
let(:project) { nil }
let(:relative_path) { '/uploads/-/system/personal_snippet/6/674e4f07fbf0a7736c3439212896e51a/example.tar.gz' }
context 'with an absolute URL' do
let(:absolute_path) { Gitlab.config.gitlab.url + relative_path }
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(absolute_path)
expect(doc.at_css('a').classes).to include('gfm')
end
end
context 'with a relative URL root' do
let(:gitlab_root) { '/gitlab' }
let(:absolute_path) { Gitlab.config.gitlab.url + gitlab_root + relative_path }
before do
stub_config_setting(relative_url_root: gitlab_root)
end
context 'with an absolute URL' do
let(:only_path) { false }
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(absolute_path)
expect(doc.at_css('a').classes).to include('gfm')
end
end
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(gitlab_root + relative_path)
expect(doc.at_css('a').classes).to include('gfm')
end
end
it 'rewrites the link correctly' do
doc = filter(link(relative_path))
expect(doc.at_css('a')['href']).to eq(relative_path)
expect(doc.at_css('a').classes).to include('gfm')
end
it 'does not modify absolute URL' do
doc = filter(link('http://example.com'))
expect(doc.at_css('a')['href']).to eq 'http://example.com'
expect(doc.at_css('a').classes).not_to include('gfm')
end
end
context 'invalid input' do
using RSpec::Parameterized::TableSyntax
where(:name, :href) do
'invalid URI' | '://foo'
'invalid UTF-8 byte sequences' | '%FF'
'garbled path' | 'open(/var/tmp/):%20/location%0Afrom:%20/test'
'whitespace' | "d18213acd3732630991986120e167e3d/Landscape_8.jpg\nand more"
end
with_them do
it { expect { filter(link("/uploads/#{href}")) }.not_to raise_error }
end
end
end
......@@ -10,6 +10,19 @@ module MarkdownMatchers
extend RSpec::Matchers::DSL
include Capybara::Node::Matchers
# UploadLinkFilter
matcher :parse_upload_links do
set_default_markdown_messages
match do |actual|
link = actual.at_css('a:contains("Relative Upload Link")')
image = actual.at_css('img[alt="Relative Upload Image"]')
expect(link['href']).to eq("/#{project.full_path}/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg")
expect(image['data-src']).to eq("/#{project.full_path}/uploads/e90decf88d8f96fe9e1389afc2e4a91f/test.jpg")
end
end
# RepositoryLinkFilter
matcher :parse_repository_links do
set_default_markdown_messages
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment