Commit 40983f4a authored by Brett Walker's avatar Brett Walker Committed by Yorick Peterse

Show tooltip for malicious looking links

Such as those with IDN homographs or embedded
right-to-left (RTLO) characters.

Autolinked hrefs should be escaped
parent b3c13bbb
---
title: Make potentially malicious links more visible in the UI and scrub RTLO chars from links
merge_request: 2770
author:
type: security
...@@ -8,6 +8,10 @@ module Banzai ...@@ -8,6 +8,10 @@ module Banzai
# #
# Based on HTML::Pipeline::AutolinkFilter # Based on HTML::Pipeline::AutolinkFilter
# #
# Note that our CommonMark parser, `commonmarker` (using the autolink extension)
# handles standard autolinking, like http/https. We detect additional
# schemes (smb, rdar, etc).
#
# Context options: # Context options:
# :autolink - Boolean, skips all processing done by this filter when false # :autolink - Boolean, skips all processing done by this filter when false
# :link_attr - Hash of attributes for the generated links # :link_attr - Hash of attributes for the generated links
...@@ -107,10 +111,13 @@ module Banzai ...@@ -107,10 +111,13 @@ module Banzai
end end
end end
# match has come from node.to_html above, so we know it's encoded # Since this came from a Text node, make sure the new href is encoded.
# correctly. # `commonmarker` percent encodes the domains of links it handles, so
# do the same (instead of using `normalized_encode`).
href_safe = Addressable::URI.encode(match).html_safe
html_safe_match = match.html_safe html_safe_match = match.html_safe
options = link_options.merge(href: html_safe_match) options = link_options.merge(href: href_safe)
content_tag(:a, html_safe_match, options) + dropped content_tag(:a, html_safe_match, options) + dropped
end end
......
...@@ -4,17 +4,29 @@ module Banzai ...@@ -4,17 +4,29 @@ module Banzai
module Filter module Filter
# HTML Filter to modify the attributes of external links # HTML Filter to modify the attributes of external links
class ExternalLinkFilter < HTML::Pipeline::Filter class ExternalLinkFilter < HTML::Pipeline::Filter
SCHEMES = ['http', 'https', nil].freeze SCHEMES = ['http', 'https', nil].freeze
RTLO = "\u202E".freeze
ENCODED_RTLO = '%E2%80%AE'.freeze
def call def call
links.each do |node| links.each do |node|
uri = uri(node['href'].to_s) # URI.parse does stricter checking on the url than Addressable,
# such as on `mailto:` links. Since we've been using it, do an
node.set_attribute('href', uri.to_s) if uri # initial parse for validity and then use Addressable
# for IDN support, etc
uri = uri_strict(node['href'].to_s)
if uri
node.set_attribute('href', uri.to_s)
addressable_uri = addressable_uri(node['href'])
else
addressable_uri = nil
end
if SCHEMES.include?(uri&.scheme) && !internal_url?(uri) unless internal_url?(addressable_uri)
node.set_attribute('rel', 'nofollow noreferrer noopener') punycode_autolink_node!(addressable_uri, node)
node.set_attribute('target', '_blank') sanitize_link_text!(node)
add_malicious_tooltip!(addressable_uri, node)
add_nofollow!(addressable_uri, node)
end end
end end
...@@ -23,12 +35,18 @@ module Banzai ...@@ -23,12 +35,18 @@ module Banzai
private private
def uri(href) def uri_strict(href)
URI.parse(href) URI.parse(href)
rescue URI::Error rescue URI::Error
nil nil
end end
def addressable_uri(href)
Addressable::URI.parse(href)
rescue Addressable::URI::InvalidURIError
nil
end
def links def links
query = 'descendant-or-self::a[@href and not(@href = "")]' query = 'descendant-or-self::a[@href and not(@href = "")]'
doc.xpath(query) doc.xpath(query)
...@@ -45,6 +63,57 @@ module Banzai ...@@ -45,6 +63,57 @@ module Banzai
def internal_url def internal_url
@internal_url ||= URI.parse(Gitlab.config.gitlab.url) @internal_url ||= URI.parse(Gitlab.config.gitlab.url)
end end
# Only replace an autolink with an IDN with it's punycode
# version if we need emailable links. Otherwise let it
# be shown normally and the tooltips will show the
# punycode version.
def punycode_autolink_node!(uri, node)
return unless uri
return unless context[:emailable_links]
unencoded_uri_str = Addressable::URI.unencode(node['href'])
if unencoded_uri_str == node.content && idn?(uri)
node.content = uri.normalize
end
end
# escape any right-to-left (RTLO) characters in link text
def sanitize_link_text!(node)
node.inner_html = node.inner_html.gsub(RTLO, ENCODED_RTLO)
end
# If the domain is an international domain name (IDN),
# let's expose with a tooltip in case it's intended
# to be malicious. This is particularly useful for links
# where the link text is not the same as the actual link.
# We will continue to show the unicode version of the domain
# in autolinked link text, which could contain emojis, etc.
#
# Also show the tooltip if the url contains the RTLO character,
# as this is an indicator of a malicious link
def add_malicious_tooltip!(uri, node)
if idn?(uri) || has_encoded_rtlo?(uri)
node.add_class('has-tooltip')
node.set_attribute('title', uri.normalize)
end
end
def add_nofollow!(uri, node)
if SCHEMES.include?(uri&.scheme)
node.set_attribute('rel', 'nofollow noreferrer noopener')
node.set_attribute('target', '_blank')
end
end
def idn?(uri)
uri&.normalized_host&.start_with?('xn--')
end
def has_encoded_rtlo?(uri)
uri&.to_s&.include?(ENCODED_RTLO)
end
end end
end end
end end
...@@ -12,6 +12,7 @@ module Banzai ...@@ -12,6 +12,7 @@ module Banzai
def self.transform_context(context) def self.transform_context(context)
super(context).merge( super(context).merge(
only_path: false, only_path: false,
emailable_links: true,
no_sourcepos: true no_sourcepos: true
) )
end end
......
...@@ -188,6 +188,22 @@ describe Banzai::Filter::AutolinkFilter do ...@@ -188,6 +188,22 @@ describe Banzai::Filter::AutolinkFilter do
expect(doc.at_css('a')['class']).to eq 'custom' expect(doc.at_css('a')['class']).to eq 'custom'
end end
it 'escapes RTLO and other characters' do
# rendered text looks like "http://example.com/evilexe.mp3"
evil_link = "#{link}evil\u202E3pm.exe"
doc = filter("#{evil_link}")
expect(doc.at_css('a')['href']).to eq "http://about.gitlab.com/evil%E2%80%AE3pm.exe"
end
it 'encodes international domains' do
link = "http://one😄two.com"
expected = "http://one%F0%9F%98%84two.com"
doc = filter(link)
expect(doc.at_css('a')['href']).to eq expected
end
described_class::IGNORE_PARENTS.each do |elem| described_class::IGNORE_PARENTS.each do |elem|
it "ignores valid links contained inside '#{elem}' element" do it "ignores valid links contained inside '#{elem}' element" do
exp = act = "<#{elem}>See #{link}</#{elem}>" exp = act = "<#{elem}>See #{link}</#{elem}>"
......
...@@ -62,6 +62,13 @@ describe Banzai::Filter::ExternalLinkFilter do ...@@ -62,6 +62,13 @@ describe Banzai::Filter::ExternalLinkFilter do
expect(doc.to_html).to eq(expected) expect(doc.to_html).to eq(expected)
end end
it 'adds rel and target to improperly formatted autolinks' do
doc = filter %q(<p><a href="mailto://jblogs@example.com">mailto://jblogs@example.com</a></p>)
expected = %q(<p><a href="mailto://jblogs@example.com" rel="nofollow noreferrer noopener" target="_blank">mailto://jblogs@example.com</a></p>)
expect(doc.to_html).to eq(expected)
end
end end
context 'for links with a username' do context 'for links with a username' do
...@@ -112,4 +119,62 @@ describe Banzai::Filter::ExternalLinkFilter do ...@@ -112,4 +119,62 @@ describe Banzai::Filter::ExternalLinkFilter do
it_behaves_like 'an external link with rel attribute' it_behaves_like 'an external link with rel attribute'
end end
context 'links with RTLO character' do
# In rendered text this looks like "http://example.com/evilexe.mp3"
let(:doc) { filter %Q(<a href="http://example.com/evil%E2%80%AE3pm.exe">http://example.com/evil\u202E3pm.exe</a>) }
it_behaves_like 'an external link with rel attribute'
it 'escapes RTLO in link text' do
expected = %q(http://example.com/evil%E2%80%AE3pm.exe</a>)
expect(doc.to_html).to include(expected)
end
it 'does not mangle the link text' do
doc = filter %Q(<a href="http://example.com">One<span>and</span>\u202Eexe.mp3</a>)
expect(doc.to_html).to include('One<span>and</span>%E2%80%AEexe.mp3</a>')
end
end
context 'for generated autolinks' do
context 'with an IDN character' do
let(:doc) { filter(%q(<a href="http://exa%F0%9F%98%84mple.com">http://exa😄mple.com</a>)) }
let(:doc_email) { filter(%q(<a href="http://exa%F0%9F%98%84mple.com">http://exa😄mple.com</a>), emailable_links: true) }
it_behaves_like 'an external link with rel attribute'
it 'does not change the link text' do
expect(doc.to_html).to include('http://exa😄mple.com</a>')
end
it 'uses punycode for emails' do
expect(doc_email.to_html).to include('http://xn--example-6p25f.com/</a>')
end
end
end
context 'for links that look malicious' do
context 'with an IDN character' do
let(:doc) { filter %q(<a href="http://exa%F0%9F%98%84mple.com">http://exa😄mple.com</a>) }
it 'adds a toolip with punycode' do
expect(doc.to_html).to include('http://exa😄mple.com</a>')
expect(doc.to_html).to include('class="has-tooltip"')
expect(doc.to_html).to include('title="http://xn--example-6p25f.com/"')
end
end
context 'with RTLO character' do
let(:doc) { filter %q(<a href="http://example.com/evil%E2%80%AE3pm.exe">Evil Test</a>) }
it 'adds a toolip with punycode' do
expect(doc.to_html).to include('Evil Test</a>')
expect(doc.to_html).to include('class="has-tooltip"')
expect(doc.to_html).to include('title="http://example.com/evil%E2%80%AE3pm.exe"')
end
end
end
end end
...@@ -10,5 +10,19 @@ describe Banzai::Pipeline::EmailPipeline do ...@@ -10,5 +10,19 @@ describe Banzai::Pipeline::EmailPipeline do
expect(described_class.filters).not_to be_empty expect(described_class.filters).not_to be_empty
expect(described_class.filters).not_to include(Banzai::Filter::ImageLazyLoadFilter) expect(described_class.filters).not_to include(Banzai::Filter::ImageLazyLoadFilter)
end end
it 'shows punycode for autolinks' do
examples = %W[
http://one😄two.com
http://\u0261itlab.com
]
examples.each do |markdown|
result = described_class.call(markdown, project: nil)[:output]
link = result.css('a').first
expect(link.content).to include('http://xn--')
end
end
end end
end end
...@@ -59,4 +59,42 @@ describe Banzai::Pipeline::FullPipeline do ...@@ -59,4 +59,42 @@ describe Banzai::Pipeline::FullPipeline do
expect(html.lines.map(&:strip).join("\n")).to eq filtered_footnote expect(html.lines.map(&:strip).join("\n")).to eq filtered_footnote
end end
end end
describe 'links are detected as malicious' do
it 'has tooltips for malicious links' do
examples = %W[
http://example.com/evil\u202E3pm.exe
[evilexe.mp3](http://example.com/evil\u202E3pm.exe)
rdar://localhost.com/\u202E3pm.exe
http://one😄two.com
[Evil-Test](http://one😄two.com)
http://\u0261itlab.com
[Evil-GitLab-link](http://\u0261itlab.com)
![Evil-GitLab-link](http://\u0261itlab.com.png)
]
examples.each do |markdown|
result = described_class.call(markdown, project: nil)[:output]
link = result.css('a').first
expect(link[:class]).to include('has-tooltip')
end
end
it 'has no tooltips for safe links' do
examples = %w[
http://example.com
[Safe-Test](http://example.com)
https://commons.wikimedia.org/wiki/File:اسكرام_2_-_تمنراست.jpg
[Wikipedia-link](https://commons.wikimedia.org/wiki/File:اسكرام_2_-_تمنراست.jpg)
]
examples.each do |markdown|
result = described_class.call(markdown, project: nil)[:output]
link = result.css('a').first
expect(link[:class]).to be_nil
end
end
end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment