Commit 6b7f66d0 authored by Kerri Miller's avatar Kerri Miller Committed by GitLab Release Tools Bot

Extract SanitizeNodeLink and apply to WikiLinkFilter

The SanitizationFilter was running before the WikiFilter. Since
WikiFilter can modify links, we could see links that _should_ be stopped
by SanatizationFilter being rendered on the page. I (kerrizor) had
previously addressed the bug in: https://gitlab.com/gitlab-org/gitlab-ee/commit/7bc971915bbeadb950bb0e1f13510bf3038229a4
However, an additional exploit was discovered after that was merged.
Working through the issue, we couldn't simply shuffle the order of
filters, due to some implicit assumptions about the order of filters, so
instead we've extracted the logic that sanitizes a Nokogiri-generated
Node object, and applied it to the WikiLinkFilter as well.

On moving filters around:
Once we start moving around filters, we get cascading failures; fix one,
another one crops up. Many of the existing filters in the WikiPipeline
chain seem to assume that other filters have already done their work,
and thus operate on a "transform anything that's left" basis;
WikiFilter, for instance, assumes any link it finds in the markdown
should be prepended with the wiki_base_path.. but if it does that, it
also turns `href="@user"` into `href="/path/to/wiki/@user"`, which the
UserReferenceFilter doesn't see as a user reference it needs to
transform into a user profile link. This is true for all the reference
filters in the WikiPipeline.
parent 4b137d6d
---
title: Patch XSS issue in wiki links
merge_request:
author:
type: security
...@@ -18,6 +18,7 @@ module Banzai ...@@ -18,6 +18,7 @@ module Banzai
# #
class AutolinkFilter < HTML::Pipeline::Filter class AutolinkFilter < HTML::Pipeline::Filter
include ActionView::Helpers::TagHelper include ActionView::Helpers::TagHelper
include Gitlab::Utils::SanitizeNodeLink
# Pattern to match text that should be autolinked. # Pattern to match text that should be autolinked.
# #
...@@ -72,19 +73,11 @@ module Banzai ...@@ -72,19 +73,11 @@ module Banzai
private private
# Return true if any of the UNSAFE_PROTOCOLS strings are included in the URI scheme
def contains_unsafe?(scheme)
return false unless scheme
scheme = scheme.strip.downcase
Banzai::Filter::SanitizationFilter::UNSAFE_PROTOCOLS.any? { |protocol| scheme.include?(protocol) }
end
def autolink_match(match) def autolink_match(match)
# start by stripping out dangerous links # start by stripping out dangerous links
begin begin
uri = Addressable::URI.parse(match) uri = Addressable::URI.parse(match)
return match if contains_unsafe?(uri.scheme) return match unless safe_protocol?(uri.scheme)
rescue Addressable::URI::InvalidURIError rescue Addressable::URI::InvalidURIError
return match return match
end end
......
...@@ -11,6 +11,7 @@ module Banzai ...@@ -11,6 +11,7 @@ module Banzai
# Extends HTML::Pipeline::SanitizationFilter with common rules. # Extends HTML::Pipeline::SanitizationFilter with common rules.
class BaseSanitizationFilter < HTML::Pipeline::SanitizationFilter class BaseSanitizationFilter < HTML::Pipeline::SanitizationFilter
include Gitlab::Utils::StrongMemoize include Gitlab::Utils::StrongMemoize
extend Gitlab::Utils::SanitizeNodeLink
UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze
...@@ -40,7 +41,7 @@ module Banzai ...@@ -40,7 +41,7 @@ module Banzai
# Allow any protocol in `a` elements # Allow any protocol in `a` elements
# and then remove links with unsafe protocols # and then remove links with unsafe protocols
whitelist[:protocols].delete('a') whitelist[:protocols].delete('a')
whitelist[:transformers].push(self.class.remove_unsafe_links) whitelist[:transformers].push(self.class.method(:remove_unsafe_links))
# Remove `rel` attribute from `a` elements # Remove `rel` attribute from `a` elements
whitelist[:transformers].push(self.class.remove_rel) whitelist[:transformers].push(self.class.remove_rel)
...@@ -54,35 +55,6 @@ module Banzai ...@@ -54,35 +55,6 @@ module Banzai
end end
class << self class << self
def remove_unsafe_links
lambda do |env|
node = env[:node]
return unless node.name == 'a'
return unless node.has_attribute?('href')
begin
node['href'] = node['href'].strip
uri = Addressable::URI.parse(node['href'])
return unless uri.scheme
# Remove all invalid scheme characters before checking against the
# list of unsafe protocols.
#
# See https://tools.ietf.org/html/rfc3986#section-3.1
scheme = uri.scheme
.strip
.downcase
.gsub(/[^A-Za-z0-9\+\.\-]+/, '')
node.remove_attribute('href') if UNSAFE_PROTOCOLS.include?(scheme)
rescue Addressable::URI::InvalidURIError
node.remove_attribute('href')
end
end
end
def remove_rel def remove_rel
lambda do |env| lambda do |env|
if env[:node_name] == 'a' if env[:node_name] == 'a'
......
...@@ -8,15 +8,19 @@ module Banzai ...@@ -8,15 +8,19 @@ module Banzai
# Context options: # Context options:
# :project_wiki # :project_wiki
class WikiLinkFilter < HTML::Pipeline::Filter class WikiLinkFilter < HTML::Pipeline::Filter
include Gitlab::Utils::SanitizeNodeLink
def call def call
return doc unless project_wiki? return doc unless project_wiki?
doc.search('a:not(.gfm)').each { |el| process_link_attr(el.attribute('href')) } doc.search('a:not(.gfm)').each { |el| process_link(el.attribute('href'), el) }
doc.search('video').each { |el| process_link_attr(el.attribute('src')) }
doc.search('video').each { |el| process_link(el.attribute('src'), el) }
doc.search('img').each do |el| doc.search('img').each do |el|
attr = el.attribute('data-src') || el.attribute('src') attr = el.attribute('data-src') || el.attribute('src')
process_link_attr(attr) process_link(attr, el)
end end
doc doc
...@@ -24,6 +28,11 @@ module Banzai ...@@ -24,6 +28,11 @@ module Banzai
protected protected
def process_link(link_attr, node)
process_link_attr(link_attr)
remove_unsafe_links({ node: node }, remove_invalid_links: false)
end
def project_wiki? def project_wiki?
!context[:project_wiki].nil? !context[:project_wiki].nil?
end end
......
...@@ -4,8 +4,6 @@ module Banzai ...@@ -4,8 +4,6 @@ module Banzai
module Filter module Filter
class WikiLinkFilter < HTML::Pipeline::Filter class WikiLinkFilter < HTML::Pipeline::Filter
class Rewriter class Rewriter
UNSAFE_SLUG_REGEXES = [/\Ajavascript:/i].freeze
def initialize(link_string, wiki:, slug:) def initialize(link_string, wiki:, slug:)
@uri = Addressable::URI.parse(link_string) @uri = Addressable::URI.parse(link_string)
@wiki_base_path = wiki && wiki.wiki_base_path @wiki_base_path = wiki && wiki.wiki_base_path
...@@ -37,8 +35,6 @@ module Banzai ...@@ -37,8 +35,6 @@ module Banzai
# Of the form `./link`, `../link`, or similar # Of the form `./link`, `../link`, or similar
def apply_hierarchical_link_rules! def apply_hierarchical_link_rules!
return if slug_considered_unsafe?
@uri = Addressable::URI.join(@slug, @uri) if @uri.to_s[0] == '.' @uri = Addressable::URI.join(@slug, @uri) if @uri.to_s[0] == '.'
end end
...@@ -58,10 +54,6 @@ module Banzai ...@@ -58,10 +54,6 @@ module Banzai
def repository_upload? def repository_upload?
@uri.relative? && @uri.path.starts_with?(Wikis::CreateAttachmentService::ATTACHMENT_PATH) @uri.relative? && @uri.path.starts_with?(Wikis::CreateAttachmentService::ATTACHMENT_PATH)
end end
def slug_considered_unsafe?
!!UNSAFE_SLUG_REGEXES.detect { |r| r.match?(@slug) }
end
end end
end end
end end
......
# frozen_string_literal: true
require_dependency 'gitlab/utils'
module Gitlab
module Utils
module SanitizeNodeLink
UNSAFE_PROTOCOLS = %w(data javascript vbscript).freeze
ATTRS_TO_SANITIZE = %w(href src data-src).freeze
def remove_unsafe_links(env, remove_invalid_links: true)
node = env[:node]
sanitize_node(node: node, remove_invalid_links: remove_invalid_links)
# HTML entities such as <video></video> have scannable attrs in
# children elements, which also need to be sanitized.
#
node.children.each do |child_node|
sanitize_node(node: child_node, remove_invalid_links: remove_invalid_links)
end
end
# Remove all invalid scheme characters before checking against the
# list of unsafe protocols.
#
# See https://tools.ietf.org/html/rfc3986#section-3.1
#
def safe_protocol?(scheme)
return false unless scheme
scheme = scheme
.strip
.downcase
.gsub(/[^A-Za-z\+\.\-]+/, '')
UNSAFE_PROTOCOLS.none?(scheme)
end
private
def sanitize_node(node:, remove_invalid_links: true)
ATTRS_TO_SANITIZE.each do |attr|
next unless node.has_attribute?(attr)
begin
node[attr] = node[attr].strip
uri = Addressable::URI.parse(node[attr])
next unless uri.scheme
next if safe_protocol?(uri.scheme)
node.remove_attribute(attr)
rescue Addressable::URI::InvalidURIError
node.remove_attribute(attr) if remove_invalid_links
end
end
end
end
end
end
...@@ -70,47 +70,5 @@ describe Banzai::Filter::WikiLinkFilter do ...@@ -70,47 +70,5 @@ describe Banzai::Filter::WikiLinkFilter do
expect(filtered_link.attribute('href').value).to eq(invalid_link) expect(filtered_link.attribute('href').value).to eq(invalid_link)
end end
end end
context "when the slug is deemed unsafe or invalid" do
let(:link) { "alert(1);" }
invalid_slugs = [
"javascript:",
"JaVaScRiPt:",
"\u0001java\u0003script:",
"javascript :",
"javascript: ",
"javascript : ",
":javascript:",
"javascript&#58;",
"javascript&#0058;",
"javascript&#x3A;",
"javascript&#x003A;",
"java\0script:",
" &#14; javascript:"
]
invalid_slugs.each do |slug|
context "with the slug #{slug}" do
it "doesn't rewrite a (.) relative link" do
filtered_link = filter(
"<a href='.#{link}'>Link</a>",
project_wiki: wiki,
page_slug: slug).children[0]
expect(filtered_link.attribute('href').value).not_to include(slug)
end
it "doesn't rewrite a (..) relative link" do
filtered_link = filter(
"<a href='..#{link}'>Link</a>",
project_wiki: wiki,
page_slug: slug).children[0]
expect(filtered_link.attribute('href').value).not_to include(slug)
end
end
end
end
end end
end end
...@@ -177,6 +177,85 @@ describe Banzai::Pipeline::WikiPipeline do ...@@ -177,6 +177,85 @@ describe Banzai::Pipeline::WikiPipeline do
end end
end end
end end
describe "checking slug validity when assembling links" do
context "with a valid slug" do
let(:valid_slug) { "http://example.com" }
it "includes the slug in a (.) relative link" do
output = described_class.to_html(
"[Link](./alert(1);)",
project: project,
project_wiki: project_wiki,
page_slug: valid_slug
)
expect(output).to include(valid_slug)
end
it "includeds the slug in a (..) relative link" do
output = described_class.to_html(
"[Link](../alert(1);)",
project: project,
project_wiki: project_wiki,
page_slug: valid_slug
)
expect(output).to include(valid_slug)
end
end
context "when the slug is deemed unsafe or invalid" do
invalid_slugs = [
"javascript:",
"JaVaScRiPt:",
"\u0001java\u0003script:",
"javascript :",
"javascript: ",
"javascript : ",
":javascript:",
"javascript&#58;",
"javascript&#0058;",
"javascript&#x3A;",
"javascript&#x003A;",
"java\0script:",
" &#14; javascript:"
]
invalid_js_links = [
"alert(1);",
"alert(document.location);"
]
invalid_slugs.each do |slug|
context "with the invalid slug #{slug}" do
invalid_js_links.each do |link|
it "doesn't include a prohibited slug in a (.) relative link '#{link}'" do
output = described_class.to_html(
"[Link](./#{link})",
project: project,
project_wiki: project_wiki,
page_slug: slug
)
expect(output).not_to include(slug)
end
it "doesn't include a prohibited slug in a (..) relative link '#{link}'" do
output = described_class.to_html(
"[Link](../#{link})",
project: project,
project_wiki: project_wiki,
page_slug: slug
)
expect(output).not_to include(slug)
end
end
end
end
end
end
end end
describe 'videos' do describe 'videos' do
......
require 'spec_helper'
describe Gitlab::Utils::SanitizeNodeLink do
let(:klass) do
struct = Struct.new(:value)
struct.include(described_class)
struct
end
subject(:object) { klass.new(:value) }
invalid_schemes = [
"javascript:",
"JaVaScRiPt:",
"\u0001java\u0003script:",
"javascript :",
"javascript: ",
"javascript : ",
":javascript:",
"javascript&#58;",
"javascript&#0058;",
" &#14; javascript:"
]
invalid_schemes.each do |scheme|
context "with the scheme: #{scheme}" do
describe "#remove_unsafe_links" do
tags = {
a: {
doc: HTML::Pipeline.parse("<a href='#{scheme}alert(1);'>foo</a>"),
attr: "href",
node_to_check: -> (doc) { doc.children.first }
},
img: {
doc: HTML::Pipeline.parse("<img src='#{scheme}alert(1);'>"),
attr: "src",
node_to_check: -> (doc) { doc.children.first }
},
video: {
doc: HTML::Pipeline.parse("<video><source src='#{scheme}alert(1);'></video>"),
attr: "src",
node_to_check: -> (doc) { doc.children.first.children.filter("source").first }
}
}
tags.each do |tag, opts|
context "<#{tag}> tags" do
it "removes the unsafe link" do
node = opts[:node_to_check].call(opts[:doc])
expect { object.remove_unsafe_links({ node: node }, remove_invalid_links: true) }
.to change { node[opts[:attr]] }
expect(node[opts[:attr]]).to be_blank
end
end
end
end
describe "#safe_protocol?" do
let(:doc) { HTML::Pipeline.parse("<a href='#{scheme}alert(1);'>foo</a>") }
let(:node) { doc.children.first }
let(:uri) { Addressable::URI.parse(node['href'])}
it "returns false" do
expect(object.safe_protocol?(scheme)).to be_falsy
end
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment