Commit 329e067f authored by Douwe Maan's avatar Douwe Maan

Merge branch 'rs-dev-issue-2613' into 'master'

Add custom protocol whitelisting to SanitizationFilter

Addresses internal https://dev.gitlab.org/gitlab/gitlabhq/issues/2613

We allow any protocol for autolinks: irc://irc.freenode.net/git

But manual Markdown links with the same protocol get sanitized: `[This will not be clickable](irc://irc.freenode.net/git)`: [This will not be clickable](irc://irc.freenode.net/git)

To get around this we have to first allow *all* protocols, and then manually clean dangerous (i.e., `javascript:`) protocols.

See merge request !1496
parents cbd1c3e3 16f8ca56
...@@ -48,6 +48,12 @@ module Gitlab ...@@ -48,6 +48,12 @@ module Gitlab
# Allow span elements # Allow span elements
whitelist[:elements].push('span') whitelist[:elements].push('span')
# Allow any protocol in `a` elements...
whitelist[:protocols].delete('a')
# ...but then remove links with the `javascript` protocol
whitelist[:transformers].push(remove_javascript_links)
# Remove `rel` attribute from `a` elements # Remove `rel` attribute from `a` elements
whitelist[:transformers].push(remove_rel) whitelist[:transformers].push(remove_rel)
...@@ -57,6 +63,19 @@ module Gitlab ...@@ -57,6 +63,19 @@ module Gitlab
whitelist whitelist
end end
def remove_javascript_links
lambda do |env|
node = env[:node]
return unless node.name == 'a'
return unless node.has_attribute?('href')
if node['href'].start_with?('javascript', ':javascript')
node.remove_attribute('href')
end
end
end
def remove_rel def remove_rel
lambda do |env| lambda do |env|
if env[:node_name] == 'a' if env[:node_name] == 'a'
......
...@@ -44,7 +44,7 @@ module Gitlab::Markdown ...@@ -44,7 +44,7 @@ module Gitlab::Markdown
instance = described_class.new('Foo') instance = described_class.new('Foo')
3.times { instance.whitelist } 3.times { instance.whitelist }
expect(instance.whitelist[:transformers].size).to eq 4 expect(instance.whitelist[:transformers].size).to eq 5
end end
it 'allows syntax highlighting' do it 'allows syntax highlighting' do
...@@ -77,19 +77,100 @@ module Gitlab::Markdown ...@@ -77,19 +77,100 @@ module Gitlab::Markdown
end end
it 'removes `rel` attribute from `a` elements' do it 'removes `rel` attribute from `a` elements' do
doc = filter(%q{<a href="#" rel="nofollow">Link</a>}) act = %q{<a href="#" rel="nofollow">Link</a>}
exp = %q{<a href="#">Link</a>}
expect(doc.css('a').size).to eq 1 expect(filter(act).to_html).to eq exp
expect(doc.at_css('a')['href']).to eq '#'
expect(doc.at_css('a')['rel']).to be_nil
end end
it 'removes script-like `href` attribute from `a` elements' do # Adapted from the Sanitize test suite: http://git.io/vczrM
html = %q{<a href="javascript:alert('Hi')">Hi</a>} protocols = {
doc = filter(html) 'protocol-based JS injection: simple, no spaces' => {
input: '<a href="javascript:alert(\'XSS\');">foo</a>',
output: '<a>foo</a>'
},
'protocol-based JS injection: simple, spaces before' => {
input: '<a href="javascript :alert(\'XSS\');">foo</a>',
output: '<a>foo</a>'
},
'protocol-based JS injection: simple, spaces after' => {
input: '<a href="javascript: alert(\'XSS\');">foo</a>',
output: '<a>foo</a>'
},
'protocol-based JS injection: simple, spaces before and after' => {
input: '<a href="javascript : alert(\'XSS\');">foo</a>',
output: '<a>foo</a>'
},
'protocol-based JS injection: preceding colon' => {
input: '<a href=":javascript:alert(\'XSS\');">foo</a>',
output: '<a>foo</a>'
},
'protocol-based JS injection: UTF-8 encoding' => {
input: '<a href="javascript&#58;">foo</a>',
output: '<a>foo</a>'
},
'protocol-based JS injection: long UTF-8 encoding' => {
input: '<a href="javascript&#0058;">foo</a>',
output: '<a>foo</a>'
},
'protocol-based JS injection: long UTF-8 encoding without semicolons' => {
input: '<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>foo</a>',
output: '<a>foo</a>'
},
'protocol-based JS injection: hex encoding' => {
input: '<a href="javascript&#x3A;">foo</a>',
output: '<a>foo</a>'
},
'protocol-based JS injection: long hex encoding' => {
input: '<a href="javascript&#x003A;">foo</a>',
output: '<a>foo</a>'
},
'protocol-based JS injection: hex encoding without semicolons' => {
input: '<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>foo</a>',
output: '<a>foo</a>'
},
'protocol-based JS injection: null char' => {
input: "<a href=java\0script:alert(\"XSS\")>foo</a>",
output: '<a href="java"></a>'
},
'protocol-based JS injection: spaces and entities' => {
input: '<a href=" &#14; javascript:alert(\'XSS\');">foo</a>',
output: '<a href="">foo</a>'
},
}
protocols.each do |name, data|
it "handles #{name}" do
doc = filter(data[:input])
expect(doc.to_html).to eq data[:output]
end
end
it 'allows non-standard anchor schemes' do
exp = %q{<a href="irc://irc.freenode.net/git">IRC</a>}
act = filter(exp)
expect(act.to_html).to eq exp
end
it 'allows relative links' do
exp = %q{<a href="foo/bar.md">foo/bar.md</a>}
act = filter(exp)
expect(doc.css('a').size).to eq 1 expect(act.to_html).to eq exp
expect(doc.at_css('a')['href']).to be_nil
end end
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment