Commit 6997d446 authored by Kazuhiko Shiozaki's avatar Kazuhiko Shiozaki

ooo: refuse documents containing links other than embedding objects or local...

ooo: refuse documents containing links other than embedded objects or local sibling links, which are the ones used in ERP5.
parent 6b149d10
Pipeline #34098 failed with stage
in 0 seconds
...@@ -39,6 +39,12 @@ from tempfile import mktemp ...@@ -39,6 +39,12 @@ from tempfile import mktemp
from base64 import b64encode, b64decode from base64 import b64encode, b64decode
from functools import partial from functools import partial
from getopt import getopt, GetoptError from getopt import getopt, GetoptError
from html.parser import HTMLParser
import os.path
from urllib.parse import urlparse
import xml.etree.ElementTree as ET
import tempfile
from zipfile import BadZipFile, ZipFile
__doc__ = """ __doc__ = """
...@@ -195,6 +201,36 @@ class UnoDocument: ...@@ -195,6 +201,36 @@ class UnoDocument:
self._getPropertyToImport(infilter)) self._getPropertyToImport(infilter))
if not uno_document: if not uno_document:
raise AttributeError("This document can not be loaded or is empty") raise AttributeError("This document can not be loaded or is empty")
def isSafeUrl(url):
    """Tell whether ``url`` may be referenced by a document being converted.

    A URL is accepted when it is either:

    * a ``data:`` URL, or
    * a scheme-less, host-less path that, once normalised, either does not
      start with ``/`` or ``.`` or lives in the same directory as
      ``self.document_url``.

    Everything else (any other scheme, or any reference carrying a network
    location such as ``//host/share/file``) is refused.

    Note: ``self`` is not a parameter; it is read from the enclosing scope.

    Args:
        url: the link target found in the document.

    Returns:
        True when the link is considered safe, False otherwise.
    """
    parsed_url = urlparse(url)
    if parsed_url.scheme == 'data':
        return True
    # Any other explicit scheme is refused, and so is a scheme-less
    # network-path reference ("//host/path"): it names a remote host even
    # though its path component alone may look like a local file.
    if parsed_url.scheme or parsed_url.netloc:
        return False
    norm_path = os.path.normpath(parsed_url.path)
    # After normalisation a relative path that climbs out of its base
    # starts with "..", and an absolute one starts with "/"; anything else
    # stays below its base (presumably inside the document package --
    # TODO confirm).
    if norm_path[0] not in ('/', '.'):
        return True
    # Otherwise only accept siblings of the document itself.
    return os.path.dirname(norm_path) == os.path.dirname(self.document_url)
with tempfile.NamedTemporaryFile() as temp_file:
uno_document.storeToURL(systemPathToFileUrl(temp_file.name), ())
try:
with ZipFile(temp_file.name, 'r') as zip_file:
content = ET.fromstring(zip_file.read('content.xml'))
for e in content.findall('.//*[@{http://www.w3.org/1999/xlink}actuate="onLoad"]'):
href = e.attrib.get('{http://www.w3.org/1999/xlink}href')
if href:
if not isSafeUrl(href):
raise RuntimeError('This document contains unsafe links %s' % href)
except BadZipFile: # HTML input case
class CustomHTMLParser(HTMLParser):
    """HTML parser that refuses documents whose ``src`` attributes are unsafe."""

    def handle_starttag(self, tag, attrs):
        """Validate every ``src`` attribute of an opening tag.

        Args:
            tag: name of the tag being opened (unused).
            attrs: list of ``(name, value)`` pairs supplied by HTMLParser.

        Raises:
            RuntimeError: when a ``src`` value is rejected by ``isSafeUrl``.
        """
        for name, value in attrs:
            if name != 'src':
                continue
            # HTMLParser reports a valueless attribute (``<img src>``) with
            # a value of None: there is no URL to validate in that case.
            if value is None:
                continue
            if not isSafeUrl(value):
                raise RuntimeError('This document contains unsafe links %s' % value)
parser = CustomHTMLParser()
with open(temp_file.name, 'r') as f:
parser.feed(f.read())
if refresh: if refresh:
# Before converting to expected format, refresh dynamic # Before converting to expected format, refresh dynamic
# value inside document. # value inside document.
......
This diff is collapsed.
...@@ -37,6 +37,7 @@ from zipfile import ZipFile, is_zipfile ...@@ -37,6 +37,7 @@ from zipfile import ZipFile, is_zipfile
from cloudooo.tests.cloudoooTestCase import TestCase from cloudooo.tests.cloudoooTestCase import TestCase
from unittest import expectedFailure from unittest import expectedFailure
import magic import magic
import xmlrpc.client
from cloudooo.handler.ooo.tests.testOooMimemapper import text_expected_tuple, presentation_expected_tuple from cloudooo.handler.ooo.tests.testOooMimemapper import text_expected_tuple, presentation_expected_tuple
...@@ -686,3 +687,15 @@ class TestCSVEncoding(TestCase): ...@@ -686,3 +687,15 @@ class TestCSVEncoding(TestCase):
self.assertEqual( self.assertEqual(
[], [],
[x.text for x in tree.getroot().findall('.//td')]) [x.text for x in tree.getroot().findall('.//td')])
class TestInvalidFile(TestCase):
    """cloudooo should refuse potentially unsafe files."""

    def test_with_link(self):
        """Converting each ``./data/with_link.<ext>`` fixture to PDF must fail
        with an XML-RPC fault reporting unsafe links."""
        for ext in ('odt', 'ods', 'odp', 'odg', 'html'):
            # One sub-test per format: a failure names the offending
            # extension and the remaining formats are still exercised.
            with self.subTest(ext=ext):
                with open('./data/with_link.%s' % ext, 'rb') as f:
                    # NOTE(review): encodebytes is expected to be imported at
                    # module level; that import is not visible in this hunk.
                    data = encodebytes(f.read()).decode()
                self.assertRaisesRegex(
                    xmlrpc.client.Fault,
                    'This document contains unsafe links .*',
                    self.proxy.convertFile, data, ext, 'pdf'
                )
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment