ooo: refuse documents containing links other than embedding objects or local...

ooo: refuse documents containing links other than embedding objects or local sibling links, that is used in ERP5.

ooo: refuse documents containing links other than embedding objects or local...
ooo: refuse documents containing links other than embedding objects or local sibling links, that is used in ERP5.
6997d446 · Kazuhiko Shiozaki · 6b149d10 · 6997d446 · 6997d446 · 6997d446
Commit 6997d446 authored Feb 13, 2024 by Kazuhiko Shiozaki
7 changed files
--- a/cloudooo/handler/ooo/helper/unoconverter.py
+++ b/cloudooo/handler/ooo/helper/unoconverter.py
@@ -39,6 +39,12 @@ from tempfile import mktemp
 from base64 import b64encode, b64decode
 from functools import partial
 from getopt import getopt, GetoptError
+from html.parser import HTMLParser
+import os.path
+from urllib.parse import urlparse
+import xml.etree.ElementTree as ET
+import tempfile
+from zipfile import BadZipFile, ZipFile
 __doc__ = """
@@ -195,6 +201,36 @@ class UnoDocument:
        self._getPropertyToImport(infilter))
    if not uno_document:
      raise AttributeError("This document can not be loaded or is empty")
+    def isSafeUrl(url):
+      parsed_url = urlparse(url)
+      if parsed_url.scheme == 'data':
+        return True
+      elif parsed_url.scheme == '':
+        norm_path = os.path.normpath(parsed_url.path)
+        if norm_path[0] not in ('/', '.') or \
+            os.path.dirname(os.path.normpath(parsed_url.path)) == os.path.dirname(self.document_url):
+          return True
+      return False
+    with tempfile.NamedTemporaryFile() as temp_file:
+      uno_document.storeToURL(systemPathToFileUrl(temp_file.name), ())
+      try:
+        with ZipFile(temp_file.name, 'r') as zip_file:
+          content = ET.fromstring(zip_file.read('content.xml'))
+          for e in content.findall('.//*[@{http://www.w3.org/1999/xlink}actuate="onLoad"]'):
+            href = e.attrib.get('{http://www.w3.org/1999/xlink}href')
+            if href:
+              if not isSafeUrl(href):
+                raise RuntimeError('This document contains unsafe links %s' % href)
+      except BadZipFile: # HTML input case
+        class CustomHTMLParser(HTMLParser):
+          def handle_starttag(self, tag, attrs):
+            for attr in attrs:
+              if attr[0] == 'src':
+                if not isSafeUrl(attr[1]):
+                  raise RuntimeError('This document contains unsafe links %s' % attr[1])
+        parser = CustomHTMLParser()
+        with open(temp_file.name, 'r') as f:
+          parser.feed(f.read())
    if refresh:
      # Before converting to expected format, refresh dynamic
      # value inside document.

--- a/cloudooo/handler/ooo/tests/data/with_link.html
+++ b/cloudooo/handler/ooo/tests/data/with_link.html
--- a/cloudooo/handler/ooo/tests/data/with_link.odg
+++ b/cloudooo/handler/ooo/tests/data/with_link.odg
--- a/cloudooo/handler/ooo/tests/data/with_link.odp
+++ b/cloudooo/handler/ooo/tests/data/with_link.odp
--- a/cloudooo/handler/ooo/tests/data/with_link.ods
+++ b/cloudooo/handler/ooo/tests/data/with_link.ods
--- a/cloudooo/handler/ooo/tests/data/with_link.odt
+++ b/cloudooo/handler/ooo/tests/data/with_link.odt
--- a/cloudooo/handler/ooo/tests/testOooServer.py
+++ b/cloudooo/handler/ooo/tests/testOooServer.py
@@ -37,6 +37,7 @@ from zipfile import ZipFile, is_zipfile
 from cloudooo.tests.cloudoooTestCase import TestCase
 from unittest import expectedFailure
 import magic
+import xmlrpc.client
 from cloudooo.handler.ooo.tests.testOooMimemapper import text_expected_tuple, presentation_expected_tuple
@@ -686,3 +687,15 @@ class TestCSVEncoding(TestCase):
    self.assertEqual(
        [],
        [x.text for x in tree.getroot().findall('.//td')])
+class TestInvalidFile(TestCase):
+  """cloudoo should refuse potentially unsafe files."""
+  def test_with_link(self):
+    for ext in ('odt', 'ods', 'odp', 'odg', 'html'):
+      with open('./data/with_link.%s' % ext, 'rb') as f:
+        data = encodebytes(f.read()).decode()
+      self.assertRaisesRegex(
+        xmlrpc.client.Fault,
+        'This document contains unsafe links .*',
+        self.proxy.convertFile, data, ext, 'pdf'
+      )