Commit fe148408 authored by Fred Drake

Backport the type-sniffing code from Zope 3 to support XML page templates from the filesystem.
parent 9a595cb2
......@@ -15,7 +15,7 @@
Zope object encapsulating a Page Template from the filesystem.
"""
__version__='$Revision: 1.23 $'[11:-2]
__version__='$Revision: 1.24 $'[11:-2]
import os, AccessControl, Acquisition, sys
from Globals import package_home, DevelopmentMode
......@@ -117,7 +117,12 @@ class PageTemplateFile(Script, PageTemplate, Traversable):
mtime = 0
if self._v_program is not None and mtime == self._v_last_read:
return
self.pt_edit(open(self.filename), None)
f = open(self.filename, "rb")
try:
text = f.read()
finally:
f.close()
self.pt_edit(text, sniff_type(text))
self._cook()
if self._v_errors:
LOG('PageTemplateFile', ERROR, 'Error in template',
......@@ -154,3 +159,19 @@ class PageTemplateFile(Script, PageTemplate, Traversable):
from ZODB.POSException import StorageError
raise StorageError, ("Instance of AntiPersistent class %s "
"cannot be stored." % self.__class__.__name__)
# Leading byte sequences of a document that opens with an "<?xml"
# declaration, one entry per encoding / byte-order-mark combination.
XML_PREFIXES = [
    # ascii, utf-8
    "<?xml",
    # utf-8 w/ byte order mark
    "\xef\xbb\xbf<?xml",
    # utf-16 big endian
    "\0<\0?\0x\0m\0l",
    # utf-16 little endian
    "<\0?\0x\0m\0l\0",
    # utf-16 big endian w/ byte order mark
    "\xfe\xff\0<\0?\0x\0m\0l",
    # utf-16 little endian w/ byte order mark
    "\xff\xfe<\0?\0x\0m\0l\0",
]
def sniff_type(text):
    """Guess a content type from the leading bytes of *text*.

    Returns "text/xml" when *text* starts with any entry of XML_PREFIXES,
    and None when no entry matches.
    """
    content_type = None
    for xml_marker in XML_PREFIXES:
        if text.startswith(xml_marker):
            content_type = "text/xml"
            break
    return content_type
"""Tests of PageTemplateFile."""
import os
import tempfile
import unittest
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
class TypeSniffingTestCase(unittest.TestCase):
    """Check the content type PageTemplateFile assigns to on-disk templates.

    Each check writes a sample document to a scratch file, loads it with
    PageTemplateFile and compares the resulting ``content_type``.
    """

    # NOTE(review): tempfile.mktemp() only picks a name and does not create
    # the file; the name is computed once, when the class is defined, and is
    # shared by every test in this class.
    TEMPFILENAME = tempfile.mktemp()

    def tearDown(self):
        """Remove the scratch file if a check created it."""
        if os.path.exists(self.TEMPFILENAME):
            os.unlink(self.TEMPFILENAME)

    def check_content_type(self, text, expected_type):
        """Write *text* to the scratch file and verify the detected type.

        text -- raw document data, written to the file in binary mode
        expected_type -- value ``content_type`` must have after ``read()``
        """
        f = open(self.TEMPFILENAME, "wb")
        try:
            f.write(text)
        finally:
            # Close the handle even if the write fails.
            f.close()
        pt = PageTemplateFile(self.TEMPFILENAME)
        pt.read()
        self.assertEqual(pt.content_type, expected_type)

    def test_sniffer_xml_ascii(self):
        # Both a space and a tab may follow the "<?xml" target.
        self.check_content_type(
            "<?xml version='1.0' encoding='ascii'?><doc/>",
            "text/xml")
        self.check_content_type(
            "<?xml\tversion='1.0' encoding='ascii'?><doc/>",
            "text/xml")

    def test_sniffer_xml_utf8(self):
        # w/out byte order mark
        self.check_content_type(
            "<?xml version='1.0' encoding='utf-8'?><doc/>",
            "text/xml")
        self.check_content_type(
            "<?xml\tversion='1.0' encoding='utf-8'?><doc/>",
            "text/xml")
        # with byte order mark
        self.check_content_type(
            "\xef\xbb\xbf<?xml version='1.0' encoding='utf-8'?><doc/>",
            "text/xml")
        self.check_content_type(
            "\xef\xbb\xbf<?xml\tversion='1.0' encoding='utf-8'?><doc/>",
            "text/xml")

    # In the UTF-16 samples below a NUL that precedes an octal digit is
    # spelled with all three digits ("\0001", "\0000"); a bare "\01" would
    # be read as the single byte 0x01.
    # NOTE(review): the samples still declare encoding='utf-8' although the
    # bytes are UTF-16 -- confirm whether that is intentional.

    def test_sniffer_xml_utf16_be(self):
        # w/out byte order mark
        self.check_content_type(
            "\0<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\0001\0.\0000\0'"
            "\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>"
            "\0<\0d\0o\0c\0/\0>",
            "text/xml")
        self.check_content_type(
            "\0<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\0001\0.\0000\0'"
            "\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>"
            "\0<\0d\0o\0c\0/\0>",
            "text/xml")
        # with byte order mark
        self.check_content_type(
            "\xfe\xff"
            "\0<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\0001\0.\0000\0'"
            "\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>"
            "\0<\0d\0o\0c\0/\0>",
            "text/xml")
        self.check_content_type(
            "\xfe\xff"
            "\0<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\0001\0.\0000\0'"
            "\0 \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>"
            "\0<\0d\0o\0c\0/\0>",
            "text/xml")

    def test_sniffer_xml_utf16_le(self):
        # w/out byte order mark
        self.check_content_type(
            "<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\0001\0.\0000\0'\0"
            " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0"
            "<\0d\0o\0c\0/\0>\0",
            "text/xml")
        self.check_content_type(
            "<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\0001\0.\0000\0'\0"
            " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0"
            "<\0d\0o\0c\0/\0>\0",
            "text/xml")
        # with byte order mark
        self.check_content_type(
            "\xff\xfe"
            "<\0?\0x\0m\0l\0 \0v\0e\0r\0s\0i\0o\0n\0=\0'\0001\0.\0000\0'\0"
            " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0"
            "<\0d\0o\0c\0/\0>\0",
            "text/xml")
        self.check_content_type(
            "\xff\xfe"
            "<\0?\0x\0m\0l\0\t\0v\0e\0r\0s\0i\0o\0n\0=\0'\0001\0.\0000\0'\0"
            " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\08\0'\0?\0>\0"
            "<\0d\0o\0c\0/\0>\0",
            "text/xml")

    # Identifiers for the HTML 4.01 Transitional DTD used by the HTML
    # samples (only the system identifier is referenced in this class).
    HTML_PUBLIC_ID = "-//W3C//DTD HTML 4.01 Transitional//EN"
    HTML_SYSTEM_ID = "http://www.w3.org/TR/html4/loose.dtd"

    def test_sniffer_html_ascii(self):
        # Documents without an XML declaration are expected to be HTML.
        self.check_content_type(
            "<!DOCTYPE html [ SYSTEM '%s' ]><html></html>"
            % self.HTML_SYSTEM_ID,
            "text/html")
        self.check_content_type(
            "<html><head><title>sample document</title></head></html>",
            "text/html")

    # XXX This reflects a case that simply isn't handled by the
    # sniffer; there are many, but it gets it right more often than
    # before.
    def donttest_sniffer_xml_simple(self):
        # Named "donttest_..." so the default "test" prefix does not pick
        # it up.
        self.check_content_type("<doc><element/></doc>",
                                "text/xml")
def test_suite():
    """Return a suite of the test methods defined on TypeSniffingTestCase.

    Uses a default TestLoader instead of unittest.makeSuite(), which is
    deprecated since Python 3.11 and removed in 3.13; with default arguments
    the two collect the same tests.
    """
    loader = unittest.TestLoader()
    return loader.loadTestsFromTestCase(TypeSniffingTestCase)
# Run the suite returned by test_suite() when this module is executed
# directly as a script.
if __name__ == "__main__":
    unittest.main(defaultTest="test_suite")
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment