Commit 49908326 authored by Jérome Perrin's avatar Jérome Perrin

hal_json_style: Tolerate documents with non-utf8 properties

HAL JSON uses JSON, which uses UTF-8, but properties of ERP5 documents
do not enforce any specific encoding, so if we happen to have
documents with string properties that are not valid UTF-8, they
could not be queried with HAL JSON (json.dumps was raising UnicodeError).

This pre-processes field default values and the document title to make
sure they can be serialized as JSON. This assumes that configuration
(form definition, actions, ...) will be valid UTF-8, but makes an extra
effort to make sure "broken" data can be displayed without error.

/reviewed-on nexedi/erp5!928
parent 1a70090d
No related merge requests found
...@@ -139,18 +139,29 @@ def byteify(string): ...@@ -139,18 +139,29 @@ def byteify(string):
return string return string
def ensureUTF8(obj):
  """Return `obj` as a valid UTF-8 byte string when it is a string.

  A byte string (`str`) is decoded as UTF-8 with the 'replace' error
  handler and encoded again, so bytes that cannot be decoded are
  substituted instead of raising. A `unicode` string is encoded to UTF-8.
  Any other object is returned unchanged.
  """
  if isinstance(obj, str):
    # round-trip through unicode to substitute undecodable bytes
    as_text = obj.decode('utf-8', 'replace')
    return as_text.encode('utf-8')
  if isinstance(obj, unicode):
    return obj.encode('utf-8', 'replace')
  # not a string: nothing to normalise
  return obj
def ensureSerializable(obj): def ensureSerializable(obj):
"""Ensure obj and all sub-objects are JSON serializable.""" """Ensure obj and all sub-objects are JSON serializable."""
if isinstance(obj, dict): if isinstance(obj, dict):
for key in obj: for key in obj:
obj[key] = ensureSerializable(obj[key]) obj[key] = ensureSerializable(obj[key])
# throw away date's type information and later reconstruct as Zope's DateTime # throw away date's type information and later reconstruct as Zope's DateTime
if isinstance(obj, DateTime): elif isinstance(obj, DateTime):
return obj.ISO() + ' ' + obj.timezone() # ISO with timezone return obj.ISO() + ' ' + obj.timezone() # ISO with timezone
if isinstance(obj, (datetime.datetime, datetime.date, datetime.time)): elif isinstance(obj, (datetime.datetime, datetime.date, datetime.time)):
return obj.isoformat() return obj.isoformat()
# let us believe that iterables don't contain other unserializable objects # let us believe that iterables don't contain other unserializable objects
return obj
return ensureUTF8(obj)
datetime_iso_re = re.compile(r'^\d{4}-\d{2}-\d{2} |T\d{2}:\d{2}:\d{2}.*$') datetime_iso_re = re.compile(r'^\d{4}-\d{2}-\d{2} |T\d{2}:\d{2}:\d{2}.*$')
...@@ -428,7 +439,7 @@ def renderField(traversed_document, field, form, value=MARKER, meta_type=None, k ...@@ -428,7 +439,7 @@ def renderField(traversed_document, field, form, value=MARKER, meta_type=None, k
if "Field" in meta_type: if "Field" in meta_type:
# fields have default value and can be required (unlike boxes) # fields have default value and can be required (unlike boxes)
result["required"] = field.get_value("required") if field.has_value("required") else None result["required"] = field.get_value("required") if field.has_value("required") else None
result["default"] = getFieldDefault(form, field, key, value=value) result["default"] = ensureUTF8(getFieldDefault(form, field, key, value=value))
# start the actual "switch" on field's meta_type here # start the actual "switch" on field's meta_type here
if meta_type in ("ListField", "RadioField", "ParallelListField", "MultiListField"): if meta_type in ("ListField", "RadioField", "ParallelListField", "MultiListField"):
...@@ -560,6 +571,7 @@ def renderField(traversed_document, field, form, value=MARKER, meta_type=None, k ...@@ -560,6 +571,7 @@ def renderField(traversed_document, field, form, value=MARKER, meta_type=None, k
if not isinstance(result["default"], list): if not isinstance(result["default"], list):
result["default"] = [result["default"], ] result["default"] = [result["default"], ]
result["default"] = [ensureUTF8(x) for x in result["default"]]
result.update({ result.update({
"relation_field_id": traversed_document.Field_getSubFieldKeyDict(field, "relation", key=result["key"]), "relation_field_id": traversed_document.Field_getSubFieldKeyDict(field, "relation", key=result["key"]),
...@@ -892,7 +904,7 @@ def renderForm(traversed_document, form, response_dict, key_prefix=None, selecti ...@@ -892,7 +904,7 @@ def renderForm(traversed_document, form, response_dict, key_prefix=None, selecti
"script_id": script.id "script_id": script.id
}, },
"name": getRealRelativeUrl(traversed_document), "name": getRealRelativeUrl(traversed_document),
"title": traversed_document.getTitle() "title": ensureUTF8(traversed_document.getTitle())
} }
form_relative_url = getFormRelativeUrl(form) form_relative_url = getFormRelativeUrl(form)
...@@ -1265,7 +1277,7 @@ def calculateHateoas(is_portal=None, is_site_root=None, traversed_document=None, ...@@ -1265,7 +1277,7 @@ def calculateHateoas(is_portal=None, is_site_root=None, traversed_document=None,
action_dict = {} # actions available on current `traversed_document` action_dict = {} # actions available on current `traversed_document`
last_form_id = None # will point to the previous form so we can obtain previous selection last_form_id = None # will point to the previous form so we can obtain previous selection
result_dict['title'] = traversed_document.getTitle() result_dict['title'] = ensureUTF8(traversed_document.getTitle())
# extra_param_json should be base64 encoded JSON at this point # extra_param_json should be base64 encoded JSON at this point
# only for mode == 'form' it is already a dictionary # only for mode == 'form' it is already a dictionary
......
...@@ -1270,6 +1270,25 @@ class TestERP5Document_getHateoas_mode_traverse(ERP5HALJSONStyleSkinsMixin): ...@@ -1270,6 +1270,25 @@ class TestERP5Document_getHateoas_mode_traverse(ERP5HALJSONStyleSkinsMixin):
self.assertTrue('_actions' not in result_dict['_embedded']['_view']) self.assertTrue('_actions' not in result_dict['_embedded']['_view'])
@simulate('Base_getRequestUrl', '*args, **kwargs',
          'return "http://example.org/bar"')
@simulate('Base_getRequestHeader', '*args, **kwargs',
          'return "application/hal+json"')
@changeSkin('Hal')
def test_getHateoasDocument_property_corrupted_encoding(self):
  """A document whose title is not valid UTF-8 is still served as HAL JSON.

  The response must have status 200 and the HAL JSON content type, and the
  title must come back as replacement characters in the `my_title` field
  default, in the top-level `title` and in the `traversed_document` link.
  """
  document = self._makeDocument()
  # this sequence of bytes is not valid UTF-8, it cannot be decoded
  document.setTitle('\xe9\xcf\xf3\xaf')
  fake_request = do_fake_request("GET")
  result = self.portal.web_site_module.hateoas.ERP5Document_getHateoas(
    REQUEST=fake_request,
    mode="traverse",
    relative_url=document.getRelativeUrl(),
    view="view")

  # assertEqual rather than the deprecated assertEquals alias
  self.assertEqual(fake_request.RESPONSE.status, 200)
  self.assertEqual(
    fake_request.RESPONSE.getHeader('Content-Type'),
    "application/hal+json")

  result_dict = json.loads(result)
  # the undecodable bytes are expected as U+FFFD replacement characters
  expected_title = u'\ufffd\ufffd\ufffd'
  view = result_dict['_embedded']['_view']
  self.assertEqual(view['my_title']['default'], expected_title)
  self.assertEqual(result_dict['title'], expected_title)
  self.assertEqual(
    view['_links']['traversed_document']['title'], expected_title)
class TestERP5Document_getHateoas_mode_search(ERP5HALJSONStyleSkinsMixin): class TestERP5Document_getHateoas_mode_search(ERP5HALJSONStyleSkinsMixin):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment