Commit 5c9b32c3 authored by Stefan Behnel

Repair incorrect C-API signature usage for PyUnicode_DecodeUTF16() when...

Repair incorrect C-API signature usage for PyUnicode_DecodeUTF16() when optimising bytes decoding from UTF-16(LE/BE)

Closes #1696
parent 73f04523
......@@ -3337,7 +3337,7 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None),
])
_special_encodings = ['UTF8', 'UTF16', 'Latin1', 'ASCII',
_special_encodings = ['UTF8', 'UTF16', 'UTF-16LE', 'UTF-16BE', 'Latin1', 'ASCII',
'unicode_escape', 'raw_unicode_escape']
_special_codecs = [ (name, codecs.getencoder(name))
......@@ -3379,7 +3379,7 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
if encoding and error_handling == 'strict':
# try to find a specific encoder function
codec_name = self._find_special_codec_name(encoding)
if codec_name is not None:
if codec_name is not None and '-' not in codec_name:
encode_function = "PyUnicode_As%sString" % codec_name
return self._substitute_method_call(
node, function, encode_function,
......@@ -3473,9 +3473,12 @@ class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
if encoding is not None:
codec_name = self._find_special_codec_name(encoding)
if codec_name is not None:
if codec_name in ('UTF16', 'UTF-16LE', 'UTF-16BE'):
codec_cname = "__Pyx_PyUnicode_Decode%s" % codec_name.replace('-', '')
else:
codec_cname = "PyUnicode_Decode%s" % codec_name
decode_function = ExprNodes.RawCNameExprNode(
node.pos, type=self.PyUnicode_DecodeXyz_func_ptr_type,
cname="PyUnicode_Decode%s" % codec_name)
node.pos, type=self.PyUnicode_DecodeXyz_func_ptr_type, cname=codec_cname)
encoding_node = ExprNodes.NullNode(node.pos)
else:
decode_function = ExprNodes.NullNode(node.pos)
......
......@@ -396,6 +396,21 @@ static CYTHON_INLINE Py_UCS4 __Pyx_GetItemInt_Unicode_Fast(PyObject* ustring, Py
}
/////////////// decode_c_string_utf16.proto ///////////////
/* Adapter that exposes PyUnicode_DecodeUTF16() through a three-argument
 * (s, size, errors) decoder signature by supplying the extra byte order
 * argument itself.  A byte order of 0 selects native order, with the decoder
 * honouring a leading BOM (per the CPython C-API documentation). */
static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16(
        const char *s, Py_ssize_t size, const char *errors) {
    int bom_order = 0;
    return PyUnicode_DecodeUTF16(s, size, errors, &bom_order);
}
/* Adapter that exposes PyUnicode_DecodeUTF16() through a three-argument
 * (s, size, errors) decoder signature, with the byte order fixed to -1
 * (little endian per the CPython C-API documentation). */
static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16LE(
        const char *s, Py_ssize_t size, const char *errors) {
    int little_endian = -1;
    return PyUnicode_DecodeUTF16(s, size, errors, &little_endian);
}
/* Adapter that exposes PyUnicode_DecodeUTF16() through a three-argument
 * (s, size, errors) decoder signature, with the byte order fixed to 1
 * (big endian per the CPython C-API documentation). */
static CYTHON_INLINE PyObject *__Pyx_PyUnicode_DecodeUTF16BE(
        const char *s, Py_ssize_t size, const char *errors) {
    int big_endian = 1;
    return PyUnicode_DecodeUTF16(s, size, errors, &big_endian);
}
/////////////// decode_cpp_string.proto ///////////////
//@requires: IncludeCppStringH
//@requires: decode_c_bytes
......@@ -417,6 +432,7 @@ static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
/////////////// decode_c_string ///////////////
//@requires: IncludeStringH
//@requires: decode_c_string_utf16
/* duplicate code to avoid calling strlen() if start >= 0 and stop >= 0 */
static CYTHON_INLINE PyObject* __Pyx_decode_c_string(
......@@ -459,6 +475,7 @@ static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes(
PyObject* (*decode_func)(const char *s, Py_ssize_t size, const char *errors));
/////////////// decode_c_bytes ///////////////
//@requires: decode_c_string_utf16
static CYTHON_INLINE PyObject* __Pyx_decode_c_bytes(
const char* cstring, Py_ssize_t length, Py_ssize_t start, Py_ssize_t stop,
......
......@@ -152,6 +152,47 @@ def bytes_decode(bytes s, start=None, stop=None):
return s[start:stop].decode('utf8')
# Doctest: decoding a typed bytes argument with the 'UTF-16' codec must
# round-trip the text 'abc'.
# NOTE(review): the two tree assertions presumably require the .decode() call
# to be compiled into a PythonCapiCallNode with no SimpleCallNode left over,
# i.e. the optimised C-API path is taken -- confirm against the test runner.
@cython.test_assert_path_exists(
"//PythonCapiCallNode")
@cython.test_fail_if_path_exists(
"//SimpleCallNode")
def bytes_decode_utf16(bytes s):
"""
>>> s = 'abc'.encode('UTF-16')
>>> print(bytes_decode_utf16(s))
abc
"""
# Keep this exact call form: it is the expression whose optimisation is tested.
return s.decode('UTF-16')
# Doctest: decoding a typed bytes argument with the 'UTF-16LE' codec must
# round-trip the text 'abc'.  The doctest first asserts that the LE encoding
# differs from the BE encoding of the same text.
# NOTE(review): the two tree assertions presumably require the .decode() call
# to be compiled into a PythonCapiCallNode with no SimpleCallNode left over,
# i.e. the optimised C-API path is taken -- confirm against the test runner.
@cython.test_assert_path_exists(
"//PythonCapiCallNode")
@cython.test_fail_if_path_exists(
"//SimpleCallNode")
def bytes_decode_utf16_le(bytes s):
"""
>>> s = 'abc'.encode('UTF-16LE')
>>> assert s != 'abc'.encode('UTF-16BE')
>>> print(bytes_decode_utf16_le(s))
abc
"""
# Keep this exact call form: it is the expression whose optimisation is tested.
return s.decode('UTF-16LE')
# Doctest: decoding a typed bytes argument with the 'UTF-16BE' codec must
# round-trip the text 'abc'.  The doctest first asserts that the BE encoding
# differs from the LE encoding of the same text.
# NOTE(review): the two tree assertions presumably require the .decode() call
# to be compiled into a PythonCapiCallNode with no SimpleCallNode left over,
# i.e. the optimised C-API path is taken -- confirm against the test runner.
@cython.test_assert_path_exists(
"//PythonCapiCallNode")
@cython.test_fail_if_path_exists(
"//SimpleCallNode")
def bytes_decode_utf16_be(bytes s):
"""
>>> s = 'abc'.encode('UTF-16BE')
>>> assert s != 'abc'.encode('UTF-16LE')
>>> print(bytes_decode_utf16_be(s))
abc
"""
# Keep this exact call form: it is the expression whose optimisation is tested.
return s.decode('UTF-16BE')
@cython.test_assert_path_exists(
"//PythonCapiCallNode")
@cython.test_fail_if_path_exists(
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment