Commit e09468bd authored by Stefan Behnel

Infer bytes/str type for safe bytes/str %-formatting cases that never return...

Infer bytes/str type for safe bytes/str %-formatting cases that never return Unicode strings in Py2.
Closes #2153.
parent 54a5fb54
...@@ -22,6 +22,10 @@ Bugs fixed ...@@ -22,6 +22,10 @@ Bugs fixed
fail if the base class constructor was declared without ``nogil``. fail if the base class constructor was declared without ``nogil``.
(Github issue #2157) (Github issue #2157)
* Bytes %-formatting inferred ``basestring`` (bytes or unicode) as result type
in some cases where ``bytes`` would have been safe to infer.
(Github issue #2153)
0.28 (2018-03-13) 0.28 (2018-03-13)
================= =================
......
...@@ -16,6 +16,7 @@ cython.declare(error=object, warning=object, warn_once=object, InternalError=obj ...@@ -16,6 +16,7 @@ cython.declare(error=object, warning=object, warn_once=object, InternalError=obj
bytearray_type=object, slice_type=object, _py_int_types=object, bytearray_type=object, slice_type=object, _py_int_types=object,
IS_PYTHON3=cython.bint) IS_PYTHON3=cython.bint)
import re
import sys import sys
import copy import copy
import os.path import os.path
...@@ -11484,6 +11485,20 @@ class DivNode(NumBinopNode): ...@@ -11484,6 +11485,20 @@ class DivNode(NumBinopNode):
self.operand2.result()) self.operand2.result())
# Extracts the conversion type character of every %-format spec found in a
# bytes format string.  The general layout of a spec is
#     %[(name)][flags][width][.precision][length]type
# 'findall' yields one item per spec: the type character, or b'' for a literal
# '%%' (which leaves the capture group unmatched).
_find_formatting_types = re.compile(
    br"%"
    br"(?:%|"  # %%
    br"(?:\([^)]+\))?"  # %(name)
    # Flags, width and precision.  '.' and '*' must be accepted here: a spec
    # that fails to match is silently dropped from the result, which would make
    # unsafe specs such as '%.2s' or '%*s' look as if they were not there.
    br"[-+#,0-9 .*]*"  # %.2f  etc.
    br"[hlL]?"  # C-style length modifier, accepted but ignored by Python
    br"([a-zA-Z])"  # conversion type, including upper case ones like %X or %E
    br")").findall
# These format conversion types can never trigger a Unicode string conversion in Py2.
# Kept immutable because it is only ever used for subset tests.
_safe_bytes_formats = frozenset([
    # Excludes 's' and 'r', which can generate non-bytes strings.
    b'd', b'i', b'o', b'u', b'x', b'X', b'e', b'E', b'f', b'F', b'g', b'G', b'c', b'b', b'a',
    # '_find_formatting_types' reports a literal '%%' as an empty type (its capture
    # group stays unmatched).  '%%' only emits a '%' character and never looks
    # at an argument, so it must not prevent the type inference.
    b'',
])
class ModNode(DivNode): class ModNode(DivNode):
# '%' operator. # '%' operator.
...@@ -11493,7 +11508,7 @@ class ModNode(DivNode): ...@@ -11493,7 +11508,7 @@ class ModNode(DivNode):
or NumBinopNode.is_py_operation_types(self, type1, type2)) or NumBinopNode.is_py_operation_types(self, type1, type2))
def infer_builtin_types_operation(self, type1, type2): def infer_builtin_types_operation(self, type1, type2):
# b'%s' % xyz raises an exception in Py3, so it's safe to infer the type for Py2 # b'%s' % xyz raises an exception in Py3<3.5, so it's safe to infer the type for Py2 and later Py3's.
if type1 is unicode_type: if type1 is unicode_type:
# None + xyz may be implemented by RHS # None + xyz may be implemented by RHS
if type2.is_builtin_type or not self.operand1.may_be_none(): if type2.is_builtin_type or not self.operand1.may_be_none():
...@@ -11503,6 +11518,11 @@ class ModNode(DivNode): ...@@ -11503,6 +11518,11 @@ class ModNode(DivNode):
return type2 return type2
elif type2.is_numeric: elif type2.is_numeric:
return type1 return type1
elif self.operand1.is_string_literal:
if type1 is str_type or type1 is bytes_type:
if set(_find_formatting_types(self.operand1.value)) <= _safe_bytes_formats:
return type1
return basestring_type
elif type1 is bytes_type and not type2.is_builtin_type: elif type1 is bytes_type and not type2.is_builtin_type:
return None # RHS might implement '% operator differently in Py3 return None # RHS might implement '% operator differently in Py3
else: else:
......
...@@ -215,10 +215,18 @@ def def_to_cdef(source): ...@@ -215,10 +215,18 @@ def def_to_cdef(source):
return '\n'.join(output) return '\n'.join(output)
def exclude_extension_in_pyver(*versions):
    """Build an extension hook that drops extensions on given Python versions.

    ``versions`` are ``(major, minor)`` tuples.  The returned callable takes an
    extension and returns ``EXCLUDE_EXT`` when the running interpreter's
    ``(major, minor)`` version is one of them, otherwise the extension itself.
    """
    def check(ext):
        # Looked up at call time, exactly like the extension itself.
        running_version = sys.version_info[:2]
        if running_version in versions:
            return EXCLUDE_EXT
        return ext

    return check
def update_linetrace_extension(ext): def update_linetrace_extension(ext):
ext.define_macros.append(('CYTHON_TRACE', 1)) ext.define_macros.append(('CYTHON_TRACE', 1))
return ext return ext
def update_numpy_extension(ext): def update_numpy_extension(ext):
import numpy import numpy
from numpy.distutils.misc_util import get_info from numpy.distutils.misc_util import get_info
...@@ -339,6 +347,7 @@ EXT_EXTRAS = { ...@@ -339,6 +347,7 @@ EXT_EXTRAS = {
'tag:openmp': update_openmp_extension, 'tag:openmp': update_openmp_extension,
'tag:cpp11': update_cpp11_extension, 'tag:cpp11': update_cpp11_extension,
'tag:trace' : update_linetrace_extension, 'tag:trace' : update_linetrace_extension,
'tag:bytesformat': exclude_extension_in_pyver((3, 3), (3, 4)), # no %-bytes formatting
} }
......
# mode: run
# tag: stringformat, bytesformat
import sys
# True when the running interpreter's major version is below 3.
IS_PY2 = sys.version_info[0] < 3
# The module-level doctest below passes a unicode argument to the bytes
# format and is therefore only registered on Python 2.
# NOTE(review): presumably because bytes '%s' formatting rejects unicode
# arguments on Python 3 - confirm.
if IS_PY2:
__doc__ = """
>>> print(format_bytes_with_str(u'abc'))
1 12170405abc6A
"""
def format_bytes():
"""
>>> print(format_bytes())
1 121704056A
"""
# Formats a bytes literal using only numeric and character conversion types
# (d, i, x, X, f, g, c) and assigns the result to a variable typed as 'bytes'.
cdef bytes result = b'%d%3i%x%02X%02.0f%g%c' % (
1, 12, 23, 4, 5, 6, 65)
# The result must be an exact 'bytes' object, not a subtype or unicode.
assert type(result) is bytes
# Returned as decoded text; presumably so that print() shows the same
# output on Python 2 and Python 3 - confirm.
return result.decode('ascii')
def format_bytes_with_str(s):
"""
>>> print(format_bytes_with_str(b'abc'))
1 12170405abc6A
"""
# Same format as in format_bytes(), plus a '%s' conversion that is fed the
# caller's argument 's'.
# NOTE(review): 'result' is presumably left untyped because '%s' may
# produce a unicode string on Python 2 - confirm.
result = b'%d%3i%x%02X%02.0f%s%g%c' % (
1, 12, 23, 4, 5, s, 6, 65)
# Python 2: return the formatting result unchanged; Python 3: decode it to text.
return result if IS_PY2 else result.decode('ascii')
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment