Commit 6a776268 authored by Stefan Behnel

when C compiling original Cython/Py2 sources in Py3, interpret unprefixed...

when C compiling original Cython/Py2 sources in Py3, interpret unprefixed string literals as CPython's parser would
parent 094bda2f
...@@ -332,7 +332,8 @@ class StringConst(object): ...@@ -332,7 +332,8 @@ class StringConst(object):
self.escaped_value = StringEncoding.escape_byte_string(byte_string) self.escaped_value = StringEncoding.escape_byte_string(byte_string)
self.py_strings = None self.py_strings = None
def get_py_string_const(self, encoding, identifier=None, is_str=False): def get_py_string_const(self, encoding, identifier=None,
is_str=False, py3str_cstring=None):
py_strings = self.py_strings py_strings = self.py_strings
text = self.text text = self.text
...@@ -351,47 +352,52 @@ class StringConst(object): ...@@ -351,47 +352,52 @@ class StringConst(object):
else: else:
encoding_key = ''.join(find_alphanums(encoding)) encoding_key = ''.join(find_alphanums(encoding))
key = (is_str, is_unicode, encoding_key) key = (is_str, is_unicode, encoding_key, py3str_cstring)
if py_strings is not None and key in py_strings: if py_strings is not None:
py_string = py_strings[key] try:
return py_strings[key]
except KeyError:
pass
else: else:
if py_strings is None: self.py_strings = {}
self.py_strings = {}
if identifier:
intern = True
elif identifier is None:
if isinstance(text, unicode):
intern = bool(possible_unicode_identifier(text))
else:
intern = bool(possible_bytes_identifier(text))
else:
intern = False
if intern:
prefix = Naming.interned_str_prefix
else:
prefix = Naming.py_const_prefix
pystring_cname = "%s%s_%s" % (
prefix,
(is_str and 's') or (is_unicode and 'u') or 'b',
self.cname[len(Naming.const_prefix):])
py_string = PyStringConst(
pystring_cname, encoding, is_unicode, is_str, intern)
self.py_strings[key] = py_string
if identifier:
intern = True
elif identifier is None:
if isinstance(text, unicode):
intern = bool(possible_unicode_identifier(text))
else:
intern = bool(possible_bytes_identifier(text))
else:
intern = False
if intern:
prefix = Naming.interned_str_prefix
else:
prefix = Naming.py_const_prefix
pystring_cname = "%s%s_%s" % (
prefix,
(is_str and 's') or (is_unicode and 'u') or 'b',
self.cname[len(Naming.const_prefix):])
py_string = PyStringConst(
pystring_cname, encoding, is_unicode, is_str, py3str_cstring, intern)
self.py_strings[key] = py_string
return py_string return py_string
class PyStringConst(object): class PyStringConst(object):
"""Global info about a Python string constant held by GlobalState. """Global info about a Python string constant held by GlobalState.
""" """
# cname string # cname string
# py3str_cstring string
# encoding string # encoding string
# intern boolean # intern boolean
# is_unicode boolean # is_unicode boolean
# is_str boolean # is_str boolean
def __init__(self, cname, encoding, is_unicode, is_str=False, intern=False): def __init__(self, cname, encoding, is_unicode, is_str=False,
py3str_cstring=None, intern=False):
self.cname = cname self.cname = cname
self.py3str_cstring = py3str_cstring
self.encoding = encoding self.encoding = encoding
self.is_str = is_str self.is_str = is_str
self.is_unicode = is_unicode self.is_unicode = is_unicode
...@@ -614,10 +620,16 @@ class GlobalState(object): ...@@ -614,10 +620,16 @@ class GlobalState(object):
c = self.new_string_const(text, byte_string) c = self.new_string_const(text, byte_string)
return c return c
def get_py_string_const(self, text, identifier=None, is_str=False): def get_py_string_const(self, text, identifier=None,
is_str=False, unicode_value=None):
# return a Python string constant, creating a new one if necessary # return a Python string constant, creating a new one if necessary
c_string = self.get_string_const(text) c_string = self.get_string_const(text)
py_string = c_string.get_py_string_const(text.encoding, identifier, is_str) py3str_cstring = None
if is_str and unicode_value is not None \
and unicode_value.utf8encode() != text.byteencode():
py3str_cstring = self.get_string_const(unicode_value)
py_string = c_string.get_py_string_const(
text.encoding, identifier, is_str, py3str_cstring)
return py_string return py_string
def get_interned_identifier(self, text): def get_interned_identifier(self, text):
...@@ -743,6 +755,17 @@ class GlobalState(object): ...@@ -743,6 +755,17 @@ class GlobalState(object):
decls_writer.putln( decls_writer.putln(
"static PyObject *%s;" % py_string.cname) "static PyObject *%s;" % py_string.cname)
if py_string.py3str_cstring:
w.putln("#if PY_MAJOR_VERSION >= 3")
w.putln(
"{&%s, %s, sizeof(%s), %s, %d, %d, %d}," % (
py_string.cname,
py_string.py3str_cstring.cname,
py_string.py3str_cstring.cname,
encoding,
1, 1, 0,
))
w.putln("#else")
w.putln( w.putln(
"{&%s, %s, sizeof(%s), %s, %d, %d, %d}," % ( "{&%s, %s, sizeof(%s), %s, %d, %d, %d}," % (
py_string.cname, py_string.cname,
...@@ -753,6 +776,8 @@ class GlobalState(object): ...@@ -753,6 +776,8 @@ class GlobalState(object):
py_string.is_str, py_string.is_str,
py_string.intern py_string.intern
)) ))
if py_string.py3str_cstring:
w.putln("#endif")
w.putln("{0, 0, 0, 0, 0, 0, 0}") w.putln("{0, 0, 0, 0, 0, 0, 0}")
w.putln("};") w.putln("};")
...@@ -1010,8 +1035,10 @@ class CCodeWriter(object): ...@@ -1010,8 +1035,10 @@ class CCodeWriter(object):
def get_string_const(self, text): def get_string_const(self, text):
return self.globalstate.get_string_const(text).cname return self.globalstate.get_string_const(text).cname
def get_py_string_const(self, text, identifier=None, is_str=False): def get_py_string_const(self, text, identifier=None,
return self.globalstate.get_py_string_const(text, identifier, is_str).cname is_str=False, unicode_value=None):
return self.globalstate.get_py_string_const(
text, identifier, is_str, unicode_value).cname
def get_argument_default_const(self, type): def get_argument_default_const(self, type):
return self.globalstate.get_py_const(type).cname return self.globalstate.get_py_const(type).cname
......
...@@ -1115,16 +1115,6 @@ class StringNode(PyConstNode): ...@@ -1115,16 +1115,6 @@ class StringNode(PyConstNode):
if not dst_type.is_pyobject: if not dst_type.is_pyobject:
return BytesNode(self.pos, value=self.value).coerce_to(dst_type, env) return BytesNode(self.pos, value=self.value).coerce_to(dst_type, env)
self.check_for_coercion_error(dst_type, fail=True) self.check_for_coercion_error(dst_type, fail=True)
# this will be a unicode string in Py3, so make sure we can decode it
if self.value.encoding and isinstance(self.value, StringEncoding.BytesLiteral):
try:
self.value.decode(self.value.encoding)
except UnicodeDecodeError:
error(self.pos, ("Decoding unprefixed string literal from '%s' failed. Consider using"
"a byte string or unicode string explicitly, "
"or adjust the source code encoding.") % self.value.encoding)
return self return self
def can_coerce_to_char_literal(self): def can_coerce_to_char_literal(self):
...@@ -1132,7 +1122,8 @@ class StringNode(PyConstNode): ...@@ -1132,7 +1122,8 @@ class StringNode(PyConstNode):
def generate_evaluation_code(self, code): def generate_evaluation_code(self, code):
self.result_code = code.get_py_string_const( self.result_code = code.get_py_string_const(
self.value, identifier=self.is_identifier, is_str=True) self.value, identifier=self.is_identifier, is_str=True,
unicode_value=self.unicode_value)
def get_constant_c_result_code(self): def get_constant_c_result_code(self):
return None return None
......
...@@ -132,6 +132,17 @@ __doc__ = ur""" ...@@ -132,6 +132,17 @@ __doc__ = ur"""
>>> len(bytes_uescape) >>> len(bytes_uescape)
28 28
>>> (sys.version_info[0] >= 3 and len(str_uescape) == 3 or
... sys.version_info[0] < 3 and len(str_uescape) == 17 or
... len(str_uescape))
True
>>> (sys.version_info[0] >= 3 and str_uescape[0] == 'c' or
... sys.version_info[0] < 3 and str_uescape[0] == '\\' or
... str_uescape[0])
True
>>> print(str_uescape[-1])
B
>>> newlines == "Aaa\n" >>> newlines == "Aaa\n"
True True
...@@ -173,6 +184,7 @@ bresc = br'\12\'\"\\' ...@@ -173,6 +184,7 @@ bresc = br'\12\'\"\\'
uresc = ur'\12\'\"\\' uresc = ur'\12\'\"\\'
bytes_uescape = b'\u1234\U12345678\u\u1\u12\uX' bytes_uescape = b'\u1234\U12345678\u\u1\u12\uX'
str_uescape = '\u0063\U00012345\x42'
newlines = "Aaa\n" newlines = "Aaa\n"
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment