Resolve unicode escapes in Python 2 raw unicode strings.

Closes #1594.

Resolve unicode escapes in Python 2 raw unicode strings.
Closes #1594.
71ec1a4a · Stefan Behnel · 9746acb3
Commit 71ec1a4a authored Feb 12, 2017 by Stefan Behnel
4 changed files
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -27,6 +27,10 @@ Bugs fixed
 * f-string processing was adapted to match recent changes in PEP 498 and
  CPython 3.6.
+* Unicode escapes in 'ur' raw-unicode strings were not resolved in Py2 code.
+  Original patch by Aaron Gallagher (Github issue #1594).
 0.25.2 (2016-12-08)
 ===================

--- a/Cython/Compiler/Parsing.py
+++ b/Cython/Compiler/Parsing.py
@@ -921,7 +921,8 @@ def p_string_literal(s, kind_override=None):
            if is_python3_source and not has_non_ascii_literal_characters and check_for_non_ascii_characters(systr):
                has_non_ascii_literal_characters = True
        elif sy == 'ESCAPE':
-            if is_raw:
+            # in Py2, 'ur' raw unicode strings resolve unicode escapes but nothing else
+            if is_raw and (is_python3_source or kind != 'u' or systr[1] not in u'Uu'):
                chars.append(systr)
                if is_python3_source and not has_non_ascii_literal_characters and check_for_non_ascii_characters(systr):
                    has_non_ascii_literal_characters = True

--- a/tests/run/future_unicode_literals.pyx
+++ b/tests/run/future_unicode_literals.pyx
@@ -9,6 +9,8 @@ if sys.version_info[0] >= 3:
    True
    >>> isinstance(b, bytes)
    True
+    >>> raw ==  'abc\\\\xf8\\\\t\\u00f8\\U000000f8'  # unescaped by Python (required by doctest)
+    True
 """
 else:
    __doc__ = u"""
@@ -18,9 +20,13 @@ else:
    True
    >>> isinstance(b, str)
    True
+    >>> raw == u'abc\\\\xf8\\\\t\\u00f8\\U000000f8'  # unescaped by Python (required by doctest)
+    True
 """
 u = "test"
 cdef char* s = "bytes test"
 b = s
+raw = r'abc\xf8\t\u00f8\U000000f8'
--- a/tests/run/unicodeliterals.pyx
+++ b/tests/run/unicodeliterals.pyx
@@ -77,6 +77,8 @@ __doc__ = br"""
    True
    >>> k == u'\\N{SNOWMAN}' == u'\\u2603'
    True
+    >>> m == u'abc\\\\xf8\\\\t\\u00f8\\U000000f8'  # unescaped by Python (required by doctest)
+    True
    >>> add == u'Søk ik' + u'üÖä' + 'abc'
    True
    >>> null == u'\\x00' # unescaped by Python (required by doctest)
@@ -110,6 +112,7 @@ f = u'\xf8'
 g = u'\udc00'   # lone trail surrogate
 h = u'\ud800'   # lone lead surrogate
 k = u'\N{SNOWMAN}'
+m = ur'abc\xf8\t\u00f8\U000000f8'
 add = u'Søk ik' + u'üÖä' + u'abc'
 null = u'\x00'