From 53fc37a9f073b9a39ea7785f22c3f0ac051eb364 Mon Sep 17 00:00:00 2001
From: Stefan Behnel <>
Date: Sat, 19 Mar 2016 15:24:56 +0100
Subject: [PATCH] adapt to stricter version of the CPython implementation (that
 follows the actual PEP)

 Cython/Compiler/Lexicon.py           | 13 ++--
 Cython/Compiler/Tests/TestGrammar.py | 88 +++++++++++++---------------
 tests/run/int_literals.pyx           | 24 ++------
 3 files changed, 51 insertions(+), 74 deletions(-)

diff --git a/Cython/Compiler/Lexicon.py b/Cython/Compiler/Lexicon.py
index 2cc0a2e1f..16a7ebfcc 100644
--- a/Cython/Compiler/Lexicon.py
+++ b/Cython/Compiler/Lexicon.py
@@ -24,25 +24,24 @@ def make_lexicon():
     bindigit = Any("01")
     octdigit = Any("01234567")
     hexdigit = Any("0123456789ABCDEFabcdef")
-    allow_ = Rep(Str("_"))
     indentation = Bol + Rep(Any(" \t"))
     def underscore_digits(d):
-        return d + Rep(Str("_") | d)
+        return Rep1(d) + Rep(Str("_") + Rep1(d))
     decimal = underscore_digits(digit)
     dot = Str(".")
-    exponent = allow_ + Any("Ee") + Opt(Any("+-")) + decimal
+    exponent = Any("Ee") + Opt(Any("+-")) + decimal
     decimal_fract = (decimal + dot + Opt(decimal)) | (dot + decimal)
     name = letter + Rep(letter | digit)
-    intconst = decimal | (Str("0") + ((Any("Xx") + allow_ + underscore_digits(hexdigit)) |
-                                      (Any("Oo") + allow_ + underscore_digits(octdigit)) |
-                                      (Any("Bb") + allow_ + underscore_digits(bindigit)) ))
+    intconst = decimal | (Str("0") + ((Any("Xx") + underscore_digits(hexdigit)) |
+                                      (Any("Oo") + underscore_digits(octdigit)) |
+                                      (Any("Bb") + underscore_digits(bindigit)) ))
     intsuffix = (Opt(Any("Uu")) + Opt(Any("Ll")) + Opt(Any("Ll"))) | (Opt(Any("Ll")) + Opt(Any("Ll")) + Opt(Any("Uu")))
     intliteral = intconst + intsuffix
     fltconst = (decimal_fract + Opt(exponent)) | (decimal + exponent)
-    imagconst = (intconst | fltconst) + allow_ + Any("jJ")
+    imagconst = (intconst | fltconst) + Any("jJ")
     beginstring = Opt(Any(string_prefixes) + Opt(Any(raw_prefixes)) |
                       Any(raw_prefixes) + Opt(Any(bytes_prefixes)) |
diff --git a/Cython/Compiler/Tests/TestGrammar.py b/Cython/Compiler/Tests/TestGrammar.py
index f73334162..a6b839c9d 100644
--- a/Cython/Compiler/Tests/TestGrammar.py
+++ b/Cython/Compiler/Tests/TestGrammar.py
@@ -10,58 +10,66 @@ from __future__ import absolute_import
 from ...TestUtils import CythonTest
 from ..Errors import CompileError
+# Copied from CPython's test_grammar.py
-    # Copied from CPython's test_grammar.py
-    '4_______2',
-    '0b_1001_0100',
-    '0x_ffff_ffff',
-    '0o_5_7_7',
-    '1__.4',
-    '42_j',
-    '1.4_j',
-    '1.4e5_j',
-    '1_00_00_.5',
-    '1_e10',
-    '1_E10',
-    '1_e1_0',
+    '0b1001_0100',
+    '0xffff_ffff',
+    '0o5_7_7',
+    '1_00_00.5',
+    '1e1_0',
+# Copied from CPython's test_grammar.py
+    # Trailing underscores:
+    '1.4j_',
-    # Copied from CPython's test_grammar.py
-    # Trailing underscores:
     # Underscores in the base selector:
+    # Underscore right after the base selector:
+    '0b_0',
+    '0x_f',
+    '0o_5',
     # Old-style octal, still disallowed:
-    # Underscore after non-digit:
-    '1.4j_',
-    '1.4e_1',
-    '.1_4e_1',
-    '1.0e+_1',
+    # Special case with exponent:
+    '0 if 1_Else 1',
+    # Underscore right before a dot:
+    '1_.4',
+    '1_.4j',
+    # Underscore right after a dot:
-    '1._4e5_j',
-    ('0 if 1_____else 1', True),
-    ('0 if 1_____Else 1', False),
-    ('0 if 1.0_____else 1', True),
-    ('0 if 1.0_____Else 1', False),
+    # Underscore right after a sign:
+    '1.0e+_1',
+    # Multiple consecutive underscores:
+    '4_______2',
+    '0.1__4',
+    '0b1001__0100',
+    '0xffff__ffff',
+    '0o5__77',
+    '1e1__0',
+    # Underscore right before j:
+    '1.4_j',
+    '1.4e5_j',
+    # Underscore right before e:
+    '1_e1',
+    '1.4_e1',
+    # Underscore right after e:
+    '1e_1',
+    '1.4e_1',
@@ -88,22 +96,6 @@ class TestGrammar(CythonTest):
                     # cython: language_level=3
                     ''' + code) is not None
-    def test_underscore_number_expressions(self):
-        for expression, is_valid in UNDERSCORE_EXPRESSIONS:
-            code = 'x = ' + expression
-            fragment = u'''\
-                # cython: language_level=3
-                ''' + code
-            if is_valid:
-                assert self.fragment(fragment) is not None
-            else:
-                try:
-                    self.fragment(fragment)
-                except CompileError as exc:
-                    assert code in [s.strip() for s in str(exc).splitlines()], str(exc)
-                else:
-                    assert False, "Invalid Cython code '%s' failed to raise an exception" % code
 if __name__ == "__main__":
     import unittest
diff --git a/tests/run/int_literals.pyx b/tests/run/int_literals.pyx
index 790d1b70e..31bf83bc6 100644
--- a/tests/run/int_literals.pyx
+++ b/tests/run/int_literals.pyx
@@ -16,27 +16,13 @@ def valid_underscore_literals():
     # Copied from CPython's test_grammar.py
     assert 0_0_0 == 0
     assert 4_2 == 42
-    assert 4_______2 == 42
     assert 1_0000_0000 == 100000000
-    assert 0b_1001_0100 == 0b10010100
-    assert 0x_ffff_ffff == 0xffffffff
-    assert 0o_5_7_7 == 0o577
-    assert 1__.4 == 1.4
-    assert 42_j == 42j
-    assert 1.4_j == 1.4j
-    assert 1.4e5_j == 1.4e5j
-    assert 1_00_00_.5 == 10000.5
-    assert 1_e10 == 1e10
-    assert 1_E10 == 1E10
-    assert 1_e1_0 == 1e10
+    assert 0b1001_0100 == 0b10010100
+    assert 0xffff_ffff == 0xffffffff
+    assert 0o5_7_7 == 0o577
+    assert 1_00_00.5 == 10000.5
+    assert 1e1_0 == 1e10
     assert .1_4 == .14
-    assert 0_ == 0
-    assert 42_ == 42
-    assert 0b1_ == 0b1
-    assert 0xf_ == 0xf
-    assert 0o5_ == 0o5
-    assert (0 if 1_____else 1) == 0
-    assert (0 if 1.0_____else 1) == 0