Correct the positions reported for errors in f-strings.

Closes https://github.com/cython/cython/issues/3674

Correct the positions reported for errors in f-strings.
Closes https://github.com/cython/cython/issues/3674
743365a5 · Stefan Behnel · 4728b8a0 · 743365a5 · 743365a5 · 743365a5
Commit 743365a5 authored Jun 10, 2020 by Stefan Behnel
4 changed files
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -33,6 +33,9 @@ Bugs fixed
 * The C++ ``typeid()`` function was allowed in C mode.
  Patch by Celelibi.  (Github issue #3637)
+* The error position reported for errors found in f-strings was misleading.
+  (Github issue #3674)
 * The new ``c_api_binop_methods`` directive was added for forward compatibility, but can
  only be set to True (the current default value).  It can be disabled in Cython 3.0.

--- a/Cython/Compiler/Parsing.pxd
+++ b/Cython/Compiler/Parsing.pxd
@@ -69,7 +69,8 @@ cdef bint check_for_non_ascii_characters(unicode string)
 @cython.locals(systr=unicode, is_python3_source=bint, is_raw=bint)
 cdef p_string_literal(PyrexScanner s, kind_override=*)
 cdef _append_escape_sequence(kind, builder, unicode escape_sequence, PyrexScanner s)
-@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4)
+cdef tuple _f_string_error_pos(pos, string, Py_ssize_t i)
+@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, next_start=Py_ssize_t)
 cdef list p_f_string(PyrexScanner s, unicode_value, pos, bint is_raw)
 @cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, quote_char=Py_UCS4, NO_CHAR=Py_UCS4)
 cdef tuple p_f_string_expr(PyrexScanner s, unicode_value, pos, Py_ssize_t starting_index, bint is_raw)

--- a/Cython/Compiler/Parsing.py
+++ b/Cython/Compiler/Parsing.py
@@ -882,6 +882,7 @@ def p_string_literal(s, kind_override=None):
    pos = s.position()
    is_python3_source = s.context.language_level >= 3
    has_non_ascii_literal_characters = False
+    string_start_pos = (pos[0], pos[1], pos[2] + len(s.systring))
    kind_string = s.systring.rstrip('"\'').lower()
    if len(kind_string) > 1:
        if len(set(kind_string)) != len(kind_string):
@@ -965,7 +966,7 @@ def p_string_literal(s, kind_override=None):
                s.error("bytes can only contain ASCII literal characters.", pos=pos)
            bytes_value = None
    if kind == 'f':
-        unicode_value = p_f_string(s, unicode_value, pos, is_raw='r' in kind_string)
+        unicode_value = p_f_string(s, unicode_value, string_start_pos, is_raw='r' in kind_string)
    s.next()
    return (kind, bytes_value, unicode_value)
@@ -1037,6 +1038,10 @@ _parse_escape_sequences_raw, _parse_escape_sequences = [re.compile((
    for is_raw in (True, False)]
+def _f_string_error_pos(pos, string, i):
+    return (pos[0], pos[1], pos[2] + i + 1)  # FIXME: handle newlines in string
 def p_f_string(s, unicode_value, pos, is_raw):
    # Parses a PEP 498 f-string literal into a list of nodes. Nodes are either UnicodeNodes
    # or FormattedValueNodes.
@@ -1044,15 +1049,13 @@ def p_f_string(s, unicode_value, pos, is_raw):
    next_start = 0
    size = len(unicode_value)
    builder = StringEncoding.UnicodeLiteralBuilder()
-    error_pos = list(pos)  # [src, line, column]
    _parse_seq = _parse_escape_sequences_raw if is_raw else _parse_escape_sequences
    while next_start < size:
        end = next_start
-        error_pos[2] = pos[2] + end  # FIXME: handle newlines in string
        match = _parse_seq(unicode_value, next_start)
        if match is None:
-            error(tuple(error_pos), "Invalid escape sequence")
+            error(_f_string_error_pos(pos, unicode_value, next_start), "Invalid escape sequence")
        next_start = match.end()
        part = match.group()
@@ -1076,7 +1079,8 @@ def p_f_string(s, unicode_value, pos, is_raw):
            if part == '}}':
                builder.append('}')
            else:
-                s.error("f-string: single '}' is not allowed", pos=tuple(error_pos))
+                error(_f_string_error_pos(pos, unicode_value, end),
+                      "f-string: single '}' is not allowed")
        else:
            builder.append(part)
@@ -1097,16 +1101,20 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
    nested_depth = 0
    quote_char = NO_CHAR
    in_triple_quotes = False
+    backslash_reported = False
    while True:
        if i >= size:
-            s.error("missing '}' in format string expression")
+            break  # error will be reported below
        c = unicode_value[i]
        if quote_char != NO_CHAR:
            if c == '\\':
-                error_pos = (pos[0], pos[1] + i, pos[2])  # FIXME: handle newlines in string
+                # avoid redundant error reports along '\' sequences
-                error(error_pos, "backslashes not allowed in f-strings")
+                if not backslash_reported:
+                    error(_f_string_error_pos(pos, unicode_value, i),
+                          "backslashes not allowed in f-strings")
+                backslash_reported = True
            elif c == quote_char:
                if in_triple_quotes:
                    if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
@@ -1125,7 +1133,8 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
        elif nested_depth != 0 and c in '}])':
            nested_depth -= 1
        elif c == '#':
-            s.error("format string cannot include #")
+            error(_f_string_error_pos(pos, unicode_value, i),
+                  "format string cannot include #")
        elif nested_depth == 0 and c in '!:}':
            # allow != as a special case
            if c == '!' and i + 1 < size and unicode_value[i + 1] == '=':
@@ -1141,12 +1150,13 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
    expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2)  # TODO: find exact code position (concat, multi-line, ...)
    if not expr_str.strip():
-        error(expr_pos, "empty expression not allowed in f-string")
+        error(_f_string_error_pos(pos, unicode_value, starting_index),
+              "empty expression not allowed in f-string")
    if terminal_char == '!':
        i += 1
        if i + 2 > size:
-            error(expr_pos, "invalid conversion char at end of string")
+            pass  # error will be reported below
        else:
            conversion_char = unicode_value[i]
            i += 1
@@ -1159,7 +1169,7 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
        start_format_spec = i + 1
        while True:
            if i >= size:
-                s.error("missing '}' in format specifier", pos=expr_pos)
+                break  # error will be reported below
            c = unicode_value[i]
            if not in_triple_quotes and not in_string:
                if c == '{':
@@ -1181,7 +1191,9 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
        format_spec_str = unicode_value[start_format_spec:i]
    if terminal_char != '}':
-        s.error("missing '}' in format string expression', found '%s'" % terminal_char)
+        error(_f_string_error_pos(pos, unicode_value, i),
+              "missing '}' in format string expression" + (
+                  ", found '%s'" % terminal_char if terminal_char else ""))
    # parse the expression as if it was surrounded by parentheses
    buf = StringIO('(%s)' % expr_str)
@@ -1190,7 +1202,7 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
    # validate the conversion char
    if conversion_char is not None and not ExprNodes.FormattedValueNode.find_conversion_func(conversion_char):
-        error(pos, "invalid conversion character '%s'" % conversion_char)
+        error(expr_pos, "invalid conversion character '%s'" % conversion_char)
    # the format spec is itself treated like an f-string
    if format_spec_str:

--- a/tests/errors/e_fstring.pyx
+++ b/tests/errors/e_fstring.pyx
+# mode: error
+# tag: fstring
+def incorrect_fstrings(x):
+    return [
+        f"{x}{'\\'}'{x+1}",
+        f"""{}""",
+        f"{}",
+        f"{x!}",
+        f"{",
+        f"{{}}}",
+    ]
+_ERRORS = """
+6:16: backslashes not allowed in f-strings
+7:14: empty expression not allowed in f-string
+8:12: empty expression not allowed in f-string
+9:14: missing '}' in format string expression, found '!'
+10:12: empty expression not allowed in f-string
+10:12: missing '}' in format string expression
+11:15: f-string: single '}' is not allowed
+"""