Commit 743365a5 authored by Stefan Behnel

Correct the positions reported for errors in f-strings.

Closes https://github.com/cython/cython/issues/3674
parent 4728b8a0
...@@ -33,6 +33,9 @@ Bugs fixed ...@@ -33,6 +33,9 @@ Bugs fixed
* The C++ ``typeid()`` function was allowed in C mode. * The C++ ``typeid()`` function was allowed in C mode.
Patch by Celelibi. (Github issue #3637) Patch by Celelibi. (Github issue #3637)
* The error position reported for errors found in f-strings was misleading.
(Github issue #3674)
* The new ``c_api_binop_methods`` directive was added for forward compatibility, but can * The new ``c_api_binop_methods`` directive was added for forward compatibility, but can
only be set to True (the current default value). It can be disabled in Cython 3.0. only be set to True (the current default value). It can be disabled in Cython 3.0.
......
...@@ -69,7 +69,8 @@ cdef bint check_for_non_ascii_characters(unicode string) ...@@ -69,7 +69,8 @@ cdef bint check_for_non_ascii_characters(unicode string)
@cython.locals(systr=unicode, is_python3_source=bint, is_raw=bint) @cython.locals(systr=unicode, is_python3_source=bint, is_raw=bint)
cdef p_string_literal(PyrexScanner s, kind_override=*) cdef p_string_literal(PyrexScanner s, kind_override=*)
cdef _append_escape_sequence(kind, builder, unicode escape_sequence, PyrexScanner s) cdef _append_escape_sequence(kind, builder, unicode escape_sequence, PyrexScanner s)
@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4) cdef tuple _f_string_error_pos(pos, string, Py_ssize_t i)
@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, next_start=Py_ssize_t)
cdef list p_f_string(PyrexScanner s, unicode_value, pos, bint is_raw) cdef list p_f_string(PyrexScanner s, unicode_value, pos, bint is_raw)
@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, quote_char=Py_UCS4, NO_CHAR=Py_UCS4) @cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, quote_char=Py_UCS4, NO_CHAR=Py_UCS4)
cdef tuple p_f_string_expr(PyrexScanner s, unicode_value, pos, Py_ssize_t starting_index, bint is_raw) cdef tuple p_f_string_expr(PyrexScanner s, unicode_value, pos, Py_ssize_t starting_index, bint is_raw)
......
...@@ -882,6 +882,7 @@ def p_string_literal(s, kind_override=None): ...@@ -882,6 +882,7 @@ def p_string_literal(s, kind_override=None):
pos = s.position() pos = s.position()
is_python3_source = s.context.language_level >= 3 is_python3_source = s.context.language_level >= 3
has_non_ascii_literal_characters = False has_non_ascii_literal_characters = False
string_start_pos = (pos[0], pos[1], pos[2] + len(s.systring))
kind_string = s.systring.rstrip('"\'').lower() kind_string = s.systring.rstrip('"\'').lower()
if len(kind_string) > 1: if len(kind_string) > 1:
if len(set(kind_string)) != len(kind_string): if len(set(kind_string)) != len(kind_string):
...@@ -965,7 +966,7 @@ def p_string_literal(s, kind_override=None): ...@@ -965,7 +966,7 @@ def p_string_literal(s, kind_override=None):
s.error("bytes can only contain ASCII literal characters.", pos=pos) s.error("bytes can only contain ASCII literal characters.", pos=pos)
bytes_value = None bytes_value = None
if kind == 'f': if kind == 'f':
unicode_value = p_f_string(s, unicode_value, pos, is_raw='r' in kind_string) unicode_value = p_f_string(s, unicode_value, string_start_pos, is_raw='r' in kind_string)
s.next() s.next()
return (kind, bytes_value, unicode_value) return (kind, bytes_value, unicode_value)
...@@ -1037,6 +1038,10 @@ _parse_escape_sequences_raw, _parse_escape_sequences = [re.compile(( ...@@ -1037,6 +1038,10 @@ _parse_escape_sequences_raw, _parse_escape_sequences = [re.compile((
for is_raw in (True, False)] for is_raw in (True, False)]
def _f_string_error_pos(pos, string, i):
return (pos[0], pos[1], pos[2] + i + 1) # FIXME: handle newlines in string
def p_f_string(s, unicode_value, pos, is_raw): def p_f_string(s, unicode_value, pos, is_raw):
# Parses a PEP 498 f-string literal into a list of nodes. Nodes are either UnicodeNodes # Parses a PEP 498 f-string literal into a list of nodes. Nodes are either UnicodeNodes
# or FormattedValueNodes. # or FormattedValueNodes.
...@@ -1044,15 +1049,13 @@ def p_f_string(s, unicode_value, pos, is_raw): ...@@ -1044,15 +1049,13 @@ def p_f_string(s, unicode_value, pos, is_raw):
next_start = 0 next_start = 0
size = len(unicode_value) size = len(unicode_value)
builder = StringEncoding.UnicodeLiteralBuilder() builder = StringEncoding.UnicodeLiteralBuilder()
error_pos = list(pos) # [src, line, column]
_parse_seq = _parse_escape_sequences_raw if is_raw else _parse_escape_sequences _parse_seq = _parse_escape_sequences_raw if is_raw else _parse_escape_sequences
while next_start < size: while next_start < size:
end = next_start end = next_start
error_pos[2] = pos[2] + end # FIXME: handle newlines in string
match = _parse_seq(unicode_value, next_start) match = _parse_seq(unicode_value, next_start)
if match is None: if match is None:
error(tuple(error_pos), "Invalid escape sequence") error(_f_string_error_pos(pos, unicode_value, next_start), "Invalid escape sequence")
next_start = match.end() next_start = match.end()
part = match.group() part = match.group()
...@@ -1076,7 +1079,8 @@ def p_f_string(s, unicode_value, pos, is_raw): ...@@ -1076,7 +1079,8 @@ def p_f_string(s, unicode_value, pos, is_raw):
if part == '}}': if part == '}}':
builder.append('}') builder.append('}')
else: else:
s.error("f-string: single '}' is not allowed", pos=tuple(error_pos)) error(_f_string_error_pos(pos, unicode_value, end),
"f-string: single '}' is not allowed")
else: else:
builder.append(part) builder.append(part)
...@@ -1097,16 +1101,20 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw): ...@@ -1097,16 +1101,20 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
nested_depth = 0 nested_depth = 0
quote_char = NO_CHAR quote_char = NO_CHAR
in_triple_quotes = False in_triple_quotes = False
backslash_reported = False
while True: while True:
if i >= size: if i >= size:
s.error("missing '}' in format string expression") break # error will be reported below
c = unicode_value[i] c = unicode_value[i]
if quote_char != NO_CHAR: if quote_char != NO_CHAR:
if c == '\\': if c == '\\':
error_pos = (pos[0], pos[1] + i, pos[2]) # FIXME: handle newlines in string # avoid redundant error reports along '\' sequences
error(error_pos, "backslashes not allowed in f-strings") if not backslash_reported:
error(_f_string_error_pos(pos, unicode_value, i),
"backslashes not allowed in f-strings")
backslash_reported = True
elif c == quote_char: elif c == quote_char:
if in_triple_quotes: if in_triple_quotes:
if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c: if i + 2 < size and unicode_value[i + 1] == c and unicode_value[i + 2] == c:
...@@ -1125,7 +1133,8 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw): ...@@ -1125,7 +1133,8 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
elif nested_depth != 0 and c in '}])': elif nested_depth != 0 and c in '}])':
nested_depth -= 1 nested_depth -= 1
elif c == '#': elif c == '#':
s.error("format string cannot include #") error(_f_string_error_pos(pos, unicode_value, i),
"format string cannot include #")
elif nested_depth == 0 and c in '!:}': elif nested_depth == 0 and c in '!:}':
# allow != as a special case # allow != as a special case
if c == '!' and i + 1 < size and unicode_value[i + 1] == '=': if c == '!' and i + 1 < size and unicode_value[i + 1] == '=':
...@@ -1141,12 +1150,13 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw): ...@@ -1141,12 +1150,13 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2) # TODO: find exact code position (concat, multi-line, ...) expr_pos = (pos[0], pos[1], pos[2] + starting_index + 2) # TODO: find exact code position (concat, multi-line, ...)
if not expr_str.strip(): if not expr_str.strip():
error(expr_pos, "empty expression not allowed in f-string") error(_f_string_error_pos(pos, unicode_value, starting_index),
"empty expression not allowed in f-string")
if terminal_char == '!': if terminal_char == '!':
i += 1 i += 1
if i + 2 > size: if i + 2 > size:
error(expr_pos, "invalid conversion char at end of string") pass # error will be reported below
else: else:
conversion_char = unicode_value[i] conversion_char = unicode_value[i]
i += 1 i += 1
...@@ -1159,7 +1169,7 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw): ...@@ -1159,7 +1169,7 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
start_format_spec = i + 1 start_format_spec = i + 1
while True: while True:
if i >= size: if i >= size:
s.error("missing '}' in format specifier", pos=expr_pos) break # error will be reported below
c = unicode_value[i] c = unicode_value[i]
if not in_triple_quotes and not in_string: if not in_triple_quotes and not in_string:
if c == '{': if c == '{':
...@@ -1181,7 +1191,9 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw): ...@@ -1181,7 +1191,9 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
format_spec_str = unicode_value[start_format_spec:i] format_spec_str = unicode_value[start_format_spec:i]
if terminal_char != '}': if terminal_char != '}':
s.error("missing '}' in format string expression', found '%s'" % terminal_char) error(_f_string_error_pos(pos, unicode_value, i),
"missing '}' in format string expression" + (
", found '%s'" % terminal_char if terminal_char else ""))
# parse the expression as if it was surrounded by parentheses # parse the expression as if it was surrounded by parentheses
buf = StringIO('(%s)' % expr_str) buf = StringIO('(%s)' % expr_str)
...@@ -1190,7 +1202,7 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw): ...@@ -1190,7 +1202,7 @@ def p_f_string_expr(s, unicode_value, pos, starting_index, is_raw):
# validate the conversion char # validate the conversion char
if conversion_char is not None and not ExprNodes.FormattedValueNode.find_conversion_func(conversion_char): if conversion_char is not None and not ExprNodes.FormattedValueNode.find_conversion_func(conversion_char):
error(pos, "invalid conversion character '%s'" % conversion_char) error(expr_pos, "invalid conversion character '%s'" % conversion_char)
# the format spec is itself treated like an f-string # the format spec is itself treated like an f-string
if format_spec_str: if format_spec_str:
......
# mode: error
# tag: fstring
def incorrect_fstrings(x):
return [
f"{x}{'\\'}'{x+1}",
f"""{}""",
f"{}",
f"{x!}",
f"{",
f"{{}}}",
]
_ERRORS = """
6:16: backslashes not allowed in f-strings
7:14: empty expression not allowed in f-string
8:12: empty expression not allowed in f-string
9:14: missing '}' in format string expression, found '!'
10:12: empty expression not allowed in f-string
10:12: missing '}' in format string expression
11:15: f-string: single '}' is not allowed
"""
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment