Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Labels
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Commits
Open sidebar
nexedi
cython
Commits
5bd4e9b6
Commit
5bd4e9b6
authored
Sep 07, 2010
by
Stefan Behnel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
reject non-ASCII literal characters in Python 3 byte strings
parent
42420560
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
45 additions
and
5 deletions
+45
-5
Cython/Compiler/Parsing.pxd
Cython/Compiler/Parsing.pxd
+1
-0
Cython/Compiler/Parsing.py
Cython/Compiler/Parsing.py
+22
-5
tests/errors/cython3_bytes.pyx
tests/errors/cython3_bytes.pyx
+9
-0
tests/run/cython2_bytes.pyx
tests/run/cython2_bytes.pyx
+13
-0
No files found.
Cython/Compiler/Parsing.pxd
View file @
5bd4e9b6
...
...
@@ -47,6 +47,7 @@ cpdef p_atom(PyrexScanner s)
cpdef
p_name
(
PyrexScanner
s
,
name
)
cpdef
p_cat_string_literal
(
PyrexScanner
s
)
cpdef
p_opt_string_literal
(
PyrexScanner
s
,
required_type
=*
)
cpdef
bint
check_for_non_ascii_characters
(
unicode
string
)
cpdef
p_string_literal
(
PyrexScanner
s
,
kind_override
=*
)
cpdef
p_list_maker
(
PyrexScanner
s
)
cpdef
p_comp_iter
(
PyrexScanner
s
,
body
)
...
...
Cython/Compiler/Parsing.py
View file @
5bd4e9b6
...
...
@@ -661,9 +661,9 @@ def p_cat_string_literal(s):
bstrings
.
append
(
next_bytes_value
)
ustrings
.
append
(
next_unicode_value
)
# join and rewrap the partial literals
if
kind
in
(
'b'
,
'c'
,
''
)
or
kind
==
'u'
and
bstrings
[
0
]
is
not
None
:
if
kind
in
(
'b'
,
'c'
,
''
)
or
kind
==
'u'
and
None
not
in
bstrings
:
# Py3 enforced unicode literals are parsed as bytes/unicode combination
bytes_value
=
BytesLiteral
(
StringEncoding
.
join_bytes
(
[
b
for
b
in
bstrings
if
b
is
not
None
]
)
)
bytes_value
=
BytesLiteral
(
StringEncoding
.
join_bytes
(
bstrings
)
)
bytes_value
.
encoding
=
s
.
source_encoding
if
kind
in
(
'u'
,
''
):
unicode_value
=
EncodedString
(
u''
.
join
([
u
for
u
in
ustrings
if
u
is
not
None
])
)
...
...
@@ -681,6 +681,12 @@ def p_opt_string_literal(s, required_type='u'):
else
:
return
None
def
check_for_non_ascii_characters
(
string
):
for
c
in
string
:
if
c
>=
u'
\
x80
'
:
return
True
return
False
def
p_string_literal
(
s
,
kind_override
=
None
):
# A single string or char literal. Returns (kind, bvalue, uvalue)
# where kind in ('b', 'c', 'u', ''). The 'bvalue' is the source
...
...
@@ -692,6 +698,7 @@ def p_string_literal(s, kind_override=None):
# s.sy == 'BEGIN_STRING'
pos
=
s
.
position
()
is_raw
=
0
has_non_ASCII_literal_characters
=
False
kind
=
s
.
systring
[:
1
].
lower
()
if
kind
==
'r'
:
kind
=
''
...
...
@@ -715,12 +722,13 @@ def p_string_literal(s, kind_override=None):
while
1
:
s
.
next
()
sy
=
s
.
sy
systr
=
s
.
systring
#print "p_string_literal: sy =", sy, repr(s.systring) ###
if
sy
==
'CHARS'
:
chars
.
append
(
s
.
systring
)
chars
.
append
(
systr
)
if
not
has_non_ASCII_literal_characters
and
check_for_non_ascii_characters
(
systr
):
has_non_ASCII_literal_characters
=
True
elif
sy
==
'ESCAPE'
:
has_escape
=
True
systr
=
s
.
systring
if
is_raw
:
if
systr
==
u'
\
\
\
n
'
:
chars
.
append
(
u'
\
\
\
n
'
)
...
...
@@ -730,6 +738,8 @@ def p_string_literal(s, kind_override=None):
chars
.
append
(
u"'"
)
else
:
chars
.
append
(
systr
)
if
not
has_non_ASCII_literal_characters
and
check_for_non_ascii_characters
(
systr
):
has_non_ASCII_literal_characters
=
True
else
:
c
=
systr
[
1
]
if
c
in
u"01234567"
:
...
...
@@ -755,6 +765,8 @@ def p_string_literal(s, kind_override=None):
chars
.
append_uescape
(
chrval
,
systr
)
else
:
chars
.
append
(
u'
\
\
'
+
systr
[
1
:])
if
not
has_non_ASCII_literal_characters
and
check_for_non_ascii_characters
(
systr
):
has_non_ASCII_literal_characters
=
True
elif
sy
==
'NEWLINE'
:
chars
.
append
(
u'
\
n
'
)
elif
sy
==
'END_STRING'
:
...
...
@@ -772,6 +784,11 @@ def p_string_literal(s, kind_override=None):
error
(
pos
,
u"invalid character literal: %r"
%
bytes_value
)
else
:
bytes_value
,
unicode_value
=
chars
.
getstrings
()
if
has_non_ASCII_literal_characters
and
s
.
context
.
language_level
>=
3
:
# Python 3 forbids literal non-ASCII characters in byte strings
if
kind
!=
'u'
:
s
.
error
(
"bytes can only contain ASCII literal characters."
,
pos
=
pos
)
bytes_value
=
None
s
.
next
()
return
(
kind
,
bytes_value
,
unicode_value
)
...
...
tests/errors/cython3_bytes.pyx
0 → 100644
View file @
5bd4e9b6
# -*- coding: utf-8 -*-
# cython: language_level=3
escaped
=
b'abc
\
xc3
\
xbc
\
xc3
\
xb6
\
xc3
\
xa4
'
invalid
=
b'abcüöä'
_ERRORS
=
"""
5:10: bytes can only contain ASCII literal characters.
"""
tests/run/cython2_bytes.pyx
0 → 100644
View file @
5bd4e9b6
# -*- coding: utf-8 -*-
# cython: language_level=2
b
=
b'abcüöä
\
x12
'
cdef
char
*
cs
=
'abcüöä
\
x12
'
def
compare_cs
():
"""
>>> b == compare_cs()
True
"""
return
cs
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment