Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Xavier Thompson
cython
Commits
13bbc206
Commit
13bbc206
authored
Sep 30, 2013
by
Stefan Behnel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ignore UTF-8 BOMs at the beginning of source files
parent
fe78837b
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
36 additions
and
6 deletions
+36
-6
Cython/Utils.py
Cython/Utils.py
+21
-4
runtests.py
runtests.py
+7
-2
tests/compile/utf8bom.pyx
tests/compile/utf8bom.pyx
+8
-0
No files found.
Cython/Utils.py
View file @
13bbc206
...
@@ -215,6 +215,17 @@ def detect_opened_file_encoding(f):
...
@@ -215,6 +215,17 @@ def detect_opened_file_encoding(f):
return
encoding
.
group
(
1
)
return
encoding
.
group
(
1
)
return
"UTF-8"
return
"UTF-8"
def skip_bom(f):
    """
    Consume a leading byte order mark (U+FEFF) from the opened file ``f``.

    Exactly one character is read from ``f``.  If that character is the
    BOM, the stream is left positioned right behind it.  Otherwise the
    stream is rewound to offset 0 so that no source text is lost.

    This could be added to the scanner, but it's *substantially* easier
    to keep it at this level.
    """
    first_char = f.read(1)
    if first_char == u'\uFEFF':
        # The BOM has been consumed - nothing more to do.
        return
    # Not a BOM (or an empty file): go back to the start of the stream.
    f.seek(0)
normalise_newlines
=
re
.
compile
(
u'
\
r
\
n
?|
\
n
'
).
sub
normalise_newlines
=
re
.
compile
(
u'
\
r
\
n
?|
\
n
'
).
sub
...
@@ -264,6 +275,7 @@ if sys.version_info >= (2,6):
...
@@ -264,6 +275,7 @@ if sys.version_info >= (2,6):
except
ImportError
:
except
ImportError
:
pass
pass
def
open_source_file
(
source_filename
,
mode
=
"r"
,
def
open_source_file
(
source_filename
,
mode
=
"r"
,
encoding
=
None
,
error_handling
=
None
,
encoding
=
None
,
error_handling
=
None
,
require_normalised_newlines
=
True
):
require_normalised_newlines
=
True
):
...
@@ -272,8 +284,11 @@ def open_source_file(source_filename, mode="r",
...
@@ -272,8 +284,11 @@ def open_source_file(source_filename, mode="r",
# it's UTF-8.
# it's UTF-8.
f
=
open_source_file
(
source_filename
,
encoding
=
"UTF-8"
,
mode
=
mode
,
error_handling
=
'ignore'
)
f
=
open_source_file
(
source_filename
,
encoding
=
"UTF-8"
,
mode
=
mode
,
error_handling
=
'ignore'
)
encoding
=
detect_opened_file_encoding
(
f
)
encoding
=
detect_opened_file_encoding
(
f
)
if
encoding
==
"UTF-8"
and
error_handling
==
'ignore'
and
require_normalised_newlines
:
if
(
encoding
==
"UTF-8"
and
error_handling
==
'ignore'
and
require_normalised_newlines
):
f
.
seek
(
0
)
f
.
seek
(
0
)
skip_bom
(
f
)
return
f
return
f
else
:
else
:
f
.
close
()
f
.
close
()
...
@@ -290,15 +305,17 @@ def open_source_file(source_filename, mode="r",
...
@@ -290,15 +305,17 @@ def open_source_file(source_filename, mode="r",
pass
pass
#
#
if
io
is
not
None
:
if
io
is
not
None
:
return
io
.
open
(
source_filename
,
mode
=
mode
,
stream
=
io
.
open
(
source_filename
,
mode
=
mode
,
encoding
=
encoding
,
errors
=
error_handling
)
encoding
=
encoding
,
errors
=
error_handling
)
else
:
else
:
# codecs module doesn't have universal newline support
# codecs module doesn't have universal newline support
stream
=
codecs
.
open
(
source_filename
,
mode
=
mode
,
stream
=
codecs
.
open
(
source_filename
,
mode
=
mode
,
encoding
=
encoding
,
errors
=
error_handling
)
encoding
=
encoding
,
errors
=
error_handling
)
if
require_normalised_newlines
:
if
require_normalised_newlines
:
stream
=
NormalisedNewlineStream
(
stream
)
stream
=
NormalisedNewlineStream
(
stream
)
return
stream
skip_bom
(
stream
)
return
stream
def
open_source_from_loader
(
loader
,
def
open_source_from_loader
(
loader
,
source_filename
,
source_filename
,
...
...
runtests.py
View file @
13bbc206
...
@@ -277,6 +277,9 @@ TEST_SUPPORT_DIR = 'testsupport'
...
@@ -277,6 +277,9 @@ TEST_SUPPORT_DIR = 'testsupport'
BACKENDS = ['c', 'cpp']
BACKENDS = ['c', 'cpp']
UTF8_BOM_BYTES = r'
\
xef
\
xbb
\
xbf
'.encode('ISO-8859-1').decode('unicode_escape')
def memoize(f):
def memoize(f):
uncomputed = object()
uncomputed = object()
f._cache = {}
f._cache = {}
...
@@ -287,13 +290,15 @@ def memoize(f):
...
@@ -287,13 +290,15 @@ def memoize(f):
return res
return res
return func
return func
@memoize
@memoize
def parse_tags(filepath):
def parse_tags(filepath):
tags = defaultdict(list)
tags = defaultdict(list)
f = io_open(filepath, encoding='ISO-8859-1', errors='
replac
e')
f = io_open(filepath, encoding='ISO-8859-1', errors='
ignor
e')
try:
try:
for line in f:
for line in f:
line = line.strip()
# ignore BOM-like bytes and whitespace
line = line.lstrip(UTF8_BOM_BYTES).strip()
if not line:
if not line:
continue
continue
if line[0] != '#':
if line[0] != '#':
...
...
tests/compile/utf8bom.pyx
0 → 100644
View file @
13bbc206
# coding: utf-8
# mode: compile
# this file starts with a UTF-8 encoded BOM
# the only thing we test is that it properly compiles
def test():
    """Do nothing; this file only checks that a BOM-prefixed source compiles."""
    pass
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment