Commit e2dcfb58 authored by Stefan Behnel, committed by GitHub

Fix unicode normalisation test on Windows. (GH-3194)

* Rewrite the test code generation for the unicode normalisation test, making sure that we always write UTF-8 source files.

* Fix failures to print the compilation status for modules with non-ascii names (on Windows).

* Help with remote debugging environment encoding problems in test runs, by extending the output of the "buildenv" pseudo-test.

* Explicitly set I/O encoding for subprocesses in parallel cythonize() runs to make the test runner workers inherit it (instead of defaulting to ASCII in Py2).

* Use a Latin-1 Unicode character in the test for Unicode module names to make it more compatible with Windows file system character sets (such as CP-1252).

* Properly decode source and module file name from the FS encoding in Py2. Previously, with ASCII module names, UTF-8 decoding always worked, but wasn't correct.

* Hack around a distutils 3.[5678] bug on Windows for unicode module names.
https://bugs.python.org/issue39432

* Try to fix cython.inline() on Windows with Py3.8+ where the DLL loading requires an explicit registration of the extension output directory.
Closes GH-3450.
parent 66a8a8e8
...@@ -539,7 +539,7 @@ class DependencyTree(object): ...@@ -539,7 +539,7 @@ class DependencyTree(object):
all.add(include_path) all.add(include_path)
all.update(self.included_files(include_path)) all.update(self.included_files(include_path))
elif not self.quiet: elif not self.quiet:
print("Unable to locate '%s' referenced from '%s'" % (filename, include)) print(u"Unable to locate '%s' referenced from '%s'" % (filename, include))
return all return all
@cached_method @cached_method
...@@ -797,9 +797,9 @@ def create_extension_list(patterns, exclude=None, ctx=None, aliases=None, quiet= ...@@ -797,9 +797,9 @@ def create_extension_list(patterns, exclude=None, ctx=None, aliases=None, quiet=
if cython_sources: if cython_sources:
filepattern = cython_sources[0] filepattern = cython_sources[0]
if len(cython_sources) > 1: if len(cython_sources) > 1:
print("Warning: Multiple cython sources found for extension '%s': %s\n" print(u"Warning: Multiple cython sources found for extension '%s': %s\n"
"See https://cython.readthedocs.io/en/latest/src/userguide/sharing_declarations.html " u"See https://cython.readthedocs.io/en/latest/src/userguide/sharing_declarations.html "
"for sharing declarations among Cython files." % (pattern.name, cython_sources)) u"for sharing declarations among Cython files." % (pattern.name, cython_sources))
else: else:
# ignore non-cython modules # ignore non-cython modules
module_list.append(pattern) module_list.append(pattern)
...@@ -873,7 +873,7 @@ def create_extension_list(patterns, exclude=None, ctx=None, aliases=None, quiet= ...@@ -873,7 +873,7 @@ def create_extension_list(patterns, exclude=None, ctx=None, aliases=None, quiet=
m.sources.remove(target_file) m.sources.remove(target_file)
except ValueError: except ValueError:
# never seen this in the wild, but probably better to warn about this unexpected case # never seen this in the wild, but probably better to warn about this unexpected case
print("Warning: Cython source file not found in sources list, adding %s" % file) print(u"Warning: Cython source file not found in sources list, adding %s" % file)
m.sources.insert(0, file) m.sources.insert(0, file)
seen.add(name) seen.add(name)
return module_list, module_metadata return module_list, module_metadata
...@@ -973,6 +973,9 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False, ...@@ -973,6 +973,9 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
exclude_failures=exclude_failures, exclude_failures=exclude_failures,
language=language, language=language,
aliases=aliases) aliases=aliases)
fix_windows_unicode_modules(module_list)
deps = create_dependency_tree(ctx, quiet=quiet) deps = create_dependency_tree(ctx, quiet=quiet)
build_dir = getattr(options, 'build_dir', None) build_dir = getattr(options, 'build_dir', None)
...@@ -1041,9 +1044,12 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False, ...@@ -1041,9 +1044,12 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
if force or c_timestamp < dep_timestamp: if force or c_timestamp < dep_timestamp:
if not quiet and not force: if not quiet and not force:
if source == dep: if source == dep:
print("Compiling %s because it changed." % source) print(u"Compiling %s because it changed." % Utils.decode_filename(source))
else: else:
print("Compiling %s because it depends on %s." % (source, dep)) print(u"Compiling %s because it depends on %s." % (
Utils.decode_filename(source),
Utils.decode_filename(dep),
))
if not force and options.cache: if not force and options.cache:
fingerprint = deps.transitive_fingerprint(source, m, options) fingerprint = deps.transitive_fingerprint(source, m, options)
else: else:
...@@ -1114,7 +1120,7 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False, ...@@ -1114,7 +1120,7 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
if failed_modules: if failed_modules:
for module in failed_modules: for module in failed_modules:
module_list.remove(module) module_list.remove(module)
print("Failed compilations: %s" % ', '.join(sorted([ print(u"Failed compilations: %s" % ', '.join(sorted([
module.name for module in failed_modules]))) module.name for module in failed_modules])))
if options.cache: if options.cache:
...@@ -1125,6 +1131,41 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False, ...@@ -1125,6 +1131,41 @@ def cythonize(module_list, exclude=None, nthreads=0, aliases=None, quiet=False,
return module_list return module_list
def fix_windows_unicode_modules(module_list):
    """Stop distutils from exporting ``PyInit_<name>`` for non-ASCII modules.

    Hack around a distutils 3.[5678] bug on Windows for unicode module names.
    https://bugs.python.org/issue39432

    For every module in ``module_list`` whose ``name`` cannot be encoded as
    ASCII, ``export_symbols`` is replaced by a list subclass that claims to
    contain ``PyInit_<last name component>`` without actually storing it.
    Modules with ASCII names are left untouched.  The function does nothing
    unless running on win32 with Python >= 3.5 and < 3.8.2.

    The modules are modified in place; nothing is returned.
    """
    if sys.platform != "win32":
        return
    if sys.version_info < (3, 5) or sys.version_info >= (3, 8, 2):
        return

    def make_filtered_list(ignored_symbol, old_entries):
        class FilteredExportSymbols(list):
            # Export symbols for unicode file names cause link errors on Windows.
            # Cython doesn't need them (it already defines PyInit with the correct
            # linkage), so use this class as a temporary fix to stop them from
            # being generated.
            def __contains__(self, val):
                # Pretend the symbol is present so that distutils doesn't
                # "helpfully" add PyInit_<name>.
                return val == ignored_symbol or list.__contains__(self, val)

        # Build the list once from the filtered entries.  Copying 'old_entries'
        # first and then extending with the filtered entries would duplicate
        # every symbol and keep 'ignored_symbol' if it was already listed.
        return FilteredExportSymbols(
            name for name in (old_entries or ()) if name != ignored_symbol)

    for m in module_list:
        # TODO: use m.name.isascii() in Py3.7+
        try:
            m.name.encode("ascii")
        except UnicodeEncodeError:
            pass
        else:
            # Plain ASCII module names are not affected by the distutils bug.
            continue
        m.export_symbols = make_filtered_list(
            "PyInit_" + m.name.rsplit(".", 1)[-1],
            m.export_symbols,
        )
if os.environ.get('XML_RESULTS'): if os.environ.get('XML_RESULTS'):
compile_result_dir = os.environ['XML_RESULTS'] compile_result_dir = os.environ['XML_RESULTS']
def record_results(func): def record_results(func):
...@@ -1180,7 +1221,7 @@ def cythonize_one(pyx_file, c_file, fingerprint, quiet, options=None, ...@@ -1180,7 +1221,7 @@ def cythonize_one(pyx_file, c_file, fingerprint, quiet, options=None,
zip_fingerprint_file = fingerprint_file_base + '.zip' zip_fingerprint_file = fingerprint_file_base + '.zip'
if os.path.exists(gz_fingerprint_file) or os.path.exists(zip_fingerprint_file): if os.path.exists(gz_fingerprint_file) or os.path.exists(zip_fingerprint_file):
if not quiet: if not quiet:
print("%sFound compiled %s in cache" % (progress, pyx_file)) print(u"%sFound compiled %s in cache" % (progress, pyx_file))
if os.path.exists(gz_fingerprint_file): if os.path.exists(gz_fingerprint_file):
os.utime(gz_fingerprint_file, None) os.utime(gz_fingerprint_file, None)
with contextlib.closing(gzip_open(gz_fingerprint_file, 'rb')) as g: with contextlib.closing(gzip_open(gz_fingerprint_file, 'rb')) as g:
...@@ -1194,7 +1235,7 @@ def cythonize_one(pyx_file, c_file, fingerprint, quiet, options=None, ...@@ -1194,7 +1235,7 @@ def cythonize_one(pyx_file, c_file, fingerprint, quiet, options=None,
z.extract(artifact, os.path.join(dirname, artifact)) z.extract(artifact, os.path.join(dirname, artifact))
return return
if not quiet: if not quiet:
print("%sCythonizing %s" % (progress, pyx_file)) print(u"%sCythonizing %s" % (progress, Utils.decode_filename(pyx_file)))
if options is None: if options is None:
options = CompilationOptions(default_options) options = CompilationOptions(default_options)
options.output_file = c_file options.output_file = c_file
......
...@@ -264,6 +264,15 @@ def __invoke(%(params)s): ...@@ -264,6 +264,15 @@ def __invoke(%(params)s):
build_extension.build_lib = lib_dir build_extension.build_lib = lib_dir
build_extension.run() build_extension.run()
# On Windows, we need to add the library output directory to the DLL load path (Py3.8+).
# https://github.com/cython/cython/issues/3450
try:
add_dll_directory = os.add_dll_directory
except AttributeError:
pass
else:
add_dll_directory(os.path.dirname(module_path))
module = load_dynamic(module_name, module_path) module = load_dynamic(module_name, module_path)
_cython_inline_cache[orig_code, arg_sigs, key_hash] = module.__invoke _cython_inline_cache[orig_code, arg_sigs, key_hash] = module.__invoke
......
...@@ -450,9 +450,9 @@ def run_pipeline(source, options, full_module_name=None, context=None): ...@@ -450,9 +450,9 @@ def run_pipeline(source, options, full_module_name=None, context=None):
# ensure that the inputs are unicode (for Python 2) # ensure that the inputs are unicode (for Python 2)
if sys.version_info[0] == 2: if sys.version_info[0] == 2:
source = source.decode(sys.getfilesystemencoding()) source = Utils.decode_filename(source)
if full_module_name: if full_module_name:
full_module_name = full_module_name.decode("utf-8") full_module_name = Utils.decode_filename(full_module_name)
source_ext = os.path.splitext(source)[1] source_ext = os.path.splitext(source)[1]
options.configure_language_defaults(source_ext[1:]) # py/pyx options.configure_language_defaults(source_ext[1:]) # py/pyx
......
...@@ -5,6 +5,7 @@ import unittest ...@@ -5,6 +5,7 @@ import unittest
import shlex import shlex
import sys import sys
import tempfile import tempfile
from io import open
from .Compiler import Errors from .Compiler import Errors
from .CodeWriter import CodeWriter from .CodeWriter import CodeWriter
...@@ -196,23 +197,23 @@ def unpack_source_tree(tree_file, workdir, cython_root): ...@@ -196,23 +197,23 @@ def unpack_source_tree(tree_file, workdir, cython_root):
if workdir is None: if workdir is None:
workdir = tempfile.mkdtemp() workdir = tempfile.mkdtemp()
header, cur_file = [], None header, cur_file = [], None
with open(tree_file) as f: with open(tree_file, 'rb') as f:
try: try:
for line in f: for line in f:
if line.startswith('#####'): if line[:5] == b'#####':
filename = line.strip().strip('#').strip().replace('/', os.path.sep) filename = line.strip().strip(b'#').strip().decode('utf8').replace('/', os.path.sep)
path = os.path.join(workdir, filename) path = os.path.join(workdir, filename)
if not os.path.exists(os.path.dirname(path)): if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path)) os.makedirs(os.path.dirname(path))
if cur_file is not None: if cur_file is not None:
to_close, cur_file = cur_file, None to_close, cur_file = cur_file, None
to_close.close() to_close.close()
cur_file = open(path, 'w') cur_file = open(path, 'wb')
elif cur_file is not None: elif cur_file is not None:
cur_file.write(line) cur_file.write(line)
elif line.strip() and not line.lstrip().startswith('#'): elif line.strip() and not line.lstrip().startswith(b'#'):
if line.strip() not in ('"""', "'''"): if line.strip() not in (b'"""', b"'''"):
command = shlex.split(line) command = shlex.split(line.decode('utf8'))
if not command: continue if not command: continue
# In Python 3: prog, *args = command # In Python 3: prog, *args = command
prog, args = command[0], command[1:] prog, args = command[0], command[1:]
......
...@@ -13,10 +13,12 @@ environment: ...@@ -13,10 +13,12 @@ environment:
- PYTHON: "C:\\Python27" - PYTHON: "C:\\Python27"
PYTHON_VERSION: "2.7" PYTHON_VERSION: "2.7"
PYTHON_ARCH: "32" PYTHON_ARCH: "32"
PYTHONIOENCODING: "utf-8"
- PYTHON: "C:\\Python27-x64" - PYTHON: "C:\\Python27-x64"
PYTHON_VERSION: "2.7" PYTHON_VERSION: "2.7"
PYTHON_ARCH: "64" PYTHON_ARCH: "64"
PYTHONIOENCODING: "utf-8"
- PYTHON: "C:\\Python38" - PYTHON: "C:\\Python38"
PYTHON_VERSION: "3.8" PYTHON_VERSION: "3.8"
...@@ -67,6 +69,7 @@ environment: ...@@ -67,6 +69,7 @@ environment:
PYTHON_VERSION: "2.7" PYTHON_VERSION: "2.7"
PYTHON_ARCH: "64" PYTHON_ARCH: "64"
BACKEND: cpp BACKEND: cpp
PYTHONIOENCODING: "utf-8"
clone_depth: 5 clone_depth: 5
...@@ -92,6 +95,7 @@ build_script: ...@@ -92,6 +95,7 @@ build_script:
test: off test: off
test_script: test_script:
- "%PYTHON%\\Scripts\\pip.exe install -r test-requirements.txt" - "%PYTHON%\\Scripts\\pip.exe install -r test-requirements.txt"
- "%PYTHON%\\Scripts\\pip.exe install win_unicode_console"
- "set CFLAGS=/Od /W3" - "set CFLAGS=/Od /W3"
- "%WITH_ENV% %PYTHON%\\python.exe runtests.py -vv --backend=%BACKEND% --no-code-style -j5" - "%WITH_ENV% %PYTHON%\\python.exe runtests.py -vv --backend=%BACKEND% --no-code-style -j5"
......
...@@ -2197,6 +2197,9 @@ def main(): ...@@ -2197,6 +2197,9 @@ def main():
else: else:
keep_alive_interval = None keep_alive_interval = None
if options.shard_count > 1 and options.shard_num == -1: if options.shard_count > 1 and options.shard_num == -1:
if "PYTHONIOENCODING" not in os.environ:
# Make sure subprocesses can print() Unicode text.
os.environ["PYTHONIOENCODING"] = sys.stdout.encoding or sys.getdefaultencoding()
import multiprocessing import multiprocessing
pool = multiprocessing.Pool(options.shard_count) pool = multiprocessing.Pool(options.shard_count)
tasks = [(options, cmd_args, shard_num) for shard_num in range(options.shard_count)] tasks = [(options, cmd_args, shard_num) for shard_num in range(options.shard_count)]
...@@ -2340,6 +2343,14 @@ def runtests(options, cmd_args, coverage=None): ...@@ -2340,6 +2343,14 @@ def runtests(options, cmd_args, coverage=None):
else: else:
faulthandler.enable() faulthandler.enable()
if sys.platform == "win32" and sys.version_info < (3, 6):
# enable Unicode console output, if possible
try:
import win_unicode_console
except ImportError:
pass
else:
win_unicode_console.enable()
WITH_CYTHON = options.with_cython WITH_CYTHON = options.with_cython
ROOTDIR = os.path.abspath(options.root_dir) ROOTDIR = os.path.abspath(options.root_dir)
......
...@@ -130,4 +130,11 @@ CFLAGS (distutils) = {config_var('CFLAGS')} ...@@ -130,4 +130,11 @@ CFLAGS (distutils) = {config_var('CFLAGS')}
CFLAGS (env) = {get_env('CFLAGS', '')} CFLAGS (env) = {get_env('CFLAGS', '')}
LINKCC (distutils) = {config_var('LINKCC')} LINKCC (distutils) = {config_var('LINKCC')}
LINKCC (env) = {get_env('LINKCC', '')} LINKCC (env) = {get_env('LINKCC', '')}
Encodings:
LANG (env) = {get_env('LANG', '')}
PYTHONIOENCODING (env) = {get_env('PYTHONIOENCODING', '')}
sys stdout encoding = {sys.stdout.encoding}
sys default encoding = {sys.getdefaultencoding()}
sys FS encoding = {sys.getfilesystemencoding()}
""") """)
...@@ -72,10 +72,12 @@ def test(): ...@@ -72,10 +72,12 @@ def test():
return {1} return {1}
"""] """]
for idx in range(len(example_code)): from io import open
with open("test{0}.py".format(idx),"w") as f:
for idx, (code, strings) in enumerate(zip(example_code, string_pairs)):
with open("test{0}.py".format(idx), "w", encoding="utf8") as f:
code = code.format(*strings)
f.write("# -*- coding: utf-8 -*-\n")
# The code isn't Py2 compatible. Only write actual code in Py3+.
if sys.version_info[0] > 2: if sys.version_info[0] > 2:
f.write("# -*- coding: utf-8 -*-\n") f.write(code)
f.write(example_code[idx].format(*string_pairs[idx]))
else:
f.write("\n") # code isn't Python 2 compatible - write a dummy file
...@@ -7,8 +7,7 @@ PYTHON -m mydoctest ...@@ -7,8 +7,7 @@ PYTHON -m mydoctest
########### mydoctest.py ####### ########### mydoctest.py #######
import sys import sys
if (sys.version_info[0] < 3 or if sys.version_info < (3, 5):
(sys.version_info[0] == 3 and sys.version_info[1] < 5)):
# The module is only Cythonized and not build for these versions # The module is only Cythonized and not build for these versions
# so don't run the tests # so don't run the tests
exit() exit()
...@@ -28,9 +27,19 @@ exit(val) ...@@ -28,9 +27,19 @@ exit(val)
from __future__ import unicode_literals from __future__ import unicode_literals
import sys import sys
# enable Unicode console output, if possible
if sys.platform == "win32" and sys.version_info < (3, 6):
try:
import win_unicode_console
except ImportError:
pass
else:
win_unicode_console.enable()
from Cython.Build import cythonize from Cython.Build import cythonize
files = ["mymoδ.pyx", "from_cy.pyx"] files = ["mymoð.pyx", "from_cy.pyx"]
# For Python 2 and Python <= 3.4 just run pyx->c; # For Python 2 and Python <= 3.4 just run pyx->c;
...@@ -44,7 +53,7 @@ if sys.version_info >= (3, 5): ...@@ -44,7 +53,7 @@ if sys.version_info >= (3, 5):
ext_modules = modules ext_modules = modules
) )
############ mymoδ.pyx ######### ############ mymoð.pyx #########
def f(): def f():
return True return True
...@@ -52,7 +61,7 @@ def f(): ...@@ -52,7 +61,7 @@ def f():
cdef public api void cdef_func(): cdef public api void cdef_func():
pass pass
############ pxd_moδ.pxd ########## ############ pxd_moð.pxd ##########
cdef struct S: cdef struct S:
int x int x
...@@ -63,11 +72,11 @@ cdef public api void cdef_func() # just to test generation of headers ...@@ -63,11 +72,11 @@ cdef public api void cdef_func() # just to test generation of headers
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import mymoδ import mymoð
from mymoδ import f from mymoð import f
__doc__ = """ __doc__ = """
>>> mymoδ.f() >>> mymoð.f()
True True
>>> f() >>> f()
True True
...@@ -77,12 +86,12 @@ True ...@@ -77,12 +86,12 @@ True
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
import mymoδ import mymoð
from mymoδ import f from mymoð import f
cimport pxd_moδ cimport pxd_moð
from pxd_moδ cimport S from pxd_moð cimport S
def test_imported(): def test_imported():
...@@ -90,14 +99,15 @@ def test_imported(): ...@@ -90,14 +99,15 @@ def test_imported():
>>> test_imported() >>> test_imported()
True True
""" """
return mymoδ.f() and f() # True and True return mymoð.f() and f() # True and True
def test_cimported(): def test_cimported():
""" """
>>> test_cimported() >>> test_cimported()
3 3
""" """
cdef pxd_moδ.S v1 cdef pxd_moð.S v1
v1.x = 1 v1.x = 1
cdef S v2 cdef S v2
v2.x = 2 v2.x = 2
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment