Commit 8d4d4839 authored by Stefan Behnel's avatar Stefan Behnel

Merge branch 'master' into release

parents 306a9ef6 5b8bcedb
...@@ -34,6 +34,11 @@ Features added ...@@ -34,6 +34,11 @@ Features added
* ``cython.inline()`` supports a direct ``language_level`` keyword argument that * ``cython.inline()`` supports a direct ``language_level`` keyword argument that
was previously only available via a directive. was previously only available via a directive.
* A new directive ``str_is_str=True`` was added that keeps unprefixed string
literals as type 'str' in both Py2 and Py3, and the builtin 'str' type unchanged
even when ``language_level=3`` is enabled. This is meant to help user code to
migrate to Python 3 semantics without making support for Python 2.x difficult.
* In CPython 3.6 and later, looking up globals in the module dict is almost * In CPython 3.6 and later, looking up globals in the module dict is almost
as fast as looking up C globals. as fast as looking up C globals.
(Github issue #2313) (Github issue #2313)
...@@ -143,6 +148,10 @@ Bugs fixed ...@@ -143,6 +148,10 @@ Bugs fixed
the return value was non-null. the return value was non-null.
Original patch by Matt Wozniski (Github Issue #2603) Original patch by Matt Wozniski (Github Issue #2603)
* The source file encoding detection could get confused if the
``c_string_encoding`` directive appeared within the first two lines.
(Github issue #2632)
Other changes Other changes
------------- -------------
......
...@@ -94,9 +94,18 @@ class Context(object): ...@@ -94,9 +94,18 @@ class Context(object):
if language_level is not None: if language_level is not None:
self.set_language_level(language_level) self.set_language_level(language_level)
if self.compiler_directives.get('str_is_str') is not None:
self.set_str_is_str(self.compiler_directives['str_is_str'])
self.gdb_debug_outputwriter = None self.gdb_debug_outputwriter = None
def set_str_is_str(self, str_is_str):
    """Bring the 'unicode_literals' future directive in line with 'str_is_str'.

    A true 'str_is_str' removes 'unicode_literals' from
    'self.future_directives' (a no-op if it is absent); a false value adds it.
    """
    from .Future import unicode_literals
    directives = self.future_directives
    if not str_is_str:
        directives.add(unicode_literals)
    else:
        directives.discard(unicode_literals)
def set_language_level(self, level): def set_language_level(self, level):
self.language_level = level self.language_level = level
if level >= 3: if level >= 3:
......
...@@ -198,6 +198,7 @@ _directive_defaults = { ...@@ -198,6 +198,7 @@ _directive_defaults = {
'iterable_coroutine': False, # Make async coroutines backwards compatible with the old asyncio yield-from syntax. 'iterable_coroutine': False, # Make async coroutines backwards compatible with the old asyncio yield-from syntax.
'c_string_type': 'bytes', 'c_string_type': 'bytes',
'c_string_encoding': '', 'c_string_encoding': '',
'str_is_str': None, # fall back to 'language_level == 2'
'type_version_tag': True, # enables Py_TPFLAGS_HAVE_VERSION_TAG on extension types 'type_version_tag': True, # enables Py_TPFLAGS_HAVE_VERSION_TAG on extension types
'unraisable_tracebacks': True, 'unraisable_tracebacks': True,
'old_style_globals': False, 'old_style_globals': False,
...@@ -313,6 +314,7 @@ directive_types = { ...@@ -313,6 +314,7 @@ directive_types = {
'freelist': int, 'freelist': int,
'c_string_type': one_of('bytes', 'bytearray', 'str', 'unicode'), 'c_string_type': one_of('bytes', 'bytearray', 'str', 'unicode'),
'c_string_encoding': normalise_encoding_name, 'c_string_encoding': normalise_encoding_name,
'str_is_str': bool,
} }
for key, val in _directive_defaults.items(): for key, val in _directive_defaults.items():
...@@ -347,6 +349,7 @@ directive_scopes = { # defaults to available everywhere ...@@ -347,6 +349,7 @@ directive_scopes = { # defaults to available everywhere
# Avoid scope-specific to/from_py_functions for c_string. # Avoid scope-specific to/from_py_functions for c_string.
'c_string_type': ('module',), 'c_string_type': ('module',),
'c_string_encoding': ('module',), 'c_string_encoding': ('module',),
'str_is_str': ('module',),
'type_version_tag': ('module', 'cclass'), 'type_version_tag': ('module', 'cclass'),
'language_level': ('module',), 'language_level': ('module',),
# globals() could conceivably be controlled at a finer granularity, # globals() could conceivably be controlled at a finer granularity,
......
...@@ -3652,6 +3652,9 @@ def p_compiler_directive_comments(s): ...@@ -3652,6 +3652,9 @@ def p_compiler_directive_comments(s):
if 'language_level' in new_directives: if 'language_level' in new_directives:
# Make sure we apply the language level already to the first token that follows the comments. # Make sure we apply the language level already to the first token that follows the comments.
s.context.set_language_level(new_directives['language_level']) s.context.set_language_level(new_directives['language_level'])
if 'str_is_str' in new_directives:
# Make sure we apply 'str_is_str' directive already to the first token that follows the comments.
s.context.set_str_is_str(new_directives['str_is_str'])
result.update(new_directives) result.update(new_directives)
......
...@@ -21,6 +21,7 @@ from .PyrexTypes import py_object_type, unspecified_type ...@@ -21,6 +21,7 @@ from .PyrexTypes import py_object_type, unspecified_type
from .TypeSlots import ( from .TypeSlots import (
pyfunction_signature, pymethod_signature, richcmp_special_methods, pyfunction_signature, pymethod_signature, richcmp_special_methods,
get_special_method_signature, get_property_accessor_signature) get_special_method_signature, get_property_accessor_signature)
from . import Future
from . import Code from . import Code
...@@ -1002,10 +1003,12 @@ class BuiltinScope(Scope): ...@@ -1002,10 +1003,12 @@ class BuiltinScope(Scope):
cname, type = definition cname, type = definition
self.declare_var(name, type, None, cname) self.declare_var(name, type, None, cname)
def lookup(self, name, language_level=None): def lookup(self, name, language_level=None, str_is_str=None):
# 'language_level' is passed by ModuleScope # 'language_level' and 'str_is_str' are passed by ModuleScope
if language_level == 3:
if name == 'str': if name == 'str':
if str_is_str is None:
str_is_str = language_level in (None, 2)
if not str_is_str:
name = 'unicode' name = 'unicode'
return Scope.lookup(self, name) return Scope.lookup(self, name)
...@@ -1174,15 +1177,18 @@ class ModuleScope(Scope): ...@@ -1174,15 +1177,18 @@ class ModuleScope(Scope):
def global_scope(self): def global_scope(self):
return self return self
def lookup(self, name, language_level=None): def lookup(self, name, language_level=None, str_is_str=None):
entry = self.lookup_here(name) entry = self.lookup_here(name)
if entry is not None: if entry is not None:
return entry return entry
if language_level is None: if language_level is None:
language_level = self.context.language_level if self.context is not None else 3 language_level = self.context.language_level if self.context is not None else 3
if str_is_str is None:
str_is_str = language_level == 2 or (
self.context is not None and Future.unicode_literals not in self.context.future_directives)
return self.outer_scope.lookup(name, language_level=language_level) return self.outer_scope.lookup(name, language_level=language_level, str_is_str=str_is_str)
def declare_tuple_type(self, pos, components): def declare_tuple_type(self, pos, components):
components = tuple(components) components = tuple(components)
......
...@@ -238,7 +238,7 @@ def decode_filename(filename): ...@@ -238,7 +238,7 @@ def decode_filename(filename):
# support for source file encoding detection # support for source file encoding detection
_match_file_encoding = re.compile(b"coding[:=]\s*([-\w.]+)").search _match_file_encoding = re.compile(br"(\w*coding)[:=]\s*([-\w.]+)").search
def detect_opened_file_encoding(f): def detect_opened_file_encoding(f):
...@@ -254,8 +254,8 @@ def detect_opened_file_encoding(f): ...@@ -254,8 +254,8 @@ def detect_opened_file_encoding(f):
if not data: if not data:
break break
m = _match_file_encoding(lines[0]) m = _match_file_encoding(lines[0])
if m: if m and m.group(1) != b'c_string_encoding':
return m.group(1).decode('iso8859-1') return m.group(2).decode('iso8859-1')
elif len(lines) > 1: elif len(lines) > 1:
m = _match_file_encoding(lines[1]) m = _match_file_encoding(lines[1])
if m: if m:
......
...@@ -771,6 +771,81 @@ For public extension types, the object and type clauses are both required, ...@@ -771,6 +771,81 @@ For public extension types, the object and type clauses are both required,
because Cython must be able to generate code that is compatible with external C because Cython must be able to generate code that is compatible with external C
code. code.
Attribute name matching and aliasing
------------------------------------
Sometimes the type's C struct as specified in ``object_struct_name`` may use
different labels for the fields than those in the ``PyTypeObject``. This can
easily happen in hand-coded C extensions where the ``PyTypeObject_Foo`` has a
getter method, but the name does not match the name in the ``PyFooObject``. In
NumPy, for instance, the Python-level ``dtype.itemsize`` is a getter for the C
struct field ``elsize``. Cython supports aliasing field names so that one can
write ``dtype.itemsize`` in Cython code which will be compiled into direct
access of the C struct field, without going through a C-API equivalent of
``dtype.__getattr__('itemsize')``.
For example we may have an extension
module ``foo_extension``::
cdef class Foo:
cdef public int field0, field1, field2;
def __init__(self, f0, f1, f2):
self.field0 = f0
self.field1 = f1
self.field2 = f2
but a C struct in a file ``foo_nominal.h``::
typedef struct {
PyObject_HEAD
int f0;
int f1;
int f2;
} FooStructNominal;
Note that the struct uses ``f0``, ``f1``, ``f2`` but they are ``field0``,
``field1``, and ``field2`` in ``Foo``. We are given this situation, including
a header file with that struct, and we wish to write a function to sum the
values. If we write an extension module ``wrapper``::
cdef extern from "foo_nominal.h":
ctypedef class foo_extension.Foo [object FooStructNominal]:
cdef:
int field0
int field1
int field2
def sum(Foo f):
return f.field0 + f.field1 + f.field2
then ``wrapper.sum(f)`` (where ``f = foo_extension.Foo(1, 2, 3)``) will still
use the C-API equivalent of::
return f.__getattr__('field0') +
f.__getattr__('field1') +
f.__getattr__('field2')
instead of the desired C equivalent of ``return f->f0 + f->f1 + f->f2``. We can
alias the fields by using::
cdef extern from "foo_nominal.h":
ctypedef class foo_extension.Foo [object FooStructNominal]:
cdef:
int field0 "f0"
int field1 "f1"
int field2 "f2"
def sum(Foo f):
return f.field0 + f.field1 + f.field2
and now Cython will replace the slow ``__getattr__`` with direct C access to
the ``FooStructNominal`` fields. This is useful when directly processing Python
code: no changes to the Python code need be made to achieve significant
speedups, even though the field names in Python and C are different. Of course,
one should make sure that the aliased fields are really equivalent.
Implicit importing Implicit importing
------------------ ------------------
......
...@@ -219,6 +219,7 @@ same applies equally to union and enum declarations. ...@@ -219,6 +219,7 @@ same applies equally to union and enum declarations.
| } Foo; | | | | } Foo; | | |
+-------------------------+---------------------------------------------+-----------------------------------------------------------------------+ +-------------------------+---------------------------------------------+-----------------------------------------------------------------------+
See also use of :ref:`external_extension_types`.
Note that in all the cases below, you refer to the type in Cython code simply Note that in all the cases below, you refer to the type in Cython code simply
as :c:type:`Foo`, not ``struct Foo``. as :c:type:`Foo`, not ``struct Foo``.
......
...@@ -1735,6 +1735,9 @@ class EndToEndTest(unittest.TestCase): ...@@ -1735,6 +1735,9 @@ class EndToEndTest(unittest.TestCase):
old_path = os.environ.get('PYTHONPATH') old_path = os.environ.get('PYTHONPATH')
env = dict(os.environ) env = dict(os.environ)
env['PYTHONPATH'] = self.cython_syspath + os.pathsep + (old_path or '') env['PYTHONPATH'] = self.cython_syspath + os.pathsep + (old_path or '')
cmd = []
out = []
err = []
for command_no, command in enumerate(filter(None, commands.splitlines()), 1): for command_no, command in enumerate(filter(None, commands.splitlines()), 1):
with self.stats.time('%s(%d)' % (self.name, command_no), 'c', with self.stats.time('%s(%d)' % (self.name, command_no), 'c',
'etoe-build' if ' setup.py ' in command else 'etoe-run'): 'etoe-build' if ' setup.py ' in command else 'etoe-run'):
...@@ -1743,11 +1746,15 @@ class EndToEndTest(unittest.TestCase): ...@@ -1743,11 +1746,15 @@ class EndToEndTest(unittest.TestCase):
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
shell=True, shell=True,
env=env) env=env)
out, err = p.communicate() _out, _err = p.communicate()
cmd.append(command)
out.append(_out)
err.append(_err)
res = p.returncode res = p.returncode
if res != 0: if res != 0:
sys.stderr.write("%s\n%s\n%s\n" % ( for c, o, e in zip(cmd, out, err):
command, self._try_decode(out), self._try_decode(err))) sys.stderr.write("%s\n%s\n%s\n\n" % (
c, self._try_decode(o), self._try_decode(e)))
self.assertEqual(0, res, "non-zero exit status") self.assertEqual(0, res, "non-zero exit status")
self.success = True self.success = True
......
# cython: language_level=3, binding=True, str_is_str=True
# mode: run
# tag: python3, str_is_str
print(end='') # test that language_level 3 applies immediately at the module start, for the first token.
__doc__ = """
>>> items = sorted(locals_function(1).items())
>>> for item in items:
... print('%s = %r' % item)
a = 1
b = 2
x = 'abc'
"""
# Returns its own locals() mapping; the module doctest expects it to contain
# exactly the arguments 'a' and 'b' plus the local 'x', so no other local
# names may be introduced here.
def locals_function(a, b=2):
x = 'abc'
return locals()
### true division
# Divides an untyped (Python object) argument by 2 with '/'; the doctests
# expect float results (2.0, 1.5), i.e. true division rather than floor division.
def truediv(x):
"""
>>> truediv(4)
2.0
>>> truediv(3)
1.5
"""
return x / 2
# Same check as truediv(), but with the argument declared as a C int: the
# doctests still expect float results from 'x / 2'.
def truediv_int(int x):
"""
>>> truediv_int(4)
2.0
>>> truediv_int(3)
1.5
"""
return x / 2
### Py3 feature tests
# Forwards its positional arguments to print() called as a function with
# star-args; the doctest expects them printed space-separated on one line.
def print_function(*args):
"""
>>> print_function(1,2,3)
1 2 3
"""
print(*args) # this isn't valid Py2 syntax
str_string = "abcdefg"
# Checks that the unprefixed literal bound to the module-level 'str_string'
# is an instance of 'str': prints True on success, otherwise the actual type
# (so a failure shows up in the doctest diff). Returns the string itself.
def no_unicode_literals():
"""
>>> print( no_unicode_literals() )
True
abcdefg
"""
print(isinstance(str_string, str) or type(str_string))
return str_string
# Returns the name 'str' as resolved inside this module together with a value
# declared as 'cdef str'. The doctests check that the value and the
# module-level 'str_string' are both instances of that returned type; on
# failure the 'or' operand exposes the offending objects.
def str_type_is_str():
"""
>>> str_type, s = str_type_is_str()
>>> isinstance(s, type(str_string)) or (s, str_type)
True
>>> isinstance(s, str_type) or (s, str_type)
True
>>> isinstance(str_string, str_type) or str_type
True
"""
cdef str s = 'abc'
return str, s
# Exercises annotation syntax on positional arguments, *args, **kwargs, the
# return value and a local variable. Returns a + b; the doctests also check
# that the five string annotations are stored unchanged in __annotations__.
def annotation_syntax(a: "test new test", b : "other" = 2, *args: "ARGS", **kwargs: "KWARGS") -> "ret":
"""
>>> annotation_syntax(1)
3
>>> annotation_syntax(1,3)
4
>>> len(annotation_syntax.__annotations__)
5
>>> annotation_syntax.__annotations__['a']
'test new test'
>>> annotation_syntax.__annotations__['b']
'other'
>>> annotation_syntax.__annotations__['args']
'ARGS'
>>> annotation_syntax.__annotations__['kwargs']
'KWARGS'
>>> annotation_syntax.__annotations__['return']
'ret'
"""
result : int = a + b
return result
PYTHON setup.py build_ext --inplace
PYTHON -c "import runner"
######## setup.py ########
from Cython.Build.Dependencies import cythonize
from distutils.core import setup
# Force the build order: one setup() run per pattern, foo_extension first,
# then the getter modules that are declared against it.
for pattern in ("foo_extension.pyx", "getter*.pyx"):
    setup(ext_modules=cythonize(pattern))
######## foo_nominal.h ########
#include <Python.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Object struct with the standard Python object header followed by three
 * int fields named f0, f1 and f2. getter0.pyx declares foo_extension.Foo
 * against this struct and maps field0..field2 onto these names. */
typedef struct {
PyObject_HEAD
int f0;
int f1;
int f2;
} FooStructNominal;
#ifdef __cplusplus
}
#endif
######## foo_extension.pyx ########
# Extension type with three public C int attributes (field0..field2) that
# are initialised from the three constructor arguments.
cdef class Foo:
cdef public int field0, field1, field2;
def __init__(self, f0, f1, f2):
self.field0 = f0
self.field1 = f1
self.field2 = f2
# C-level helper returning the 'field0' attribute of a typed Foo argument.
cdef get_field0(Foo f):
return f.field0
# C-level helper returning the 'field1' attribute of a typed Foo argument.
cdef get_field1(Foo f):
return f.field1
# C-level helper returning the 'field2' attribute of a typed Foo argument.
cdef get_field2(Foo f):
return f.field2
# A pure-Python subclass of Foo that disallows direct access to the fields:
# each of field0..field2 is shadowed by a read-only property whose getter
# raises AttributeError.
class OpaqueFoo(Foo):
@property
def field0(self):
raise AttributeError('no direct access to field0')
@property
def field1(self):
raise AttributeError('no direct access to field1')
@property
def field2(self):
raise AttributeError('no direct access to field2')
######## getter0.pyx ########
# Access base Foo fields from C via aliased field names.
# foo_extension.Foo is declared against the FooStructNominal struct from
# foo_nominal.h; the quoted string after each attribute is the C name of the
# struct member ("f0".."f2") that the Cython-level name field0..field2 maps to.
cdef extern from "foo_nominal.h":
ctypedef class foo_extension.Foo [object FooStructNominal]:
cdef:
int field0 "f0"
int field1 "f1"
int field2 "f2"
# Returns the sum of the three aliased int fields of a Foo instance.
# runner.py calls this as getter0.sum(), so the name (which shadows the
# builtin inside this module) is part of the test's interface.
def sum(Foo f):
# the f.__getattr__('field0') is replaced in c by f->f0
return f.field0 + f.field1 + f.field2
######## runner.py ########
# Driver for the aliased-field end-to-end test: checks that getter0.sum()
# reads the Foo fields through their C aliases, including on a subclass
# whose Python-level attribute access is blocked.
import foo_extension, getter0

foo = foo_extension.Foo(23, 123, 1023)

assert foo.field0 == 23
assert foo.field1 == 123
assert foo.field2 == 1023

ret = getter0.sum(foo)
assert ret == foo.field0 + foo.field1 + foo.field2

opaque_foo = foo_extension.OpaqueFoo(23, 123, 1023)

# C can access the fields through the aliases
opaque_ret = getter0.sum(opaque_foo)
assert opaque_ret == ret

# Python cannot access the fields: the OpaqueFoo properties must raise.
# The lookup has to go through 'opaque_foo' itself; reading the attribute
# from the integer result 'opaque_ret' would raise AttributeError for an
# unrelated reason and make this check pass vacuously.
try:
    f0 = opaque_foo.field0
except AttributeError:
    pass
else:
    assert False, "expected AttributeError when reading OpaqueFoo.field0 from Python"
# cython: c_string_type = str # cython: c_string_encoding=default
# cython: c_string_encoding = default # cython: c_string_type=str
# NOTE: the directive order above is specifically meant to trigger (and confuse) the
# source encoding detector with "coding=default".
import sys import sys
if sys.version_info[0] >= 3: if sys.version_info[0] >= 3:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment