Commit d89dd2a9 authored by Robert Bradshaw

Merge branch 'master' of github.com:cython/cython

parents 8c531023 e085197b
......@@ -4,6 +4,9 @@ __pycache__
*.so
*.o
*.egg
*.egg-info
Cython/Compiler/*.c
Cython/Plex/*.c
Cython/Runtime/refnanny.c
......
......@@ -3,6 +3,8 @@ syntax: glob
*.pyc
*.pyo
__pycache__
*.egg
*.egg-info
Cython/Compiler/*.c
Cython/Plex/*.c
......
......@@ -8,6 +8,10 @@ Cython Changelog
Features added
--------------
* Using ``cdef basestring stringvar`` and function arguments typed as
``basestring`` is now meaningful and allows assigning exactly
``str`` and ``unicode`` objects, but no subtypes of these types.
* Support for the ``__debug__`` builtin.
* Assertions in Cython compiled modules are disabled if the running
......@@ -24,7 +28,7 @@ Features added
to the cythonize() compilation function (including distutils build).
* The new extension type decorator ``@cython.no_gc_clear`` prevents
the type from being cleared during cyclic garbage collection, thus
objects from being cleared during cyclic garbage collection, thus
making sure that object attributes are kept alive until deallocation.
* During cyclic garbage collection, attributes of extension types that
......@@ -57,7 +61,7 @@ Other changes
cleanup instead of ``tp_del()``.
0.19.2 (??)
0.19.2 (2013-10-13)
===================
Features added
......@@ -66,6 +70,13 @@ Features added
Bugs fixed
----------
* Some standard declarations were fixed or updated, including the previously
incorrect declaration of ``PyBuffer_FillInfo()`` and some missing bits in
``libc.math``.
* Heap allocated subtypes of ``type`` used the wrong base type struct at the
C level.
* Calling the unbound method dict.keys/values/items() in dict subtypes could
call the bound object method instead of the unbound supertype method.
......
......@@ -408,7 +408,7 @@ def init_builtins():
'__debug__', PyrexTypes.c_const_type(PyrexTypes.c_bint_type),
pos=None, cname='(!Py_OptimizeFlag)', is_cdef=True)
global list_type, tuple_type, dict_type, set_type, frozenset_type
global bytes_type, str_type, unicode_type
global bytes_type, str_type, unicode_type, basestring_type
global float_type, bool_type, type_type, complex_type
type_type = builtin_scope.lookup('type').type
list_type = builtin_scope.lookup('list').type
......@@ -419,6 +419,7 @@ def init_builtins():
bytes_type = builtin_scope.lookup('bytes').type
str_type = builtin_scope.lookup('str').type
unicode_type = builtin_scope.lookup('unicode').type
basestring_type = builtin_scope.lookup('basestring').type
float_type = builtin_scope.lookup('float').type
bool_type = builtin_scope.lookup('bool').type
complex_type = builtin_scope.lookup('complex').type
......
......@@ -67,7 +67,9 @@ coercion_error_dict = {
(Builtin.unicode_type, PyrexTypes.c_uchar_ptr_type) : "Unicode objects only support coercion to Py_UNICODE*.",
(Builtin.bytes_type, Builtin.unicode_type) : "Cannot convert 'bytes' object to unicode implicitly, decoding required",
(Builtin.bytes_type, Builtin.str_type) : "Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.",
(Builtin.bytes_type, Builtin.basestring_type) : "Cannot convert 'bytes' object to basestring implicitly. This is not portable to Py3.",
(Builtin.bytes_type, PyrexTypes.c_py_unicode_ptr_type) : "Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.",
(Builtin.basestring_type, Builtin.bytes_type) : "Cannot convert 'basestring' object to bytes implicitly. This is not portable.",
(Builtin.str_type, Builtin.unicode_type) : "str objects do not support coercion to unicode, use a unicode string literal instead (u'')",
(Builtin.str_type, Builtin.bytes_type) : "Cannot convert 'str' to 'bytes' implicitly. This is not portable.",
(Builtin.str_type, PyrexTypes.c_char_ptr_type) : "'str' objects do not support coercion to C types (use 'bytes'?).",
......@@ -76,6 +78,7 @@ coercion_error_dict = {
(PyrexTypes.c_char_ptr_type, Builtin.unicode_type) : "Cannot convert 'char*' to unicode implicitly, decoding required",
(PyrexTypes.c_uchar_ptr_type, Builtin.unicode_type) : "Cannot convert 'char*' to unicode implicitly, decoding required",
}
def find_coercion_error(type_tuple, default, env):
err = coercion_error_dict.get(type_tuple)
if err is None:
......@@ -1250,9 +1253,8 @@ class UnicodeNode(ConstNode):
"Unicode literals do not support coercion to C types other "
"than Py_UNICODE/Py_UCS4 (for characters) or Py_UNICODE* "
"(for strings).")
elif dst_type is not py_object_type:
if not self.check_for_coercion_error(dst_type, env):
self.fail_assignment(dst_type)
elif dst_type not in (py_object_type, Builtin.basestring_type):
self.check_for_coercion_error(dst_type, env, fail=True)
return self
def can_coerce_to_char_literal(self):
......@@ -1337,6 +1339,7 @@ class StringNode(PyConstNode):
# return BytesNode(self.pos, value=self.value)
if not dst_type.is_pyobject:
return BytesNode(self.pos, value=self.value).coerce_to(dst_type, env)
if dst_type is not Builtin.basestring_type:
self.check_for_coercion_error(dst_type, env, fail=True)
return self
......@@ -6677,8 +6680,10 @@ class ClassNode(ExprNode, ModuleNameMixin):
if self.doc:
code.put_error_if_neg(self.pos,
'PyDict_SetItemString(%s, "__doc__", %s)' % (
'PyDict_SetItem(%s, %s, %s)' % (
self.dict.py_result(),
code.intern_identifier(
StringEncoding.EncodedString("__doc__")),
self.doc.py_result()))
py_mod_name = self.get_py_mod_name(code)
qualname = self.get_py_qualified_name(code)
......
......@@ -1252,8 +1252,10 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
if base_type.scope and base_type.scope.needs_gc():
code.putln("PyObject_GC_Track(o);")
else:
code.putln("if (PyType_IS_GC(Py_TYPE(o)->tp_base))"
" PyObject_GC_Track(o);")
code.putln("#if CYTHON_COMPILING_IN_CPYTHON")
code.putln("if (PyType_IS_GC(Py_TYPE(o)->tp_base))")
code.putln("#endif")
code.putln("PyObject_GC_Track(o);")
tp_dealloc = TypeSlots.get_base_slot_function(scope, tp_slot)
if tp_dealloc is not None:
......@@ -2197,6 +2199,8 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln('#if CYTHON_COMPILING_IN_PYPY')
code.putln('Py_CLEAR(%s);' % Naming.builtins_cname)
code.putln('#endif')
code.put_decref_clear(env.module_dict_cname, py_object_type,
nanny=False, clear_before_decref=True)
def generate_main_method(self, env, code):
module_is_main = "%s%s" % (Naming.module_is_main, self.full_module_name.replace('.', '__'))
......
......@@ -1368,8 +1368,8 @@ class CEnumDefNode(StatNode):
item.cname,
code.error_goto_if_null(temp, item.pos)))
code.put_gotref(temp)
code.putln('if (__Pyx_SetAttrString(%s, "%s", %s) < 0) %s' % (
Naming.module_cname,
code.putln('if (PyDict_SetItemString(%s, "%s", %s) < 0) %s' % (
Naming.moddict_cname,
item.name,
temp,
code.error_goto(item.pos)))
......
......@@ -285,6 +285,29 @@ class IterationTransform(Visitor.EnvTransform):
exception_value = '-1')
def _transform_unicode_iteration(self, node, slice_node, reversed=False):
if slice_node.is_literal:
# try to reduce to byte iteration for plain Latin-1 strings
try:
bytes_value = BytesLiteral(slice_node.value.encode('latin1'))
except UnicodeEncodeError:
pass
else:
bytes_slice = ExprNodes.SliceIndexNode(
slice_node.pos,
base=ExprNodes.BytesNode(
slice_node.pos, value=bytes_value,
constant_result=bytes_value,
type=PyrexTypes.c_char_ptr_type).coerce_to(
PyrexTypes.c_uchar_ptr_type, self.current_env()),
start=None,
stop=ExprNodes.IntNode(
slice_node.pos, value=len(bytes_value),
constant_result=len(bytes_value),
type=PyrexTypes.c_py_ssize_t_type),
type=Builtin.unicode_type, # hint for Python conversion
)
return self._transform_carray_iteration(node, bytes_slice, reversed)
unpack_temp_node = UtilNodes.LetRefNode(
slice_node.as_none_safe_node("'NoneType' is not iterable"))
......@@ -455,7 +478,16 @@ class IterationTransform(Visitor.EnvTransform):
counter_temp = counter.ref(node.target.pos)
if slice_base.type.is_string and node.target.type.is_pyobject:
# special case: char* -> bytes
# special case: char* -> bytes/unicode
if slice_node.type is Builtin.unicode_type:
target_value = ExprNodes.CastNode(
ExprNodes.DereferenceNode(
node.target.pos, operand=counter_temp,
type=ptr_type.base_type),
PyrexTypes.c_py_ucs4_type).coerce_to(
node.target.type, self.current_env())
else:
# char* -> bytes coercion requires slicing, not indexing
target_value = ExprNodes.SliceIndexNode(
node.target.pos,
start=ExprNodes.IntNode(node.target.pos, value='0',
......@@ -471,6 +503,7 @@ class IterationTransform(Visitor.EnvTransform):
# Allow iteration with pointer target to avoid copy.
target_value = counter_temp
else:
# TODO: can this safely be replaced with DereferenceNode() as above?
target_value = ExprNodes.IndexNode(
node.target.pos,
index=ExprNodes.IntNode(node.target.pos, value='0',
......
......@@ -962,6 +962,9 @@ class BuiltinObjectType(PyObjectType):
def assignable_from(self, src_type):
if isinstance(src_type, BuiltinObjectType):
if self.name == 'basestring':
return src_type.name in ('str', 'unicode', 'basestring')
else:
return src_type.name == self.name
elif src_type.is_extension_type:
# FIXME: This is an ugly special case that we currently
......@@ -1005,7 +1008,15 @@ class BuiltinObjectType(PyObjectType):
check = 'likely(%s(%s))' % (type_check, arg)
if not notnone:
check += '||((%s) == Py_None)' % arg
error = '(PyErr_Format(PyExc_TypeError, "Expected %s, got %%.200s", Py_TYPE(%s)->tp_name), 0)' % (self.name, arg)
if self.name == 'basestring':
name = '(PY_MAJOR_VERSION < 3 ? "basestring" : "str")'
space_for_name = 16
else:
name = '"%s"' % self.name
# avoid wasting too much space but limit number of different format strings
space_for_name = (len(self.name) // 16 + 1) * 16
error = '(PyErr_Format(PyExc_TypeError, "Expected %%.%ds, got %%.200s", %s, Py_TYPE(%s)->tp_name), 0)' % (
space_for_name, name, arg)
return check + '||' + error
def declaration_code(self, entity_code,
......
......@@ -62,7 +62,7 @@ def make_command_file(path_to_debug_info, prefix_code='', no_import=False):
return tempfilename
usage = "Usage: cygdb [options] [PATH [GDB_ARGUMENTS]]"
usage = "Usage: cygdb [options] [PATH [-- GDB_ARGUMENTS]]"
def main(path_to_debug_info=None, gdb_argv=None, no_import=False):
"""
......@@ -82,12 +82,12 @@ def main(path_to_debug_info=None, gdb_argv=None, no_import=False):
(options, args) = parser.parse_args()
if path_to_debug_info is None:
if len(args) > 1:
path_to_debug_info = args[1]
path_to_debug_info = args[0]
else:
path_to_debug_info = os.curdir
if gdb_argv is None:
gdb_argv = args[2:]
gdb_argv = args[1:]
if path_to_debug_info == '--':
no_import = True
......
......@@ -46,13 +46,13 @@
: 2012-05-02 andreasvc
: (see revision control)
"""
from libc cimport stdlib
from libc.string cimport strcat, strncat, \
memset, memchr, memcmp, memcpy, memmove
from cpython.object cimport Py_SIZE
from cpython.ref cimport PyTypeObject, Py_TYPE
from cpython.exc cimport PyErr_BadArgument
from cpython.mem cimport PyMem_Malloc, PyMem_Free
cdef extern from *: # Hard-coded utility code hack.
ctypedef class array.array [object arrayobject]
......@@ -88,50 +88,47 @@ cdef extern from *: # Hard-coded utility code hack.
arraydescr* ob_descr # struct arraydescr *ob_descr;
__data_union data
def __getbuffer__(array self, Py_buffer* info, int flags):
def __getbuffer__(self, Py_buffer* info, int flags):
# This implementation of getbuffer is geared towards Cython
# requirements, and does not yet fulfill the PEP.
# In particular strided access is always provided regardless
# of flags
cdef unsigned rows, columns, itemsize
item_count = Py_SIZE(self)
info.suboffsets = NULL
info.buf = self.data.as_chars
info.readonly = 0
info.ndim = 1
info.itemsize = itemsize = self.ob_descr.itemsize # e.g. sizeof(float)
info.itemsize = self.ob_descr.itemsize # e.g. sizeof(float)
info.len = info.itemsize * item_count
info.strides = <Py_ssize_t*> \
stdlib.malloc(sizeof(Py_ssize_t) * info.ndim * 2 + 2)
info.shape = info.strides + 1
info.shape[0] = Py_SIZE(self) # number of items
info.strides[0] = info.itemsize
info.shape = <Py_ssize_t*> PyMem_Malloc(sizeof(Py_ssize_t) + 2)
if not info.shape:
raise MemoryError()
info.shape[0] = item_count # constant regardless of resizing
info.strides = &info.itemsize
info.format = <char*>(info.strides + 2 * info.ndim)
info.format = <char*> (info.shape + 1)
info.format[0] = self.ob_descr.typecode
info.format[1] = 0
info.obj = self
def __releasebuffer__(array self, Py_buffer* info):
#if PyArray_HASFIELDS(self):
# stdlib.free(info.format)
#if sizeof(npy_intp) != sizeof(Py_ssize_t):
stdlib.free(info.strides)
def __releasebuffer__(self, Py_buffer* info):
PyMem_Free(info.shape)
array newarrayobject(PyTypeObject* type, Py_ssize_t size, arraydescr *descr)
# fast resize/realloc
# not suitable for small increments; reallocation 'to the point'
int resize(array self, Py_ssize_t n)
int resize(array self, Py_ssize_t n) except -1
# efficient for small increments (not in Py2.3-)
int resize_smart(array self, Py_ssize_t n)
int resize_smart(array self, Py_ssize_t n) except -1
cdef inline array clone(array template, Py_ssize_t length, bint zero):
""" fast creation of a new array, given a template array.
type will be same as template.
if zero is true, new array will be initialized with zeroes."""
cdef array op
op = newarrayobject(Py_TYPE(template), length, template.ob_descr)
if zero and op is not None:
memset(op.data.as_chars, 0, length * op.ob_descr.itemsize)
......@@ -139,28 +136,26 @@ cdef inline array clone(array template, Py_ssize_t length, bint zero):
cdef inline array copy(array self):
""" make a copy of an array. """
cdef array op
op = newarrayobject(Py_TYPE(self), Py_SIZE(self), self.ob_descr)
memcpy(op.data.as_chars, self.data.as_chars, Py_SIZE(op) * op.ob_descr.itemsize)
return op
cdef inline int extend_buffer(array self, char* stuff, Py_ssize_t n):
cdef inline int extend_buffer(array self, char* stuff, Py_ssize_t n) except -1:
""" efficient appending of new stuff of same type
(e.g. of same array type)
n: number of elements (not number of bytes!) """
cdef Py_ssize_t itemsize = self.ob_descr.itemsize
cdef Py_ssize_t orgsize = Py_SIZE(self)
if resize_smart(self, orgsize + n) == -1:
return -1
memcpy(self.data.as_chars + orgsize * itemsize, stuff, n * itemsize)
cdef Py_ssize_t origsize = Py_SIZE(self)
resize_smart(self, origsize + n)
memcpy(self.data.as_chars + origsize * itemsize, stuff, n * itemsize)
return 0
cdef inline int extend(array self, array other):
cdef inline int extend(array self, array other) except -1:
""" extend array with data from another array; types must match. """
if self.ob_descr.typecode != self.ob_descr.typecode:
if self.ob_descr.typecode != other.ob_descr.typecode:
PyErr_BadArgument()
return -1
return extend_buffer(self, other.data.as_chars, Py_SIZE(other))
cdef inline void zero(array op):
cdef inline void zero(array self):
""" set all elements of array to zero. """
memset(op.data.as_chars, 0, Py_SIZE(op) * op.ob_descr.itemsize)
memset(self.data.as_chars, 0, Py_SIZE(self) * self.ob_descr.itemsize)
......@@ -96,14 +96,14 @@ cdef extern from "Python.h":
# (Fortran-style if fort is 'F' or C-style otherwise) array of the
# given shape with the given number of bytes per element.
int PyBuffer_FillInfo(Py_buffer *view, void *buf,
Py_ssize_t len, int readonly,
int flags) except -1
int PyBuffer_FillInfo(Py_buffer *view, object exporter, void *buf,
Py_ssize_t len, int readonly, int flags) except -1
# Fill in a buffer-info structure, view, correctly for an exporter
# that can only share a contiguous chunk of memory of “unsigned
# bytes” of the given length. Return 0 on success and -1 (with
# raising an error) on error.
# DEPRECATED HERE: do not cimport from here, cimport from cpython.object instead
object PyObject_Format(object obj, object format_spec)
# Takes an arbitrary object and returns the result of calling
# obj.__format__(format_spec).
......@@ -285,3 +285,8 @@ cdef extern from "Python.h":
# and returns NULL if the object cannot be iterated.
Py_ssize_t Py_SIZE(object o)
object PyObject_Format(object obj, object format_spec)
# Takes an arbitrary object and returns the result of calling
# obj.__format__(format_spec).
# Added in Py2.6
......@@ -86,8 +86,7 @@ cdef PyObject* SetupContext(char* funcname, int lineno, char* filename) except N
# In that case, we don't want to be doing anything fancy
# like caching and resetting exceptions.
return NULL
cdef PyObject* type = NULL, *value = NULL, *tb = NULL
cdef PyObject* result = NULL
cdef (PyObject*) type = NULL, value = NULL, tb = NULL, result = NULL
PyThreadState_Get()
PyErr_Fetch(&type, &value, &tb)
try:
......@@ -101,7 +100,7 @@ cdef PyObject* SetupContext(char* funcname, int lineno, char* filename) except N
cdef void GOTREF(PyObject* ctx, PyObject* p_obj, int lineno):
if ctx == NULL: return
cdef PyObject* type = NULL, *value = NULL, *tb = NULL
cdef (PyObject*) type = NULL, value = NULL, tb = NULL
PyErr_Fetch(&type, &value, &tb)
try:
try:
......@@ -118,7 +117,7 @@ cdef void GOTREF(PyObject* ctx, PyObject* p_obj, int lineno):
cdef int GIVEREF_and_report(PyObject* ctx, PyObject* p_obj, int lineno):
if ctx == NULL: return 1
cdef PyObject* type = NULL, *value = NULL, *tb = NULL
cdef (PyObject*) type = NULL, value = NULL, tb = NULL
cdef bint decref_ok = False
PyErr_Fetch(&type, &value, &tb)
try:
......@@ -150,7 +149,7 @@ cdef void DECREF(PyObject* ctx, PyObject* obj, int lineno):
cdef void FinishContext(PyObject** ctx):
if ctx == NULL or ctx[0] == NULL: return
cdef PyObject* type = NULL, *value = NULL, *tb = NULL
cdef (PyObject*) type = NULL, value = NULL, tb = NULL
cdef object errors = None
cdef Context context
PyThreadState_Get()
......
......@@ -201,7 +201,7 @@ static PyObject* __Pyx_Intern(PyObject* s); /* proto */
static PyObject* __Pyx_Intern(PyObject* s) {
if (!(likely(PyString_CheckExact(s)))) {
PyErr_Format(PyExc_TypeError, "Expected str, got %s", Py_TYPE(s)->tp_name);
PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "str", Py_TYPE(s)->tp_name);
return 0;
}
Py_INCREF(s);
......
......@@ -14,7 +14,10 @@ static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed
}
if (none_allowed && obj == Py_None) return 1;
else if (exact) {
if (Py_TYPE(obj) == type) return 1;
if (likely(Py_TYPE(obj) == type)) return 1;
#if PY_MAJOR_VERSION == 2
else if ((type == &PyBaseString_Type) && __Pyx_PyBaseString_CheckExact(obj)) return 1;
#endif
}
else {
if (PyObject_TypeCheck(obj, type)) return 1;
......
......@@ -186,7 +186,7 @@
#else
#define __Pyx_PyBaseString_Check(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj) || \
PyString_Check(obj) || PyUnicode_Check(obj))
#define __Pyx_PyBaseString_CheckExact(obj) (Py_TYPE(obj) == &PyBaseString_Type)
#define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
#endif
#if PY_VERSION_HEX < 0x02060000
......
......@@ -288,7 +288,7 @@ static {{struct_type_decl}} {{funcname}}(PyObject * o) {
PyObject *value = NULL;
if (!PyMapping_Check(o)) {
PyErr_Format(PyExc_TypeError, "Expected a mapping, not %s", o->ob_type->tp_name);
PyErr_Format(PyExc_TypeError, "Expected %.16s, got %.200s", "a mapping", Py_TYPE(o)->tp_name);
goto bad;
}
......
......@@ -45,32 +45,56 @@ A very simple example of malloc usage is the following::
# return the previously allocated memory to the system
free(my_array)
One important thing to remember is that blocks of memory obtained with malloc
*must* be manually released with free when one is done with them or it won't
be reclaimed until the python process exits. This is called a memory leak.
If a chuck of memory needs a larger lifetime then can be managed by a
``try..finally`` block, another helpful idiom is to tie its lifetime to a
Python object to leverage the Python runtime's memory management, e.g.::
Note that the C-API functions for allocating memory on the Python heap
are generally preferred over the low-level C functions above as the
memory they provide is actually accounted for in Python's internal
memory management system. They also have special optimisations for
smaller memory blocks, which speeds up their allocation by avoiding
costly operating system calls.
The C-API functions can be found in the ``cpython.mem`` standard
declarations file::
from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free
Their interface and usage are identical to those of the corresponding
low-level C functions.
One important thing to remember is that blocks of memory obtained with
:c:func:`malloc` or :c:func:`PyMem_Malloc` *must* be manually released
with a corresponding call to :c:func:`free` or :c:func:`PyMem_Free`
when they are no longer used (and *must* always use the matching
type of free function). Otherwise, they won't be reclaimed until the
python process exits. This is called a memory leak.
If a chunk of memory needs a larger lifetime than can be managed by a
``try..finally`` block, another helpful idiom is to tie its lifetime
to a Python object to leverage the Python runtime's memory management,
e.g.::
cdef class SomeMemory:
cdef doube* data
cdef double* data
def __init__(self, number):
def __cinit__(self, number):
# allocate some memory (filled with random data)
self.data = <double*> malloc(number * sizeof(double))
if self.data == NULL:
self.data = <double*> PyMem_Malloc(number * sizeof(double))
if not self.data:
raise MemoryError()
def resize(self, new_number):
# Allocates new_number * sizeof(double) bytes,
# preserving the contents and making a best-effort to
# re-use the original data location.
self.data = <double*> realloc(self.data, new_number * sizeof(double))
mem = <double*> PyMem_Realloc(self.data, new_number * sizeof(double))
if not mem:
raise MemoryError()
# Only overwrite the pointer if the memory was really reallocated.
# On error (mem is NULL), the original memory has not been freed.
self.data = mem
def __dealloc__(self, number):
if self.data != NULL:
free(self.data)
PyMem_Free(self.data) # no-op if self.data is NULL
It should be noted that Cython has special support for (multi-dimensional)
arrays of simple types via NumPy and memory views which are more full featured
......
......@@ -16,18 +16,23 @@ implicitly insert these encoding/decoding steps.
Python string types in Cython code
----------------------------------
Cython supports three Python string types: ``bytes``, ``str``
and ``unicode``. The ``str`` type is special in that it is the
byte string in Python 2 and the Unicode string in Python 3 (for Cython
code compiled with language level 2, i.e. the default). Thus, in Python
2, both ``bytes`` and ``str`` represent the byte string type,
whereas in Python 3, ``str`` and ``unicode`` represent the Python
Unicode string type. The switch is made at C compile time, the Python
version that is used to run Cython is not relevant.
When compiling Cython code with language level 3, the ``str`` type
is identified with exactly the Unicode string type at Cython compile time,
i.e. it no does not identify with ``bytes`` when running in Python 2.
Cython supports four Python string types: ``bytes``, ``str``,
``unicode`` and ``basestring``. The ``bytes`` and ``unicode`` types
are the specific types known from normal Python 2.x (named ``bytes``
and ``str`` in Python 3).
The ``str`` type is special in that it is the byte string in Python 2
and the Unicode string in Python 3 (for Cython code compiled with
language level 2, i.e. the default). That is, it always corresponds
exactly to the type that the Python runtime itself calls ``str``.
Thus, in Python 2, both ``bytes`` and ``str`` represent the byte string
type, whereas in Python 3, both ``str`` and ``unicode`` represent the
Python Unicode string type. The switch is made at C compile time, the
Python version that is used to run Cython is not relevant.
When compiling Cython code with language level 3, the ``str`` type is
identified with exactly the Unicode string type at Cython compile time,
i.e. it does not identify with ``bytes`` when running in Python 2.
Note that the ``str`` type is not compatible with the ``unicode``
type in Python 2, i.e. you cannot assign a Unicode string to a variable
......@@ -40,6 +45,17 @@ and users normally expect code to be able to work with both. Code that
only targets Python 3 can safely type variables and arguments as either
``bytes`` or ``unicode``.
The ``basestring`` type represents both the types ``str`` and ``unicode``,
i.e. all Python text string types in Python 2 and Python 3. This can be
used for typing text variables that normally contain Unicode text (at
least in Python 3) but must additionally accept the ``str`` type in
Python 2 for backwards compatibility reasons. It is not compatible with
the ``bytes`` type. Its usage should be rare in normal Cython code as
the generic ``object`` type (i.e. untyped code) will normally be good
enough and has the additional advantage of supporting the assignment of
string subtypes. Support for the ``basestring`` type is new in Cython
0.20.
General notes about C strings
-----------------------------
......
......@@ -20,9 +20,8 @@ Most of these things that fall more into the implementation details rather
than semantics, and we may decide not to fix (or require a --pedantic flag to get).
==========
Nested tuple argument unpacking.
==========
Nested tuple argument unpacking
===============================
::
......@@ -32,9 +31,8 @@ Nested tuple argument unpacking.
This was removed in Python 3.
==========
Inspect support
==========
===============
While it is quite possible to emulate the interface of functions in
Cython's own function type, and recent Cython releases have seen several
......@@ -45,9 +43,8 @@ base class. This has a negative impact on code that uses inspect to
inspect function objects, but would require a change to Python itself.
==========
Stack frames
==========
============
Currently we generate fake tracebacks as part of exception propagation,
but don't fill in locals and can't fill in co_code.
......@@ -55,18 +52,15 @@ To be fully compatible, we would have to generate these stack frame objects at
function call time (with a potential performance penalty). We may have an
option to enable this for debugging.
==========
Identity vs. equality for inferred literals.
==========
::
a = 1.0 # a inferred to be double
b = c = None # a inferred to be type object
if some_runtime_expression:
b = a
c = a
print b is c # py float created twice
Identity vs. equality for inferred literals
===========================================
::
a = 1.0 # a inferred to be C type 'double'
b = c = None # b and c inferred to be type 'object'
if some_runtime_expression:
b = a # creates a new Python float object
c = a # creates a new Python float object
print b is c # most likely not the same object
......@@ -15,6 +15,9 @@ cdef char* c2 = b"abc"
cdef bytes b2 = c1
cdef char* c3 = b1
cdef basestring bs1 = "abc"
cdef basestring bs2 = u"abc"
cdef object o1 = "abc"
cdef object o2 = b"abc"
cdef object o3 = u"abc"
......@@ -24,6 +27,10 @@ o5 = b1
o6 = s1
o7 = u1
o8 = cu1
o9 = bs1
u1 = bs1
s1 = bs1
# errors:
cdef char* c_f1 = u"abc"
......@@ -38,6 +45,7 @@ cdef Py_UNICODE* cu_f4 = b"abc"
cdef bytes b_f1 = u"abc"
cdef bytes b_f2 = u1
cdef bytes b_f3 = s1
cdef bytes b_f4 = bs1
cdef str s_f1 = b"abc"
cdef str s_f2 = b1
......@@ -50,6 +58,9 @@ cdef unicode u_f3 = b"abc"
cdef unicode u_f4 = b1
cdef unicode u_f5 = c1
cdef basestring bs_f1 = b"abc"
cdef basestring bs_f2 = b1
cdef tuple t_f1 = "abc"
cdef tuple t_f2 = u"abc"
cdef tuple t_f3 = b"abc"
......@@ -64,36 +75,40 @@ print <unicode>c1
print <unicode>c1[1:2]
_ERRORS = u"""
29:20: Unicode literals do not support coercion to C types other than Py_UNICODE/Py_UCS4 (for characters) or Py_UNICODE* (for strings).
30:22: Unicode objects only support coercion to Py_UNICODE*.
31:22: 'str' objects do not support coercion to C types (use 'bytes'?).
33:27: Cannot assign type 'char *' to 'Py_UNICODE *'
34:27: Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.
35:27: 'str' objects do not support coercion to C types (use 'unicode'?).
36:25: Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.
38:20: Cannot convert Unicode string to 'bytes' implicitly, encoding required.
39:22: Cannot convert Unicode string to 'bytes' implicitly, encoding required.
40:22: Cannot convert 'str' to 'bytes' implicitly. This is not portable.
42:17: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.
43:19: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.
44:17: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.
45:19: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.
47:20: str objects do not support coercion to unicode, use a unicode string literal instead (u'')
48:22: str objects do not support coercion to unicode, use a unicode string literal instead (u'')
49:20: Cannot convert 'bytes' object to unicode implicitly, decoding required
50:22: Cannot convert 'bytes' object to unicode implicitly, decoding required
51:22: Cannot convert 'char*' to unicode implicitly, decoding required
53:19: Cannot assign type 'str object' to 'tuple object'
54:18: Cannot assign type 'unicode object' to 'tuple object'
55:18: Cannot assign type 'bytes object' to 'tuple object'
61:13: default encoding required for conversion from 'char *' to 'str object'
62:13: default encoding required for conversion from 'char *' to 'str object'
63:17: Cannot convert 'char*' to unicode implicitly, decoding required
64:17: default encoding required for conversion from 'char *' to 'unicode object'
36:20: Unicode literals do not support coercion to C types other than Py_UNICODE/Py_UCS4 (for characters) or Py_UNICODE* (for strings).
37:22: Unicode objects only support coercion to Py_UNICODE*.
38:22: 'str' objects do not support coercion to C types (use 'bytes'?).
40:27: Cannot assign type 'char *' to 'Py_UNICODE *'
41:27: Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.
42:27: 'str' objects do not support coercion to C types (use 'unicode'?).
43:25: Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.
45:20: Cannot convert Unicode string to 'bytes' implicitly, encoding required.
46:22: Cannot convert Unicode string to 'bytes' implicitly, encoding required.
47:22: Cannot convert 'str' to 'bytes' implicitly. This is not portable.
48:23: Cannot convert 'basestring' object to bytes implicitly. This is not portable.
50:17: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.
51:19: Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.
52:17: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.
53:19: Cannot convert Unicode string to 'str' implicitly. This is not portable and requires explicit encoding.
55:20: str objects do not support coercion to unicode, use a unicode string literal instead (u'')
56:22: str objects do not support coercion to unicode, use a unicode string literal instead (u'')
57:20: Cannot convert 'bytes' object to unicode implicitly, decoding required
58:22: Cannot convert 'bytes' object to unicode implicitly, decoding required
59:22: Cannot convert 'char*' to unicode implicitly, decoding required
61:24: Cannot convert 'bytes' object to basestring implicitly. This is not portable to Py3.
62:26: Cannot convert 'bytes' object to basestring implicitly. This is not portable to Py3.
64:19: Cannot assign type 'str object' to 'tuple object'
65:18: Cannot assign type 'unicode object' to 'tuple object'
66:18: Cannot assign type 'bytes object' to 'tuple object'
72:13: default encoding required for conversion from 'char *' to 'str object'
73:13: default encoding required for conversion from 'char *' to 'str object'
74:17: Cannot convert 'char*' to unicode implicitly, decoding required
75:17: default encoding required for conversion from 'char *' to 'unicode object'
"""
......@@ -37,3 +37,50 @@ def unicode_subtypes_basestring():
True
"""
return issubclass(unicode, basestring)
def basestring_typed_variable(obj):
"""
>>> basestring_typed_variable(None) is None
True
>>> basestring_typed_variable(ustring) is ustring
True
>>> basestring_typed_variable(sstring) is sstring
True
>>> if IS_PY3: print(True)
... else: print(basestring_typed_variable(bstring) is bstring)
True
>>> class S(str): pass
>>> basestring_typed_variable(S()) # doctest: +ELLIPSIS
Traceback (most recent call last):
TypeError: ...got S...
"""
cdef basestring s
s = u'abc'
assert s
s = 'abc'
assert s
# make sure coercion also works in conditional expressions
s = u'abc' if obj else 'abc'
assert s
s = obj
return s
def basestring_typed_argument(basestring obj):
"""
>>> basestring_typed_argument(None) is None
True
>>> basestring_typed_argument(ustring) is ustring
True
>>> basestring_typed_argument(sstring) is sstring
True
>>> if IS_PY3: print(True)
... else: print(basestring_typed_argument(bstring) is bstring)
True
>>> class S(str): pass
>>> basestring_typed_argument(S()) # doctest: +ELLIPSIS
Traceback (most recent call last):
TypeError: ...got S...
"""
return obj
......@@ -291,7 +291,7 @@ def loop_over_unicode_literal():
"""
# Py_UCS4 can represent any Unicode character
for uchar in 'abcdefg':
pass
assert uchar in 'abcdefg'
return cython.typeof(uchar)
def list_comp():
......
......@@ -209,14 +209,31 @@ def count_lower_case_characters_slice_reversed(unicode ustring):
count += 1
return count
def loop_object_over_latin1_unicode_literal():
"""
>>> result = loop_object_over_latin1_unicode_literal()
>>> print(result[:-1])
abcdefg
>>> ord(result[-1]) == 0xD7
True
"""
cdef object uchar
chars = []
for uchar in u'abcdefg\xD7':
chars.append(uchar)
return u''.join(chars)
def loop_object_over_unicode_literal():
"""
>>> print(loop_object_over_unicode_literal())
>>> result = loop_object_over_unicode_literal()
>>> print(result[:-1])
abcdefg
>>> ord(result[-1]) == 0xF8FD
True
"""
cdef object uchar
chars = []
for uchar in u'abcdefg':
for uchar in u'abcdefg\uF8FD':
chars.append(uchar)
return u''.join(chars)
......
......@@ -147,8 +147,15 @@ def test_extend():
"""
cdef array.array ca = array.array('i', [1, 2, 3])
cdef array.array cb = array.array('i', [4, 5])
cdef array.array cf = array.array('f', [1.0, 2.0, 3.0])
array.extend(ca, cb)
assert list(ca) == [1, 2, 3, 4, 5], list(ca)
try:
array.extend(ca, cf)
except TypeError:
pass
else:
assert False, 'extending incompatible array types did not raise'
def test_likes(a):
"""
......
......@@ -87,7 +87,7 @@ def test_obj_to_struct(MyStruct mystruct):
>>> test_obj_to_struct(None)
Traceback (most recent call last):
...
TypeError: Expected a mapping, not NoneType
TypeError: Expected a mapping, got NoneType
>>> test_obj_to_struct(dict(s=b"world"))
Traceback (most recent call last):
...
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment