Commit 1e81abf9 authored by Robert Bradshaw's avatar Robert Bradshaw

Allow setting a default encoding to ease str/unicode <-> c string conversion.

parent c06eadaa
This diff is collapsed.
...@@ -556,7 +556,17 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -556,7 +556,17 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("#endif") code.putln("#endif")
code.putln("") code.putln("")
code.put(UtilityCode.load_as_string("UtilityFunctionPredeclarations", "ModuleSetupCode.c")[0]) code.put(UtilityCode.load_as_string("UtilityFunctionPredeclarations", "ModuleSetupCode.c")[0])
c_string_type = env.directives['c_string_type']
c_string_encoding = env.directives['c_string_encoding']
if c_string_type != 'bytes' and not c_string_encoding:
error(self.pos, "a default encoding must be provided if c_string_type != bytes")
code.putln('#define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII %s' % int(c_string_encoding == 'ascii'))
code.putln('#define __PYX_DEFAULT_STRING_ENCODING "%s"' % c_string_encoding)
code.putln('#define __Pyx_PyObject_FromString __Pyx_Py%s_FromString' % c_string_type.title())
code.putln('#define __Pyx_PyObject_FromStringAndSize __Pyx_Py%s_FromStringAndSize' % c_string_type.title())
code.put(UtilityCode.load_as_string("TypeConversions", "TypeConversion.c")[0]) code.put(UtilityCode.load_as_string("TypeConversions", "TypeConversion.c")[0])
code.put(Nodes.branch_prediction_macros) code.put(Nodes.branch_prediction_macros)
code.putln('') code.putln('')
code.putln('static PyObject *%s;' % env.module_cname) code.putln('static PyObject *%s;' % env.module_cname)
...@@ -1888,6 +1898,10 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode): ...@@ -1888,6 +1898,10 @@ class ModuleNode(Nodes.Node, Nodes.BlockNode):
code.putln("/*--- Initialize various global constants etc. ---*/") code.putln("/*--- Initialize various global constants etc. ---*/")
code.putln(code.error_goto_if_neg("__Pyx_InitGlobals()", self.pos)) code.putln(code.error_goto_if_neg("__Pyx_InitGlobals()", self.pos))
code.putln("#ifdef __PYX_DEFAULT_STRING_ENCODING_IS_ASCII")
code.putln("if (__Pyx_init_sys_getdefaultencoding_not_ascii() < 0) %s" % code.error_goto(self.pos))
code.putln("#endif")
__main__name = code.globalstate.get_py_string_const( __main__name = code.globalstate.get_py_string_const(
EncodedString("__main__"), identifier=True) EncodedString("__main__"), identifier=True)
code.putln("if (%s%s) {" % (Naming.module_is_main, self.full_module_name.replace('.', '__'))) code.putln("if (%s%s) {" % (Naming.module_is_main, self.full_module_name.replace('.', '__')))
......
...@@ -95,6 +95,8 @@ directive_defaults = { ...@@ -95,6 +95,8 @@ directive_defaults = {
'language_level': 2, 'language_level': 2,
'fast_getattr': False, # Undocumented until we come up with a better way to handle this everywhere. 'fast_getattr': False, # Undocumented until we come up with a better way to handle this everywhere.
'py2_import': False, # For backward compatibility of Cython's source code in Py3 source mode 'py2_import': False, # For backward compatibility of Cython's source code in Py3 source mode
'c_string_type': 'bytes',
'c_string_encoding': '',
# set __file__ and/or __path__ to known source/target path at import time (instead of not having them available) # set __file__ and/or __path__ to known source/target path at import time (instead of not having them available)
'set_initial_path' : None, # SOURCEFILE or "/full/path/to/module" 'set_initial_path' : None, # SOURCEFILE or "/full/path/to/module"
...@@ -160,6 +162,9 @@ directive_scopes = { # defaults to available everywhere ...@@ -160,6 +162,9 @@ directive_scopes = { # defaults to available everywhere
'set_initial_path' : ('module',), 'set_initial_path' : ('module',),
'test_assert_path_exists' : ('function', 'class', 'cclass'), 'test_assert_path_exists' : ('function', 'class', 'cclass'),
'test_fail_if_path_exists' : ('function', 'class', 'cclass'), 'test_fail_if_path_exists' : ('function', 'class', 'cclass'),
# Avoid scope-specific to/from_py_functions.
'c_string_type': ('module',),
'c_string_encoding': ('module',),
} }
def parse_directive_value(name, value, relaxed_bool=False): def parse_directive_value(name, value, relaxed_bool=False):
......
...@@ -2179,13 +2179,13 @@ class CPointerBaseType(CType): ...@@ -2179,13 +2179,13 @@ class CPointerBaseType(CType):
if self.is_string and not base_type.is_error: if self.is_string and not base_type.is_error:
if base_type.signed: if base_type.signed:
self.to_py_function = "PyBytes_FromString" self.to_py_function = "__Pyx_PyObject_FromString"
if self.is_ptr: if self.is_ptr:
self.from_py_function = "PyBytes_AsString" self.from_py_function = "__Pyx_PyObject_AsString"
else: else:
self.to_py_function = "__Pyx_PyBytes_FromUString" self.to_py_function = "__Pyx_PyObject_FromUString"
if self.is_ptr: if self.is_ptr:
self.from_py_function = "__Pyx_PyBytes_AsUString" self.from_py_function = "__Pyx_PyObject_AsUString"
self.exception_value = "NULL" self.exception_value = "NULL"
def py_type_name(self): def py_type_name(self):
......
...@@ -7,10 +7,13 @@ cdef extern from *: ...@@ -7,10 +7,13 @@ cdef extern from *:
cdef cppclass string "std::string": cdef cppclass string "std::string":
string() string()
string(char* c_str, size_t size) string(char* c_str, size_t size)
cdef char* __Pyx_PyObject_AsStringAndSize(object, Py_ssize_t*)
@cname("{{cname}}") @cname("{{cname}}")
cdef string {{cname}}(object o) except *: cdef string {{cname}}(object o) except *:
return string(<char*>o, len(o)) cdef Py_ssize_t length
cdef char* data = __Pyx_PyObject_AsStringAndSize(o, &length)
return string(data, length)
#################### string.to_py #################### #################### string.to_py ####################
...@@ -21,10 +24,11 @@ cdef extern from *: ...@@ -21,10 +24,11 @@ cdef extern from *:
cdef cppclass string "const std::string": cdef cppclass string "const std::string":
char* data() char* data()
size_t size() size_t size()
cdef object __Pyx_PyObject_FromStringAndSize(char*, size_t)
@cname("{{cname}}") @cname("{{cname}}")
cdef object {{cname}}(string& s): cdef object {{cname}}(string& s):
return s.data()[:s.size()] return __Pyx_PyObject_FromStringAndSize(s.data(), s.size())
#################### vector.from_py #################### #################### vector.from_py ####################
......
...@@ -2,8 +2,27 @@ ...@@ -2,8 +2,27 @@
/* Type Conversion Predeclarations */ /* Type Conversion Predeclarations */
#define __Pyx_PyBytes_FromUString(s) PyBytes_FromString((char*)s) static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*);
#define __Pyx_PyBytes_AsUString(s) ((unsigned char*) PyBytes_AsString(s)) static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
#define __Pyx_PyBytes_FromString PyBytes_FromString
#define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char*);
#define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
#if PY_VERSION_HEX < 0x03000000
#define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString
#define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
#else
#define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString
#define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
#endif
#define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s))
#define __Pyx_PyObject_FromUString(s) __Pyx_PyObject_FromString((char*)s)
#define __Pyx_PyBytes_FromUString(s) __Pyx_PyBytes_FromString((char*)s)
#define __Pyx_PyStr_FromUString(s) __Pyx_PyStr_FromString((char*)s)
#define __Pyx_PyUnicode_FromUString(s) __Pyx_PyUnicode_FromString((char*)s)
#define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None) #define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None)
#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False)) #define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
...@@ -21,10 +40,129 @@ static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*); ...@@ -21,10 +40,129 @@ static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*);
#endif #endif
#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) #define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
#if PY_VERSION_HEX < 0x03000000 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
static int __Pyx_sys_getdefaultencoding_not_ascii;
static int __Pyx_init_sys_getdefaultencoding_not_ascii() {
PyObject* sys = NULL;
PyObject* default_encoding = NULL;
PyObject* codecs = NULL;
PyObject* normalized_encoding = NULL;
PyObject* normalized_encoding_name = NULL;
sys = PyImport_ImportModule("sys");
if (sys == NULL) goto bad;
default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
if (default_encoding == NULL) goto bad;
if (strcmp(PyBytes_AsString(default_encoding), "ascii") == 0) {
__Pyx_sys_getdefaultencoding_not_ascii = 0;
} else {
char* normalized_encoding_c;
codecs = PyImport_ImportModule("codecs");
printf("codecs %p\n", codecs);
if (codecs == NULL) goto bad;
normalized_encoding = PyObject_CallMethod(codecs, (char*) (const char*) "lookup", (char*) (const char*) "O", default_encoding);
printf("normalized_encoding %p\n", normalized_encoding);
if (normalized_encoding == NULL) goto bad;
normalized_encoding_name = PyObject_GetAttrString(normalized_encoding, (char*) (const char*) "name");
printf("normalized_encoding_name %p\n", normalized_encoding_name);
if (normalized_encoding_name == NULL) goto bad;
normalized_encoding_c = PyBytes_AsString(normalized_encoding_name);
printf("normalized_encoding_c %s\n", normalized_encoding_c);
if (normalized_encoding_c == NULL) goto bad;
__Pyx_sys_getdefaultencoding_not_ascii = strcmp(normalized_encoding_c, "ascii");
if (!__Pyx_sys_getdefaultencoding_not_ascii) {
int ascii_compatible =
(strncmp(normalized_encoding_c, "iso8859-", 8) == 0 ||
strcmp(normalized_encoding_c, "macroman") == 0 ||
strcmp(normalized_encoding_c, "utf-8") == 0);
// I've never heard of a system where this happens, but it might...
if (!ascii_compatible) {
PyErr_Format(
PyExc_ValueError,
"This module compiled with c_string_encoding=ascii, but default encoding '%s' is not a superset of ascii.",
normalized_encoding_c);
goto bad;
}
}
}
printf("__Pyx_sys_getdefaultencoding_not_ascii %d\n", __Pyx_sys_getdefaultencoding_not_ascii);
Py_XDECREF(sys);
Py_XDECREF(default_encoding);
Py_XDECREF(codecs);
Py_XDECREF(normalized_encoding);
Py_XDECREF(normalized_encoding_name);
return 0;
bad:
Py_XDECREF(sys);
Py_XDECREF(default_encoding);
Py_XDECREF(codecs);
Py_XDECREF(normalized_encoding);
Py_XDECREF(normalized_encoding_name);
return -1;
}
#else
#define __Pyx_init_sys_getdefaultencoding_not_ascii() 0
#endif
/////////////// TypeConversions /////////////// /////////////// TypeConversions ///////////////
/* Type Conversion Functions */ /* Type Conversion Functions */
static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(char* c_str) {
return __Pyx_PyUnicode_FromStringAndSize(c_str, strlen(c_str));
}
static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) {
Py_ssize_t ignore;
return __Pyx_PyObject_AsStringAndSize(o, &ignore);
}
static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
if (
#if PY_VERSION_HEX < 0x03000000
__Pyx_sys_getdefaultencoding_not_ascii &&
#endif
PyUnicode_Check(o)) {
#if PY_VERSION_HEX < 0x03030000
// borrowed, cached reference
PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL);
char* maybe_ascii = PyBytes_AS_STRING(defenc);
char* end = maybe_ascii + PyBytes_GET_SIZE(defenc);
for (char* c = maybe_ascii; c < end; c++) {
if ((unsigned char) (*c) >= 128) {
// raise the error
PyUnicode_AsASCIIString(o);
return NULL;
}
}
*length = PyBytes_GET_SIZE(defenc);
return maybe_ascii;
#else /* PY_VERSION_HEX < 0x03030000 */
PyUnicode_READY(o);
if (PyUnicode_IS_ASCIII(o)) {
// cached for the lifetime of the object
*length = PyUnicode_GET_DATA_SIZE(o);
return PyUnicode_AsUTF8(o);
} else {
// raise the error
PyUnicode_AsASCIIString(o);
return NULL;
}
#endif /* PY_VERSION_HEX < 0x03030000 */
} else
#endif /* __PYX_DEFAULT_STRING_ENCODING_IS_ASCII */
{
char* result;
int r = PyBytes_AsStringAndSize(o, &result, length);
if (r < 0) {
return NULL;
} else {
return result;
}
}
}
/* Note: __Pyx_PyObject_IsTrue is written to minimize branching. */ /* Note: __Pyx_PyObject_IsTrue is written to minimize branching. */
static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
int is_true = x == Py_True; int is_true = x == Py_True;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment