Commit 677efaa6 authored by Stefan Behnel

optimise 1-arg/no-args calls to PyCFunction by avoiding tuple packing if the function allows it

--HG--
extra : transplant_source : %E7%F6%F6%0F%80%3A%B2%BDs%7E%8D%80p%2AYX%3B%08%DBL
parent e9b8307e
......@@ -56,6 +56,8 @@ Features added
Optimizations
-------------
* Simple calls to C-implemented Python functions/methods are faster.
* The "and"/"or" operators try to avoid unnecessary coercions of their
arguments. They now evaluate the truth value of each argument
independently and only coerce the final result of the whole expression
......@@ -71,8 +73,6 @@ Optimizations
* Calls to ``slice()`` are translated to a straight C-API call.
* Simple Python method calls are about 10% faster.
Bugs fixed
----------
......
......@@ -4701,15 +4701,24 @@ class SimpleCallNode(CallNode):
if self.function.entry and self.function.entry.utility_code:
code.globalstate.use_utility_code(self.function.entry.utility_code)
if func_type.is_pyobject:
arg_code = self.arg_tuple.py_result()
code.globalstate.use_utility_code(UtilityCode.load_cached(
"PyObjectCall", "ObjectHandling.c"))
code.putln(
"%s = __Pyx_PyObject_Call(%s, %s, NULL); %s" % (
self.result(),
self.function.py_result(),
arg_code,
code.error_goto_if_null(self.result(), self.pos)))
if func_type is not type_type and not self.arg_tuple.args and self.arg_tuple.is_literal:
code.globalstate.use_utility_code(UtilityCode.load_cached(
"PyObjectCallNoArg", "ObjectHandling.c"))
code.putln(
"%s = __Pyx_PyObject_CallNoArg(%s); %s" % (
self.result(),
self.function.py_result(),
code.error_goto_if_null(self.result(), self.pos)))
else:
arg_code = self.arg_tuple.py_result()
code.globalstate.use_utility_code(UtilityCode.load_cached(
"PyObjectCall", "ObjectHandling.c"))
code.putln(
"%s = __Pyx_PyObject_Call(%s, %s, NULL); %s" % (
self.result(),
self.function.py_result(),
arg_code,
code.error_goto_if_null(self.result(), self.pos)))
code.put_gotref(self.py_result())
elif func_type.is_cfunction:
if self.has_optional_args:
......@@ -4800,14 +4809,14 @@ class PyMethodCallNode(SimpleCallNode):
self.allocate_temp_result(code)
self.function.generate_evaluation_code(code)
assert self.arg_tuple.mult_factor is None
args = self.arg_tuple.args
for arg in args:
arg.generate_evaluation_code(code)
self_arg = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
self_arg = code.funcstate.allocate_temp(py_object_type, manage_ref=bool(args))
function = code.funcstate.allocate_temp(py_object_type, manage_ref=False)
arg_offset = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False)
args_tuple = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
code.putln("%s = 0;" % arg_offset)
code.putln("%s = %s;" % (function, self.function.py_result()))
......@@ -4818,47 +4827,72 @@ class PyMethodCallNode(SimpleCallNode):
# the following is always true in Py3 (kept only for safety),
# but is false for unbound methods in Py2
code.putln("if (likely(%s)) {" % self_arg)
code.put_incref(self_arg, py_object_type)
if args:
code.put_incref(self_arg, py_object_type)
code.putln("%s = PyMethod_GET_FUNCTION(%s);" % (function, function))
code.putln("%s = 1;" % arg_offset)
code.putln("}")
code.putln("}")
code.putln("%s = PyTuple_New(%d+%s); %s" % (
args_tuple, len(args), arg_offset,
code.error_goto_if_null(args_tuple, self.pos)))
code.put_gotref(args_tuple)
if not args:
# fastest special case: try to avoid tuple creation
code.putln("if (%s == 1) {" % arg_offset)
code.funcstate.release_temp(arg_offset)
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectCallOneArg", "ObjectHandling.c"))
code.putln(
"%s = __Pyx_PyObject_CallOneArg(%s, %s); %s" % (
self.result(),
function, self_arg,
code.error_goto_if_null(self.result(), self.pos)))
code.funcstate.release_temp(self_arg) # borrowed ref in this case
code.putln("} else {")
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectCallNoArg", "ObjectHandling.c"))
code.putln(
"%s = __Pyx_PyObject_CallNoArg(%s); %s" % (
self.result(),
function,
code.error_goto_if_null(self.result(), self.pos)))
code.putln("}")
code.put_gotref(self.py_result())
else:
args_tuple = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
code.putln("%s = PyTuple_New(%d+%s); %s" % (
args_tuple, len(args), arg_offset,
code.error_goto_if_null(args_tuple, self.pos)))
code.put_gotref(args_tuple)
code.putln("if (%s == 1) {" % arg_offset)
code.putln("PyTuple_SET_ITEM(%s, 0, %s); __Pyx_GIVEREF(%s); %s = NULL;" % (
args_tuple, self_arg, self_arg, self_arg))
code.funcstate.release_temp(self_arg)
code.putln("}")
code.putln("if (%s == 1) {" % arg_offset)
code.putln("PyTuple_SET_ITEM(%s, 0, %s); __Pyx_GIVEREF(%s); %s = NULL;" % (
args_tuple, self_arg, self_arg, self_arg)) # stealing owned ref in this case
code.funcstate.release_temp(self_arg)
code.putln("}")
for i, arg in enumerate(args):
arg.make_owned_reference(code)
code.putln("PyTuple_SET_ITEM(%s, %d+%s, %s);" % (
args_tuple, i, arg_offset, arg.py_result()))
code.put_giveref(arg.py_result())
code.funcstate.release_temp(arg_offset)
for i, arg in enumerate(args):
arg.make_owned_reference(code)
code.putln("PyTuple_SET_ITEM(%s, %d+%s, %s);" % (
args_tuple, i, arg_offset, arg.py_result()))
code.put_giveref(arg.py_result())
code.funcstate.release_temp(arg_offset)
for arg in args:
arg.generate_post_assignment_code(code)
arg.free_temps(code)
for arg in args:
arg.generate_post_assignment_code(code)
arg.free_temps(code)
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectCall", "ObjectHandling.c"))
code.putln(
"%s = __Pyx_PyObject_Call(%s, %s, NULL); %s" % (
self.result(),
function, args_tuple,
code.error_goto_if_null(self.result(), self.pos)))
code.put_gotref(self.py_result())
code.globalstate.use_utility_code(
UtilityCode.load_cached("PyObjectCall", "ObjectHandling.c"))
code.putln(
"%s = __Pyx_PyObject_Call(%s, %s, NULL); %s" % (
self.result(),
function, args_tuple,
code.error_goto_if_null(self.result(), self.pos)))
code.put_gotref(self.py_result())
code.put_decref_clear(args_tuple, py_object_type)
code.funcstate.release_temp(args_tuple)
code.funcstate.release_temp(function)
code.put_decref_clear(args_tuple, py_object_type)
code.funcstate.release_temp(args_tuple)
code.funcstate.release_temp(function)
self.function.generate_disposal_code(code)
self.function.free_temps(code)
......
......@@ -3754,7 +3754,7 @@ class FinalOptimizePhase(Visitor.CythonTransform):
Replace likely Python method calls by a specialised PyMethodCallNode.
"""
self.visitchildren(node)
if node.function.type.is_cfunction and isinstance(node.function, ExprNodes.NameNode):
if node.function.type.is_cfunction and node.function.is_name:
if node.function.name == 'isinstance' and len(node.args) == 2:
type_arg = node.args[1]
if type_arg.type.is_builtin_type and type_arg.type.name == 'type':
......@@ -3763,11 +3763,11 @@ class FinalOptimizePhase(Visitor.CythonTransform):
node.function.type = node.function.entry.type
PyTypeObjectPtr = PyrexTypes.CPtrType(cython_scope.lookup('PyTypeObject').type)
node.args[1] = ExprNodes.CastNode(node.args[1], PyTypeObjectPtr)
elif node.function.type.is_pyobject:
elif node.function.type.is_pyobject and node.function.type is not Builtin.type_type:
# we could do it for all calls, but attributes are most likely to result in a method call
if node.function.is_attribute:
if isinstance(node.arg_tuple, ExprNodes.TupleNode) and not (
node.arg_tuple.is_literal or node.arg_tuple.mult_factor):
node.arg_tuple.mult_factor or (node.arg_tuple.is_literal and node.arg_tuple.args)):
node = ExprNodes.PyMethodCallNode.from_node(
node, function=node.function, arg_tuple=node.arg_tuple, type=node.type)
return node
......
......@@ -1158,6 +1158,90 @@ static CYTHON_INLINE PyObject* __Pyx_PyObject_Call(PyObject *func, PyObject *arg
#endif
/////////////// PyObjectCallOneArg.proto ///////////////
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg); /*proto*/
/////////////// PyObjectCallOneArg ///////////////
//@requires: PyObjectCall
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
    // Call "func(arg)".  When "func" is a C-implemented function (or a
    // CyFunction, if those are in use) declared as METH_O, its C function
    // pointer is invoked directly so that no argument tuple has to be built.
    PyObject *retval;
    int is_c_callable = PyCFunction_Check(func);
#ifdef __Pyx_CyFunction_USED
    if (!is_c_callable)
        is_c_callable = PyObject_TypeCheck(func, __pyx_CyFunctionType);
#endif
    if (is_c_callable && (PyCFunction_GET_FLAGS(func) & METH_O)) {
        // fast and simple case we are optimising for
        PyCFunction c_impl = PyCFunction_GET_FUNCTION(func);
        PyObject *bound_self = PyCFunction_GET_SELF(func);
        if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object")))
            return NULL;
        retval = c_impl(bound_self, arg);
        Py_LeaveRecursiveCall();
        // report a NULL result that arrived without an exception as a SystemError
        if (unlikely(!retval) && unlikely(!PyErr_Occurred())) {
            PyErr_SetString(
                PyExc_SystemError,
                "NULL result without error in PyObject_Call");
        }
        return retval;
    } else {
        // generic case: pack the argument into a 1-tuple and do a normal call
        PyObject *packed = PyTuple_Pack(1, arg);
        if (unlikely(!packed))
            return NULL;
        retval = __Pyx_PyObject_Call(func, packed, NULL);
        Py_DECREF(packed);
        return retval;
    }
}
#else
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallOneArg(PyObject *func, PyObject *arg) {
    // Generic implementation of "func(arg)": pack the single argument into a
    // 1-tuple and do a normal call.  Returns a new reference, or NULL with an
    // exception set on failure.
    PyObject *result;
    PyObject *args = PyTuple_Pack(1, arg);
    if (unlikely(!args)) return NULL;
    result = __Pyx_PyObject_Call(func, args, NULL);
    // PyTuple_Pack() returned a new reference and the call does not steal it,
    // so release the tuple here to avoid leaking it on every call.
    Py_DECREF(args);
    return result;
}
#endif
/////////////// PyObjectCallNoArg.proto ///////////////
//@requires: PyObjectCall
//@substitute: naming
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func); /*proto*/
#else
#define __Pyx_PyObject_CallNoArg(func) __Pyx_PyObject_Call(func, $empty_tuple, NULL)
#endif
/////////////// PyObjectCallNoArg ///////////////
//@requires: PyObjectCall
//@substitute: naming
#if CYTHON_COMPILING_IN_CPYTHON
static CYTHON_INLINE PyObject* __Pyx_PyObject_CallNoArg(PyObject *func) {
    // Call "func()".  When "func" is a C-implemented function declared as
    // METH_NOARGS, its C function pointer is invoked directly; everything
    // else goes through a normal call with the shared empty tuple.
    PyObject *retval;
    if (PyCFunction_Check(func) && (PyCFunction_GET_FLAGS(func) & METH_NOARGS)) {
        PyCFunction c_impl = PyCFunction_GET_FUNCTION(func);
        PyObject *bound_self = PyCFunction_GET_SELF(func);
        if (unlikely(Py_EnterRecursiveCall((char*)" while calling a Python object")))
            return NULL;
        retval = c_impl(bound_self, NULL);
        Py_LeaveRecursiveCall();
        // report a NULL result that arrived without an exception as a SystemError
        if (unlikely(!retval) && unlikely(!PyErr_Occurred())) {
            PyErr_SetString(
                PyExc_SystemError,
                "NULL result without error in PyObject_Call");
        }
        return retval;
    }
    return __Pyx_PyObject_Call(func, $empty_tuple, NULL);
}
#endif
/////////////// MatrixMultiply.proto ///////////////
#if PY_VERSION_HEX >= 0x03050000
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment