Commit 19ed7064 authored by Kevin Modzelewski

Low-tech optimization: add xdecrefAll() function

For use on the exception path.  Rather than emitting the instructions
for a bunch of decrefs, just emit a single call to xdecrefAll().

Improves performance quite a lot: the LLVM instruction count for sre_parse_parse
goes from 360k to 80k, and to 60k if I manually disable cxx fixups.
Overall time [which is entirely compile time] goes from 12.5s to 4.4s,
though master does it in 1.3s so there's still some work to do.  But
even if I turn off cxx fixups entirely it still takes 2.9s.
parent 21b20e3b
......@@ -365,14 +365,14 @@ void addCXXFixup(llvm::Instruction* inst, const llvm::SmallVector<llvm::Tracking
std::tie(exc_type, exc_value, exc_traceback) = createLandingpad(fixup_block);
llvm::IRBuilder<true> builder(fixup_block);
auto rethrow = builder.CreateCall3(g.funcs.rawReraise, exc_type, exc_value, exc_traceback);
builder.CreateUnreachable();
// fixup_block->dump();
llvm::SmallVector<llvm::Value*, 4> decref_args;
decref_args.push_back(getConstantInt(to_decref.size(), g.i32));
decref_args.append(to_decref.begin(), to_decref.end());
builder.CreateCall(g.funcs.xdecrefAll, decref_args);
for (auto&& v : to_decref) {
addDecrefs(v, rt->isNullable(v), 1, rethrow);
}
auto rethrow = builder.CreateCall3(g.funcs.rawReraise, exc_type, exc_value, exc_traceback);
builder.CreateUnreachable();
// new_invoke->getParent()->getParent()->dump();
}
......
......@@ -323,6 +323,7 @@ void initGlobalFuncs(GlobalState& g) {
GET(reraiseCapiExcAsCxx);
GET(deopt);
GET(checkRefs);
GET(xdecrefAll);
GET(div_float_float);
GET(floordiv_float_float);
......
......@@ -54,7 +54,7 @@ struct GlobalFuncs {
llvm::Value* raise0, *raise0_capi, *raise3, *raise3_capi, *rawReraise;
llvm::Value* PyErr_Fetch, *PyErr_NormalizeException, *PyErr_Restore, *caughtCapiException, *reraiseCapiExcAsCxx;
llvm::Value* deopt;
llvm::Value* checkRefs;
llvm::Value* checkRefs, *xdecrefAll;
llvm::Value* div_float_float, *floordiv_float_float, *mod_float_float, *pow_float_float;
......
......@@ -138,6 +138,7 @@ void force() {
FORCE(reraiseCapiExcAsCxx);
FORCE(deopt);
FORCE(checkRefs);
FORCE(xdecrefAll);
FORCE(div_i64_i64);
FORCE(mod_i64_i64);
......
......@@ -118,6 +118,18 @@ static inline Box* callattrInternal3(Box* obj, BoxedString* attr, LookupScope sc
return callattrInternal<S, rewritable>(obj, attr, scope, rewrite_args, argspec, arg1, arg2, arg3, NULL, NULL);
}
extern "C" void xdecrefAll(int num, ...) {
va_list va;
va_start(va, num);
for (int i = 0; i < num; i++) {
Box* b = va_arg(va, Box*);
Py_XDECREF(b);
}
va_end(va);
}
extern "C" Box* deopt(AST_expr* expr, Box* value) {
STAT_TIMER(t0, "us_timer_deopt", 10);
......
......@@ -229,5 +229,6 @@ extern "C" void boxedLocalsDel(Box* boxedLocals, BoxedString* attr);
extern "C" void checkRefs(Box* b); // asserts that b has >= 0 refs
extern "C" Box* assertAlive(Box* b); // asserts that b has > 0 refs, and returns b
extern "C" void xdecrefAll(int num, ...); // calls Py_XDECREF on each of the `num` Box* varargs
}
#endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment