Commit 24140771 authored by Kevin Modzelewski

Merge pull request #579 from kmod/string_allocs

Reduce string allocations
parents 412730c4 abd87d35
import sre_compile
import sre_constants
import sre_parse
import re
IN = sre_constants.IN
......@@ -7,26 +8,21 @@ IN = sre_constants.IN
def _identity(x):
return x
_cache = {}
DEBUG = 1
def _compile(*key):
# print "re._compile", key
# internal: compile pattern
pattern, flags = key
bypass_cache = flags & DEBUG
if not bypass_cache:
cachekey = (type(key[0]),) + key
p = _cache.get(cachekey)
if p is not None:
# print "got from cache"
return p
p = (1, 0)
_cache[cachekey] = p
return p
# Benchmark loop: one million calls to _compile with the fixed key ("a", 0).
# The commented-out lines are sre_compile calls that are currently disabled.
for i in xrange(1000000):
# sre_compile._optimize_charset([('negate', None), ('literal', 34), ('literal', 92)], _identity)
# sre_compile._compile([17, 4, 0, 3, 0, 29, 12, 0, 4294967295L, 15, 7, 26, 19, 34, 19, 92, 0, 1, 28, 0, 0, 4294967295L, 7, 6, 19, 92, 2, 18, 15, 13, 19, 34, 5, 7, 0, 19, 34, 19, 34, 1, 18, 2, 0, 29, 0, 0, 4294967295L], [(IN, [('negate', None), ('literal', 34), ('literal', 92)])], 0)
# sre_compile._compile([17, 8, 3, 1, 1, 1, 1, 97, 0], [('literal', 97)], 0)
_compile("a", 0)
pass
# This is the string tokenizer.PseudoToken:
pattern = '[ \\f\\t]*((\\\\\\r?\\n|\\Z|#[^\\r\\n]*|([uUbB]?[rR]?\'\'\'|[uUbB]?[rR]?"""))|((\\d+[jJ]|((\\d+\\.\\d*|\\.\\d+)([eE][-+]?\\d+)?|\\d+[eE][-+]?\\d+)[jJ])|((\\d+\\.\\d*|\\.\\d+)([eE][-+]?\\d+)?|\\d+[eE][-+]?\\d+)|(0[xX][\\da-fA-F]+[lL]?|0[bB][01]+[lL]?|(0[oO][0-7]+)|(0[0-7]*)[lL]?|[1-9]\\d*[lL]?))|((\\*\\*=?|>>=?|<<=?|<>|!=|//=?|[+\\-*/%&|^=<>]=?|~)|[][(){}]|(\\r?\\n|[:;.,`@]))|([uUbB]?[rR]?\'[^\\n\'\\\\]*(?:\\\\.[^\\n\'\\\\]*)*(\'|\\\\\\r?\\n)|[uUbB]?[rR]?"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*("|\\\\\\r?\\n))|[a-zA-Z_]\\w*)'
# Compile workload: 100 calls to sre_compile.compile on the pattern with flags 0.
for i in xrange(100):
# re.compile checks if the pattern is in the cache, and then calls sre_compile:
sre_compile.compile(pattern, 0)
# Parser workload: 600 iterations, each building a fresh sre_parse.Pattern
# (flags 0, str set to the pattern) and passing a Tokenizer over the pattern
# to sre_parse._parse_sub.
for i in xrange(600):
p = sre_parse.Pattern()
p.flags = 0
p.str = pattern
sre_parse._parse_sub(sre_parse.Tokenizer(pattern), p, 0)
......@@ -650,8 +650,8 @@ BoxedModule* createModule(const std::string& name, const char* fn = NULL, const
Box* moduleInit(BoxedModule* self, Box* name, Box* doc = NULL);
// TODO where to put this
void appendToSysPath(const std::string& path);
void prependToSysPath(const std::string& path);
void appendToSysPath(llvm::StringRef path);
void prependToSysPath(llvm::StringRef path);
void addToSysArgv(const char* str);
// Raise a SyntaxError that occurs at a specific location.
......
......@@ -254,7 +254,7 @@ extern "C" Box* chr(Box* arg) {
}
char c = (char)n;
return boxStringRef(llvm::StringRef(&c, 1));
return boxString(llvm::StringRef(&c, 1));
}
extern "C" Box* unichr(Box* arg) {
......
......@@ -197,12 +197,12 @@ void addToSysArgv(const char* str) {
listAppendInternal(sys_argv, boxStrConstant(str));
}
// Append `path`, boxed as a string object, to the end of the list returned by
// getSysPath().
void appendToSysPath(llvm::StringRef path) {
    BoxedList* sys_path = getSysPath();
    listAppendInternal(sys_path, boxString(path));
}
void prependToSysPath(const std::string& path) {
void prependToSysPath(llvm::StringRef path) {
BoxedList* sys_path = getSysPath();
static std::string attr = "insert";
callattr(sys_path, &attr, CallattrFlags({.cls_only = false, .null_on_nonexistent = false }), ArgPassSpec(2),
......
......@@ -28,7 +28,7 @@ extern "C" {
BoxedClass* classobj_cls, *instance_cls;
}
static Box* classLookup(BoxedClassobj* cls, const std::string& attr) {
static Box* classLookup(BoxedClassobj* cls, llvm::StringRef attr) {
Box* r = cls->getattr(attr);
if (r)
return r;
......
......@@ -632,7 +632,7 @@ BoxedFloat* _floatNew(Box* a) {
} else if (isSubclass(a->cls, int_cls)) {
return new BoxedFloat(static_cast<BoxedInt*>(a)->n);
} else if (a->cls == str_cls) {
const std::string& s = static_cast<BoxedString*>(a)->s();
llvm::StringRef s = static_cast<BoxedString*>(a)->s();
if (s == "nan")
return new BoxedFloat(NAN);
if (s == "-nan")
......@@ -644,10 +644,11 @@ BoxedFloat* _floatNew(Box* a) {
// TODO this should just use CPython's implementation:
char* endptr;
const char* startptr = s.c_str();
assert(s.data()[s.size()] == '\0');
const char* startptr = s.data();
double r = strtod(startptr, &endptr);
if (endptr != startptr + s.size())
raiseExcHelper(ValueError, "could not convert string to float: %s", s.c_str());
raiseExcHelper(ValueError, "could not convert string to float: %s", s.data());
return new BoxedFloat(r);
} else {
static const std::string float_str("__float__");
......
......@@ -50,17 +50,10 @@ extern "C" Box* boxStringPtr(const std::string* s) {
return new (s->size()) BoxedString(s->c_str(), s->size());
}
Box* boxStringRef(llvm::StringRef s) {
Box* boxString(llvm::StringRef s) {
return new (s.size()) BoxedString(s);
}
Box* boxString(const std::string& s) {
return new (s.size()) BoxedString(s.c_str(), s.size());
}
Box* boxString(std::string&& s) {
return new (s.size()) BoxedString(s.c_str(), s.size());
}
Box* boxStringTwine(const llvm::Twine& t) {
llvm::SmallString<256> Vec;
return boxString(t.toStringRef(Vec));
......
......@@ -923,7 +923,7 @@ extern "C" Box* intHex(BoxedInt* self) {
len = snprintf(buf, sizeof(buf), "-0x%lx", std::abs(self->n));
else
len = snprintf(buf, sizeof(buf), "0x%lx", self->n);
return boxStringRef(llvm::StringRef(buf, len));
return boxString(llvm::StringRef(buf, len));
}
extern "C" Box* intOct(BoxedInt* self) {
......
......@@ -625,8 +625,9 @@ BoxedLong* _longNew(Box* val, Box* _base) {
} else if (isSubclass(val->cls, int_cls)) {
mpz_init_set_si(rtn->n, static_cast<BoxedInt*>(val)->n);
} else if (val->cls == str_cls) {
const std::string& s = static_cast<BoxedString*>(val)->s();
int r = mpz_init_set_str(rtn->n, s.c_str(), 10);
llvm::StringRef s = static_cast<BoxedString*>(val)->s();
assert(s.data()[s.size()] == '\0');
int r = mpz_init_set_str(rtn->n, s.data(), 10);
RELEASE_ASSERT(r == 0, "");
} else if (val->cls == float_cls) {
mpz_init_set_si(rtn->n, static_cast<BoxedFloat*>(val)->d);
......
......@@ -1117,7 +1117,7 @@ Box* descriptorClsSpecialCases(GetattrRewriteArgs* rewrite_args, BoxedClass* cls
// Box a single char as a one-character string object.
Box* boxChar(char c) {
    char d[1];
    d[0] = c;
    // Length is passed explicitly, so the one-byte buffer needs no terminator.
    return boxString(llvm::StringRef(d, 1));
}
static Box* noneIfNull(Box* b) {
......@@ -1265,7 +1265,7 @@ Box* dataDescriptorInstanceSpecialCases(GetattrRewriteArgs* rewrite_args, llvm::
rewrite_args = NULL;
REWRITE_ABORTED("");
char* rtn = reinterpret_cast<char*>((char*)obj + member_desc->offset);
return boxStringRef(llvm::StringRef(rtn));
return boxString(llvm::StringRef(rtn));
}
default:
......@@ -3695,7 +3695,11 @@ Box* compareInternal(Box* lhs, Box* rhs, int op_type, CompareRewriteArgs* rewrit
return boxBool(result);
}
bool b = nonzero(contained);
bool b;
if (contained->cls == bool_cls)
b = contained == True;
else
b = contained->nonzeroIC();
if (op_type == AST_TYPE::NotIn)
return boxBool(!b);
return boxBool(b);
......
......@@ -516,7 +516,7 @@ void setupSet() {
v_fu.push_back(FROZENSET);
v_fu.push_back(UNKNOWN);
auto add = [&](const std::string& name, void* func) {
auto add = [&](llvm::StringRef name, void* func) {
CLFunction* func_obj = createRTFunction(2, 0, false, false);
addRTFunction(func_obj, (void*)func, SET, v_ss);
addRTFunction(func_obj, (void*)func, SET, v_sf);
......
......@@ -1823,9 +1823,9 @@ Box* strStrip(BoxedString* self, Box* chars) {
if (isSubclass(chars->cls, str_cls)) {
auto chars_str = static_cast<BoxedString*>(chars)->s();
return boxStringRef(str.trim(chars_str));
return boxString(str.trim(chars_str));
} else if (chars->cls == none_cls) {
return boxStringRef(str.trim(" \t\n\r\f\v"));
return boxString(str.trim(" \t\n\r\f\v"));
} else if (isSubclass(chars->cls, unicode_cls)) {
PyObject* uniself = PyUnicode_FromObject((PyObject*)self);
PyObject* res;
......@@ -1847,9 +1847,9 @@ Box* strLStrip(BoxedString* self, Box* chars) {
if (isSubclass(chars->cls, str_cls)) {
auto chars_str = static_cast<BoxedString*>(chars)->s();
return boxStringRef(str.ltrim(chars_str));
return boxString(str.ltrim(chars_str));
} else if (chars->cls == none_cls) {
return boxStringRef(str.ltrim(" \t\n\r\f\v"));
return boxString(str.ltrim(" \t\n\r\f\v"));
} else if (isSubclass(chars->cls, unicode_cls)) {
PyObject* uniself = PyUnicode_FromObject((PyObject*)self);
PyObject* res;
......@@ -1871,9 +1871,9 @@ Box* strRStrip(BoxedString* self, Box* chars) {
if (isSubclass(chars->cls, str_cls)) {
auto chars_str = static_cast<BoxedString*>(chars)->s();
return boxStringRef(str.rtrim(chars_str));
return boxString(str.rtrim(chars_str));
} else if (chars->cls == none_cls) {
return boxStringRef(str.rtrim(" \t\n\r\f\v"));
return boxString(str.rtrim(" \t\n\r\f\v"));
} else if (isSubclass(chars->cls, unicode_cls)) {
PyObject* uniself = PyUnicode_FromObject((PyObject*)self);
PyObject* res;
......@@ -2220,7 +2220,7 @@ extern "C" Box* strGetitem(BoxedString* self, Box* slice) {
}
char c = self->s()[n];
return boxStringRef(llvm::StringRef(&c, 1));
return boxString(llvm::StringRef(&c, 1));
} else if (slice->cls == slice_cls) {
BoxedSlice* sslice = static_cast<BoxedSlice*>(slice);
......@@ -2269,7 +2269,7 @@ public:
char c = *self->it;
++self->it;
return boxStringRef(llvm::StringRef(&c, 1));
return boxString(llvm::StringRef(&c, 1));
}
};
......
......@@ -413,7 +413,7 @@ std::string BoxedModule::name() {
}
}
Box* BoxedModule::getStringConstant(const std::string& ast_str) {
Box* BoxedModule::getStringConstant(llvm::StringRef ast_str) {
auto idx_iter = str_const_index.find(ast_str);
if (idx_iter != str_const_index.end())
return str_constants[idx_iter->second];
......@@ -1724,7 +1724,7 @@ Box* attrwrapperKeys(Box* b) {
return AttrWrapper::keys(b);
}
void attrwrapperDel(Box* b, const std::string& attr) {
void attrwrapperDel(Box* b, llvm::StringRef attr) {
AttrWrapper::delitem(b, boxString(attr));
}
......
......@@ -111,9 +111,7 @@ extern "C" Box* boxInstanceMethod(Box* obj, Box* func, Box* type);
extern "C" Box* boxUnboundInstanceMethod(Box* func, Box* type);
extern "C" Box* boxStringPtr(const std::string* s);
Box* boxString(const std::string& s);
Box* boxString(std::string&& s);
Box* boxStringRef(llvm::StringRef s);
Box* boxString(llvm::StringRef s);
Box* boxStringTwine(const llvm::Twine& s);
extern "C" BoxedString* boxStrConstant(const char* chars);
......@@ -698,7 +696,7 @@ public:
BoxedModule() {} // noop constructor to disable zero-initialization of cls
std::string name();
Box* getStringConstant(const std::string& ast_str);
Box* getStringConstant(llvm::StringRef ast_str);
llvm::StringMap<int> str_const_index;
std::vector<Box*> str_constants;
......@@ -854,7 +852,7 @@ Box* objectSetattr(Box* obj, Box* attr, Box* value);
Box* unwrapAttrWrapper(Box* b);
Box* attrwrapperKeys(Box* b);
void attrwrapperDel(Box* b, const std::string& attr);
void attrwrapperDel(Box* b, llvm::StringRef attr);
Box* boxAst(AST* ast);
AST* unboxAst(Box* b);
......
......@@ -6,7 +6,7 @@ if __name__ == "__main__":
parser.add_argument("tracebacks_file", action="store", default=None)
parser.add_argument("--num-display", action="store", default=6, type=int)
parser.add_argument("--dedup-frames", action="store", default=None, type=int)
parser.add_argument("--dedup-file", action="store_true")
parser.add_argument("--dedup-file", action="store", const=1, nargs='?')
parser.add_argument("--dedup-function", action="store_true")
args = parser.parse_args()
......@@ -20,7 +20,20 @@ if __name__ == "__main__":
key_func = lambda t: '\n'.join(t.split('\n')[-2 * args.dedup_frames:]) # last 4 stack frames
if args.dedup_file:
assert not key_func
key_func = lambda t: traceback_locations(t)[-1].split('"')[1]
def key_func(t):
    """Build the dedup key for traceback t from its trailing source files.

    Walks the entries of traceback_locations(t) from the last one backwards,
    takes the text between the first pair of double quotes in each entry as
    the file name, and skips an entry whose file equals the one just recorded.
    Collection stops once int(args.dedup_file) files have been gathered.  The
    gathered names are returned one per line, in their original order.
    """
    locations = traceback_locations(t)
    limit = int(args.dedup_file)
    picked = []
    for location in reversed(locations):
        entry = " " + location.split('"')[1]
        # Same file as the most recently recorded frame: collapse the run.
        if picked and picked[-1] == entry:
            continue
        picked.append(entry)
        if len(picked) == limit:
            break
    picked.reverse()
    return '\n'.join(picked)
if args.dedup_function:
assert not key_func
def key_func(t):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment