Commit 424539a1 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Merge pull request #613 from kmod/sre_compile2

small optimization to sre_compile._compile()
parents 4e03cb5d d0fb0616
......@@ -37,7 +37,22 @@ def _compile(code, pattern, flags):
REPEATING_CODES = _REPEATING_CODES
SUCCESS_CODES = _SUCCESS_CODES
ASSERT_CODES = _ASSERT_CODES
for op, av in pattern:
# Pyston change: iterating over a SubPattern is fairly expensive:
# - SubPattern defines __getitem__ but not __iter__, so we create a seqiter
# - for each iteration, we call instance.getitem, which looks up
# SubPattern.__getitem__, and calls it
# - SubPattern.__getitem__ just does `return self.data[idx]`, which throws an
# IndexError to signal that the iteration is done.
# All the exceptions and extra layers of indirection add up.
# Cut that out by simply iterating over pattern.data, which looks like is
# usually a list, and we can use our fast iteration.
if isinstance(pattern, sre_parse.SubPattern):
pattern_iter = pattern.data
else:
pattern_iter = pattern
for op, av in pattern_iter:
if op in LITERAL_CODES:
if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]])
......
class C:
def __init__(self):
self.l = range(10)
def __getitem__(self, idx):
return self.l[idx]
def f():
c = C()
total = 0
for _ in xrange(100000):
for i in c:
total += i
print total
f()
import sre_compile
# This is the string tokenizer.PseudoToken:
pattern = '[ \\f\\t]*((\\\\\\r?\\n|\\Z|#[^\\r\\n]*|([uUbB]?[rR]?\'\'\'|[uUbB]?[rR]?"""))|((\\d+[jJ]|((\\d+\\.\\d*|\\.\\d+)([eE][-+]?\\d+)?|\\d+[eE][-+]?\\d+)[jJ])|((\\d+\\.\\d*|\\.\\d+)([eE][-+]?\\d+)?|\\d+[eE][-+]?\\d+)|(0[xX][\\da-fA-F]+[lL]?|0[bB][01]+[lL]?|(0[oO][0-7]+)|(0[0-7]*)[lL]?|[1-9]\\d*[lL]?))|((\\*\\*=?|>>=?|<<=?|<>|!=|//=?|[+\\-*/%&|^=<>]=?|~)|[][(){}]|(\\r?\\n|[:;.,`@]))|([uUbB]?[rR]?\'[^\\n\'\\\\]*(?:\\\\.[^\\n\'\\\\]*)*(\'|\\\\\\r?\\n)|[uUbB]?[rR]?"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*("|\\\\\\r?\\n))|[a-zA-Z_]\\w*)'
for i in xrange(200):
# re.compile checks if the pattern is in the cache, and then calls sre_compile:
sre_compile.compile(pattern, 0)
import sre_compile
import sre_constants
import sre_parse
import re
IN = sre_constants.IN
def _identity(x):
return x
for i in xrange(1000000):
# sre_compile._optimize_charset([('negate', None), ('literal', 34), ('literal', 92)], _identity)
# sre_compile._compile([17, 4, 0, 3, 0, 29, 12, 0, 4294967295L, 15, 7, 26, 19, 34, 19, 92, 0, 1, 28, 0, 0, 4294967295L, 7, 6, 19, 92, 2, 18, 15, 13, 19, 34, 5, 7, 0, 19, 34, 19, 34, 1, 18, 2, 0, 29, 0, 0, 4294967295L], [(IN, [('negate', None), ('literal', 34), ('literal', 92)])], 0)
# sre_compile._compile([17, 8, 3, 1, 1, 1, 1, 97, 0], [('literal', 97)], 0)
pass
# This is the string tokenizer.PseudoToken:
pattern = '[ \\f\\t]*((\\\\\\r?\\n|\\Z|#[^\\r\\n]*|([uUbB]?[rR]?\'\'\'|[uUbB]?[rR]?"""))|((\\d+[jJ]|((\\d+\\.\\d*|\\.\\d+)([eE][-+]?\\d+)?|\\d+[eE][-+]?\\d+)[jJ])|((\\d+\\.\\d*|\\.\\d+)([eE][-+]?\\d+)?|\\d+[eE][-+]?\\d+)|(0[xX][\\da-fA-F]+[lL]?|0[bB][01]+[lL]?|(0[oO][0-7]+)|(0[0-7]*)[lL]?|[1-9]\\d*[lL]?))|((\\*\\*=?|>>=?|<<=?|<>|!=|//=?|[+\\-*/%&|^=<>]=?|~)|[][(){}]|(\\r?\\n|[:;.,`@]))|([uUbB]?[rR]?\'[^\\n\'\\\\]*(?:\\\\.[^\\n\'\\\\]*)*(\'|\\\\\\r?\\n)|[uUbB]?[rR]?"[^\\n"\\\\]*(?:\\\\.[^\\n"\\\\]*)*("|\\\\\\r?\\n))|[a-zA-Z_]\\w*)'
for i in xrange(100):
# re.compile checks if the pattern is in the cache, and then calls sre_compile:
sre_compile.compile(pattern, 0)
for i in xrange(600):
p = sre_parse.Pattern()
p.flags = 0
......
......@@ -28,9 +28,10 @@ namespace pyston {
#define DISABLE_STATS 0
#define STAT_ALLOCATIONS 0 && !DISABLE_STATS
#define STAT_TIMERS 0 && !DISABLE_STATS
#define EXPENSIVE_STAT_TIMERS 0 && STAT_TIMERS
#define STAT_ALLOCATIONS (0 && !DISABLE_STATS)
#define STAT_EXCEPTIONS (0 && !DISABLE_STATS)
#define STAT_TIMERS (0 && !DISABLE_STATS)
#define EXPENSIVE_STAT_TIMERS (0 && STAT_TIMERS)
#if STAT_TIMERS
#define STAT_TIMER(id, name) \
......
......@@ -57,6 +57,18 @@ void raiseRaw(const ExcInfo& e) {
assert(gc::isValidGCObject(e.value));
assert(gc::isValidGCObject(e.traceback));
#if STAT_EXCEPTIONS
static StatCounter num_exceptions("num_exceptions");
num_exceptions.log();
std::string stat_name;
if (PyType_Check(e.type))
stat_name = "num_exceptions_" + std::string(static_cast<BoxedClass*>(e.type)->tp_name);
else
stat_name = "num_exceptions_" + std::string(e.value->cls->tp_name);
Stats::log(Stats::getStatCounter(stat_name));
#endif
throw e;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment