Commit e9908ac9 authored by Jason Madden

Refactor use of raw tuples/frozensets into custom classes.

This permits some more optimizations and speeds up the build another 20%
or so, in addition to being hopefully easier to follow.
parent 192ebf3b
......@@ -37,11 +37,9 @@ if sys.version_info >= (3, 0):
else:
exec("def do_exec(co, loc): exec co in loc\n")
_ex = lambda: sys.exc_info()[1]
CYTHON = os.environ.get('CYTHON') or 'cython'
DEBUG = False
DEBUG = os.environ.get('CYTHONPP_DEBUG', False)
WRITE_OUTPUT = False
if os.getenv('READTHEDOCS'):
......@@ -56,20 +54,353 @@ param_name_re = re.compile(r'^[a-zA-Z_]\w*$')
# First line of a definition of a new macro:
define_re = re.compile(r'^#define\s+([a-zA-Z_]\w*)(\((?:[^,)]+,)*[^,)]+\))?\s+(.*)$')
# Conditional directive:
condition_re = re.compile(r'^#(ifdef\s+.+|if\s+.+|else\s*|endif\s*)$')
# cython header:
cython_header_re = re.compile(r'^/\* (generated by cython [^\s*]+)[^*]+\*/$', re.I)
#assert cython_header_re.match('/* Generated by Cython 0.21.1 */').group(1) == 'Generated by Cython 0.21.1'
#assert cython_header_re.match('/* Generated by Cython 0.19 on 55-555-555 */').group(1) == 'Generated by Cython 0.19'
class Configuration(frozenset):
    """
    A set of CPP conditions that apply to a given sequence
    of lines. Sometimes referred to as a "tag".

    Configurations are iterated in sorted order for consistency
    across runs.

    Instances are interned: building a configuration from an equal set
    of conditions returns the already-cached object. Empty
    configurations are rejected with ``TypeError``.
    """

    __slots__ = ('_sorted',)
    # Interning table: sorted tuple of conditions -> the unique instance.
    _cache = {}

    def __new__(cls, iterable):
        """
        Return the interned configuration for the conditions in *iterable*.

        Raises ``TypeError`` if any element is not a ``Condition`` or if
        *iterable* is empty.
        """
        sorted_iterable = tuple(sorted(frozenset(iterable)))
        if sorted_iterable not in cls._cache:
            if not all(isinstance(x, Condition) for x in sorted_iterable):
                raise TypeError("Must be iterable of conditions")
            if not sorted_iterable:
                raise TypeError("Empty configurations not allowed")
            self = frozenset.__new__(cls, sorted_iterable)
            self._sorted = sorted_iterable
            cls._cache[sorted_iterable] = self
        return cls._cache[sorted_iterable]

    def union(self, other):
        """Return the Configuration holding these conditions plus *other*."""
        return Configuration(frozenset.union(self, other))

    def __add__(self, conditions):
        return self.union(conditions)

    def difference(self, other):
        """
        Return the Configuration holding these conditions minus *other*.

        Raises ``TypeError`` if nothing is left, because empty
        configurations are not allowed.
        """
        return Configuration(frozenset.difference(self, other))

    def __sub__(self, other):
        return self.difference(other)

    def __iter__(self):
        # Iterate the pre-sorted tuple so the order is stable across runs.
        return iter(self._sorted)

    def format_tag(self):
        """Return the conditions joined with ``' && '``."""
        return ' && '.join([x.format_cond() for x in self])

    def __repr__(self):
        return "Configuration({" + ', '.join((repr(x) for x in self)) + '})'

    @property
    def all_directives(self):
        "All the directives in the conditions of this configuration"
        return set(x.directive for x in self)

    def is_impossible(self):
        """
        Return whether the configuration (a Configuration) contradicts itself.

        A configuration contradicts itself when the same directive name
        appears with two different settings.
        """
        conds = {}
        for cond_name, cond_setting in self:
            if cond_name in conds and conds[cond_name] != cond_setting:
                return True
            conds[cond_name] = cond_setting
        # Explicit result instead of falling off the end with None.
        return False

    def is_condition_true(self, directive):
        """
        Return whether *directive* (a ``'#if X'`` or ``'#ifdef X'`` line)
        is enabled in this configuration.

        ``'#ifdef X'`` is looked up as ``defined(X)``. Any other directive
        raises ``AssertionError``.
        """
        if directive.startswith('#if '):
            parameter = directive.split(' ', 1)[1]
        elif directive.startswith('#ifdef '):
            parameter = directive.split(' ', 1)[1]
            parameter = 'defined(%s)' % parameter
        else:
            raise AssertionError('Invalid directive: %r' % directive)
        cond = (parameter, True)
        return cond in self

    def attach_tags(self, text):
        """
        Split *text* into lines and return a list of ``Str`` objects, one
        per line (newline restored), each tagged with this configuration.
        A single trailing empty piece (text ending in a newline) is dropped.
        """
        result = text.split('\n')
        if result and not result[-1]:
            del result[-1]
        return [Str(x + '\n', self) for x in result]

    @classmethod
    def get_configurations(cls, filename):
        """
        Returns a set of Configuration objects representing
        the configurations seen in the file.

        A configuration is recorded for every non-directive line that
        sits inside at least one open ``#if``/``#ifdef``. Syntax errors in
        the file are logged and terminate the process with exit status 1;
        any other exception is logged and re-raised.
        """
        conditions = set()
        condition_stack = []
        linecount = 0
        match_condition = Condition.match_condition
        with open(filename) as f:
            for line in f:
                linecount += 1
                try:
                    m = match_condition(line)
                    if m is None:
                        # Ordinary line: record the currently open
                        # conditions, if any (empty stacks are skipped
                        # since empty configurations are not allowed).
                        if condition_stack:
                            conditions.add(cls(condition_stack))
                        continue

                    split = m.group(1).strip().split(' ', 1)
                    directive = split[0].strip()
                    if len(split) == 1:
                        parameter = None
                        assert directive in ('else', 'endif'), directive
                    else:
                        parameter = split[1].strip()
                        assert directive in ('if', 'ifdef'), directive

                    if directive == 'ifdef':
                        # Normalize "#ifdef X" to "#if defined(X)".
                        directive = 'if'
                        parameter = 'defined(%s)' % parameter

                    if directive == 'if':
                        condition_stack.append(Condition(parameter, True))
                    elif directive == 'else':
                        if not condition_stack:
                            raise SyntaxError('Unexpected "#else"')
                        last_cond, true = condition_stack.pop()
                        assert true is True, true
                        condition_stack.append(Condition(last_cond, not true))
                    elif directive == 'endif':
                        if not condition_stack:
                            raise SyntaxError('Unexpected "#endif"')
                        condition_stack.pop()
                    else:
                        raise AssertionError('Internal error')
                except BaseException as ex:
                    log('%s:%s: %s', filename, linecount, ex)
                    if isinstance(ex, SyntaxError):
                        sys.exit(1)
                    else:
                        raise
        dbg("Found conditions %s", conditions)
        return conditions

    @classmethod
    def get_permutations_of_configurations(cls, items):
        """
        Returns a set of Configuration objects representing all the
        possible permutations of the given list of configuration
        objects. Impossible configurations are excluded.
        """
        def flattened(tuple_of_configurations):
            # product() produces a list of tuples. Each
            # item in the tuple is a different configuration object.
            # The constructor de-duplicates and sorts the conditions, so
            # the configurations need no ordering of their own here.
            return cls([condition
                        for configuration in set(tuple_of_configurations)
                        for condition in configuration])

        flattened_configurations = (flattened(x) for x in product(items, repeat=len(items)))
        possible_configurations = set((x for x in flattened_configurations if not x.is_impossible()))
        return possible_configurations

    @classmethod
    def get_permutations_of_configurations_in_file(cls, filename):
        """
        Returns a sorted list of unique configurations possible in the given
        file.
        """
        # NOTE(review): no ordering methods are defined on this class, so
        # sorted() falls back to frozenset's subset comparison, which is
        # not a total order -- confirm the resulting order is what callers need.
        return sorted(cls.get_permutations_of_configurations(cls.get_configurations(filename)))

    @classmethod
    def get_complete_configurations(cls, filename):
        """
        Return a sorted list of the set of unique configurations possible
        in the given file; each configuration will have all the conditions
        it specifies, plus the implicit conditions that it does not specify.

        Implicit conditions are added with a value of ``False``.
        """
        configurations = cls.get_permutations_of_configurations_in_file(filename)
        all_cond_names = set()
        for config in configurations:
            all_cond_names = all_cond_names.union(config.all_directives)

        result = set()
        for configuration in configurations:
            cond_names_in_configuration = configuration.all_directives
            cond_names_not_in_configuration = all_cond_names - cond_names_in_configuration
            for missing_cond_name in cond_names_not_in_configuration:
                configuration = configuration + (Condition(missing_cond_name, False), )
            result.add(cls(sorted(configuration)))

        # XXX: Previously, this produced eight configurations for gevent/corecext.ppyx
        # (containing all the possible permutations).
        # But two of them produced identical results and were hashed as such
        # by run_cython_on_files. We're now producing just the 6 results that
        # are distinct in that case. I'm not exactly sure why
        assert all(isinstance(x, Configuration) for x in result)
        return sorted(result)
class Condition(tuple):
    """
    A single CPP directive.

    Two-tuple: (name, True|False)

    Instances are interned: constructing a condition equal to one that
    already exists returns the cached object.
    """
    # Conditional directive:
    condition_re = re.compile(r'^#(ifdef\s+.+|if\s+.+|else\s*|endif\s*)$')
    # Interning table: (name, value) tuple -> the unique instance.
    _cache = {}
    __slots__ = ()

    def __new__(cls, *args):
        """
        Accept either ``Condition(name, value)`` or ``Condition(sequence)``
        where *sequence* is a two-item iterable.

        Raises ``TypeError`` for a wrong argument count, a sequence that is
        not of length 2, or items that are not ``(str, bool)``.
        """
        if len(args) == 2:
            # name, value; from literal constructor
            sequence = args
        elif len(args) == 1:
            # Normalize to a tuple so unhashable inputs (e.g. lists) can
            # be used as the cache key.
            sequence = tuple(args[0])
        else:
            raise TypeError("wrong argument number", args)

        if sequence not in cls._cache:
            if len(sequence) != 2:
                raise TypeError("Must be len 2", sequence)
            if not isinstance(sequence[0], str) or not isinstance(sequence[1], bool):
                raise TypeError("Must be (str, bool)")
            cls._cache[sequence] = tuple.__new__(cls, sequence)
        return cls._cache[sequence]

    def __repr__(self):
        return "Condition" + tuple.__repr__(self)

    @property
    def directive(self):
        """The condition's name (first item)."""
        return self[0]

    @property
    def value(self):
        """The condition's boolean setting (second item)."""
        return self[1]

    def format_cond(self):
        """Return the directive, prefixed with ``'!'`` when the value is false."""
        if self.value:
            return self.directive
        return '!' + self.directive

    def inverted(self):
        """Return the condition with the same directive and the opposite value."""
        return Condition(self.directive, not self.value)

    @classmethod
    def match_condition(cls, line):
        """
        Return the regex match for a conditional directive line
        (``#if``, ``#ifdef``, ``#else``, ``#endif``), or ``None``.

        Lines ending in ``':'`` are never treated as directives.
        """
        line = line.strip()
        if line.endswith(':'):
            return None
        # Use the class-level pattern; do not rely on a module global.
        return cls.condition_re.match(line)
class ConfigurationGroups(tuple):
    """
    A sequence of Configurations that apply to the given line.

    These are maintained in sorted order. Instances are interned: an
    equal sorted sequence of configurations returns the cached object.
    """

    # Interning table: sorted tuple of configurations -> the unique instance.
    _cache = {}

    def __new__(cls, tags):
        """
        Return the interned group for the configurations in *tags*.

        Raises ``TypeError`` if any element is not a ``Configuration``.
        """
        # Materialize once: *tags* may be a one-shot iterable, so both the
        # validation and the tuple contents must come from this snapshot.
        sorted_tags = tuple(sorted(tags))
        if sorted_tags not in cls._cache:
            if not all(isinstance(x, Configuration) for x in sorted_tags):
                raise TypeError("Must be a Configuration", tags)
            self = tuple.__new__(cls, sorted_tags)
            self._simplified = False
            cls._cache[sorted_tags] = self
        return cls._cache[sorted_tags]

    def __repr__(self):
        return "ConfigurationGroups" + tuple.__repr__(self)

    def __add__(self, other):
        """Return the group containing the configurations of both operands."""
        l = list(self)
        l.extend(other)
        return ConfigurationGroups(l)

    def exact_reverse(self, tags2):
        """
        Return True when this group and *tags2* each hold exactly one
        configuration of exactly one condition, on the same directive but
        with opposite values (i.e. one is the ``#else`` of the other).

        Returns a false value in every other case, including when either
        side is empty or ``None``. Raises ``TypeError`` if *tags2* is
        non-empty and not a tuple.
        """
        if not self:
            return False
        if not tags2:
            return False
        if not isinstance(tags2, tuple):
            raise TypeError(repr(tags2))
        if len(self) != 1 or len(tags2) != 1:
            return False

        tag1 = self[0]
        tag2 = tags2[0]
        assert isinstance(tag1, Configuration), tag1
        assert isinstance(tag2, Configuration), tag2
        if len(tag1) != 1 or len(tag2) != 1:
            return False

        (cond1,) = tag1
        (cond2,) = tag2
        if cond1[0] == cond2[0]:
            return sorted([cond1[1], cond2[1]]) == [False, True]
        return False

    def format_tags(self):
        """Return each configuration parenthesized and joined with ``' || '``."""
        return ' || '.join('(%s)' % x.format_tag() for x in sorted(self))

    def simplify_tags(self):
        """
        Return an equivalent group with redundant configurations merged.

        Two rules are applied repeatedly until neither matches:

        * an exact duplicate configuration is dropped;
        * when one configuration contains a condition whose inverse is in
          another, and both are equal once that inverse is removed, the
          pair is replaced by that common remainder.

        The result is memoized on the instance.
        """
        if self._simplified:
            return self

        for tag1, tag2 in combinations(self, 2):
            if tag1 == tag2:
                tags = list(self)
                tags.remove(tag1)
                return ConfigurationGroups(tags).simplify_tags()

            for condition in tag1:
                inverted_condition = condition.inverted()
                if inverted_condition not in tag2:
                    continue
                # Compare plain frozensets: going through Configuration
                # here would raise for an empty remainder, which is not
                # an error while merely testing for equality.
                removed = {inverted_condition}
                remainder1 = frozenset.difference(tag1, removed)
                remainder2 = frozenset.difference(tag2, removed)
                if remainder1 == remainder2:
                    tags = list(self)
                    tags.remove(tag1)
                    tags.remove(tag2)
                    tags.append(Configuration(remainder1))
                    return ConfigurationGroups(tags).simplify_tags()

        self._simplified = True
        return self
newline_token = ' <cythonpp.py: REPLACE WITH NEWLINE!> '
......@@ -80,7 +411,7 @@ def _run_cython_on_file(configuration, pyx_filename,
cache=None):
value = ''.join(lines)
sourcehash = md5(value.encode("utf-8")).hexdigest()
comment = format_tag(frozenset(configuration)) + " hash:" + str(sourcehash)
comment = configuration.format_tag() + " hash:" + str(sourcehash)
if os.path.isabs(output_filename):
raise ValueError("output cannot be absolute")
# We can't change the actual name of the pyx file because
......@@ -109,7 +440,7 @@ def _run_cython_on_file(configuration, pyx_filename,
finally:
shutil.rmtree(tempdir, True)
return attach_tags(output, configuration), configuration, sourcehash
return configuration.attach_tags(output), configuration, sourcehash
def _run_cython_on_files(pyx_filename, py_banner, banner, output_filename, preprocessed):
......@@ -141,7 +472,7 @@ def _run_cython_on_files(pyx_filename, py_banner, banner, output_filename, prepr
combined_lines = []
for line_a, line_b in zip(tagged_output, other_tagged_output):
combined_tags = line_a.tags + line_b.tags
combined_lines.append(Str(line_a, simplify_tags(combined_tags)))
combined_lines.append(Str(line_a, combined_tags.simplify_tags()))
same_results[sourcehash] = combined_lines
# Order them as they were processed for repeatability
......@@ -176,8 +507,9 @@ def process_filename(filename, output_filename=None):
py_banner = '# %s\n' % banner
preprocessed = {}
for configuration in get_configurations(filename):
preprocessed[configuration] = preprocess_filename(filename, Config(configuration))
for configuration in Configuration.get_complete_configurations(filename):
dbg("Processing %s", configuration)
preprocessed[configuration] = preprocess_filename(filename, configuration)
preprocessed[None] = preprocess_filename(filename, None)
preprocessed = expand_to_match(preprocessed.items())
......@@ -188,9 +520,9 @@ def process_filename(filename, output_filename=None):
log('Generating %s ', output_filename)
result = generate_merged(sources)
result_hash = md5(result.encode("utf-8")).hexdigest()
result_hash = md5(''.join(result.split('\n')[4:]).encode("utf-8")).hexdigest()
atomic_write(output_filename, result)
log('%s bytes\n', len(result))
log('%s bytes of hash %s\n', len(result), result_hash)
if filename != pyx_filename:
log('Saving %s', pyx_filename)
......@@ -247,7 +579,7 @@ def preprocess_filename(filename, config):
definitions[name]['params'] = parse_parameter_names(params)
dbg('Adding definition for %r: %s', name, definitions[name]['params'])
else:
m = match_condition(stripped)
m = Condition.match_condition(stripped)
if m is not None and config is not None:
if stripped == '#else':
if not including_section:
......@@ -278,10 +610,9 @@ def preprocess_filename(filename, config):
lines = [x + '\n' for x in lines]
lines = [Str_sourceline(x, linecount - 1) for x in lines]
result.extend(lines)
except BaseException:
ex = _ex()
except BaseException as ex:
log('%s:%s: %s', filename, linecount, ex)
if type(ex) is SyntaxError:
if isinstance(ex, SyntaxError):
sys.exit(1)
else:
raise
......@@ -307,9 +638,19 @@ def merge(sources):
sources = list(sources) # own copy
dbg("Merging %s", len(sources))
if len(sources) <= 1:
return [Str(str(x), simplify_tags(x.tags)) for x in sources[0]]
return [Str(str(x), x.tags.simplify_tags()) for x in sources[0]]
if not DEBUG:
pool = multiprocessing.Pool()
else:
class SerialPool(object):
def imap(self, func, arg_list):
return [func(*args) for args in arg_list]
def apply(self, func, args):
return func(*args)
pool = SerialPool()
groups = []
while len(sources) >= 2:
......@@ -340,12 +681,6 @@ def _merge(*args):
a, b = args
return list(_imerge(a, b))
def _flatten(tags):
s = set()
for tag in tags:
s.update(tag)
return frozenset(s)
def _imerge(a, b):
# caching the tags speeds up serialization and future merges
tag_cache = {}
......@@ -353,10 +688,11 @@ def _imerge(a, b):
if tag == 'equal':
for line_a, line_b in zip(a[i1:i2], b[j1:j2]):
# tags is a tuple of frozensets
line_a_tags = getattr(line_a, 'tags', ())
line_b_tags = getattr(line_b, 'tags', ())
key = _flatten(line_a_tags) | _flatten(line_b_tags)
line_a_tags = line_a.tags #getattr(line_a, 'tags', ())
line_b_tags = line_b.tags #getattr(line_b, 'tags', ())
key = (line_a_tags, line_b_tags)
tags = tag_cache.setdefault(key, line_a_tags + line_b_tags)
assert isinstance(tags, ConfigurationGroups)
yield Str(line_a, tags)
else:
for line in a[i1:i2]:
......@@ -409,93 +745,22 @@ def produce_preprocessor(iterable):
state = None
for line in iterable:
key = line.tags or None
key = line.tags# or None
if key == state:
yield wrap(line)
else:
if exact_reverse(key, state):
yield wrap('#else /* %s */\n' % format_tags(state))
if key.exact_reverse(state):
yield wrap('#else /* %s */\n' % state.format_tags())
else:
if state:
yield wrap('#endif /* %s */\n' % format_tags(state))
yield wrap('#endif /* %s */\n' % state.format_tags())
if key:
yield wrap('#if %s\n' % format_tags(key))
yield wrap('#if %s\n' % key.format_tags())
yield wrap(line)
state = key
if state:
yield wrap('#endif /* %s */\n' % format_tags(state))
def exact_reverse(tags1, tags2):
    """
    Tell whether *tags1* and *tags2* are each a one-element tuple holding a
    one-item frozenset on the same name with opposite boolean flags.

    Returns ``None`` when either argument is empty, when the shapes do not
    match, or when the names differ; otherwise ``True``/``False``.
    Raises ``TypeError`` if a non-empty argument is not a tuple.
    """
    if not tags1 or not tags2:
        return None
    for candidate in (tags1, tags2):
        if not isinstance(candidate, tuple):
            raise TypeError(repr(candidate))
    if len(tags1) != 1 or len(tags2) != 1:
        return None

    first, second = tags1[0], tags2[0]
    assert isinstance(first, frozenset), first
    assert isinstance(second, frozenset), second
    if len(first) != 1 or len(second) != 1:
        return None

    (left,) = first
    (right,) = second
    if left[0] == right[0]:
        return sorted([left[1], right[1]]) == [False, True]
    return None
###
# TODO: Rework conditions/tags to use custom classes+methods instead of
# tuples and sets and functions for ease of debugging/reading.
###
def format_cond(cond):
    """
    Render a ``(name, bool)`` condition: the name itself when the flag is
    true, the name prefixed with ``'!'`` otherwise.

    Raises ``TypeError`` if *cond* is not a 2-tuple ending in a bool.
    """
    is_pair = isinstance(cond, tuple) and len(cond) == 2
    if not (is_pair and isinstance(cond[-1], bool)):
        raise TypeError(repr(cond))
    name, enabled = cond
    return name if enabled else '!' + name
def format_tag(tag):
    """
    Render a frozenset of conditions, in sorted order, joined by ``' && '``.

    Raises ``TypeError`` if *tag* is not a frozenset.
    """
    if isinstance(tag, frozenset):
        return ' && '.join(format_cond(cond) for cond in sorted(tag))
    raise TypeError(repr(tag))
def format_tags(tags):
    """
    Render a tuple of tags: each one formatted, parenthesized, and joined
    by ``' || '``.

    Raises ``TypeError`` if *tags* is not a tuple.
    """
    if isinstance(tags, tuple):
        groups = ['(%s)' % format_tag(tag) for tag in tags]
        return ' || '.join(groups)
    raise TypeError(repr(tags))
def attach_tags(text, tags):
    """
    Split *text* on newlines and return one ``Str`` per line (newline
    restored), each carrying ``frozenset(tags)``.

    A single trailing empty piece (text ending in a newline) is dropped.
    """
    frozen = frozenset(tags)
    pieces = text.split('\n')
    if pieces and not pieces[-1]:
        pieces.pop()
    return [Str(piece + '\n', frozen) for piece in pieces]
def is_tags_type(tags):
    """
    Check that *tags* is a tuple of frozensets of ``(str, bool)`` 2-tuples.

    Returns ``False`` when *tags* is not a tuple or contains a
    non-frozenset. Raises ``TypeError`` when a frozenset holds an item
    that is not a ``(str, bool)`` 2-tuple. Returns ``True`` otherwise.
    """
    if not isinstance(tags, tuple):
        return False
    for tag in tags:
        if not isinstance(tag, frozenset):
            return False
        for item in tag:
            well_formed = (
                isinstance(item, tuple)
                and len(item) == 2
                and isinstance(item[1], bool)
                and isinstance(item[0], str)
            )
            if not well_formed:
                raise TypeError('Invalid item: %r\n%s' % (item, tags))
    return True
yield wrap('#endif /* %s */\n' % state.format_tags())
class Str(str):
"""This is a string subclass that has a set of tags attached to it.
......@@ -506,10 +771,11 @@ class Str(str):
def __new__(cls, string, tags):
if not isinstance(string, str):
raise TypeError('string must be str: %s' % (type(string), ))
if isinstance(tags, frozenset):
tags = (tags,)
if not is_tags_type(tags):
raise TypeError('tags must be a tuple of frozensets of 2-tuples: %r' % (tags, ))
if not isinstance(tags, Configuration) and not isinstance(tags, ConfigurationGroups):
raise TypeError("Must be tags or tag groups: %r" % (tags,))
if isinstance(tags, Configuration):
tags = ConfigurationGroups((tags,))
self = str.__new__(cls, string)
self.tags = tags
return self
......@@ -538,52 +804,6 @@ class Str(str):
return self.__class__(str.%s(self, *args), self.tags)''' % (method, method), locals())
def simplify_tags(tags):
    """
    Return *tags* (a tuple of sets of ``(name, bool)`` items) with
    redundant entries removed, as a tuple.

    Applied repeatedly, restarting after each change:

    * empty tags are dropped;
    * an exact duplicate tag is dropped;
    * when one tag holds an item whose inverse is in another tag, and the
      two tags are equal once that inverse is removed, the pair is
      replaced by that common remainder.

    Raises ``TypeError`` if *tags* is not a tuple.
    """
    if not isinstance(tags, tuple):
        raise TypeError

    remaining = list(tags)

    # Drop the first empty tag found and start over.
    for candidate in remaining:
        if not candidate:
            remaining.remove(candidate)
            return simplify_tags(tuple(remaining))

    for left, right in combinations(remaining, 2):
        if left == right:
            remaining.remove(left)
            return simplify_tags(tuple(remaining))

        for item in left:
            flipped = inverted(item)
            if flipped not in right:
                continue
            reduced_left = left - {flipped}
            reduced_right = right - {flipped}
            if reduced_left == reduced_right:
                remaining.remove(left)
                remaining.remove(right)
                remaining.append(reduced_left)
                return simplify_tags(tuple(remaining))

    return tuple(remaining)
def inverted(item):
    """
    Return the ``(name, flag)`` pair with its flag flipped.

    Raises ``TypeError`` if *item* is not a 2-tuple, and ``ValueError``
    if its flag is neither ``True`` nor ``False``.
    """
    if not isinstance(item, tuple) or len(item) != 2:
        raise TypeError(repr(item))
    name, flag = item
    if flag is True:
        return (name, False)
    if flag is False:
        return (name, True)
    raise ValueError(repr(item))
def parse_parameter_names(x):
......@@ -751,114 +971,6 @@ def postprocess_cython_output(filename, banner):
result.append(line)
return ''.join(result)
class Config(object):
    """Holds the set of ``(parameter, bool)`` conditions of one configuration."""

    def __init__(self, configuration):
        self.conditions = set(configuration)

    def is_condition_true(self, directive):
        """
        Return whether *directive* (``'#if X'`` or ``'#ifdef X'``) is
        enabled, i.e. whether ``(X, True)`` is among the conditions.

        ``'#ifdef X'`` is looked up as ``defined(X)``. Any other directive
        raises ``AssertionError``.
        """
        keyword, separator, parameter = directive.partition(' ')
        if separator and keyword == '#if':
            pass
        elif separator and keyword == '#ifdef':
            parameter = 'defined(%s)' % parameter
        else:
            raise AssertionError('Invalid directive: %r' % directive)
        return (parameter, True) in self.conditions
def get_conditions(filename):
    """
    Scan *filename* and return the set of condition stacks seen.

    Each element is a tuple of ``(parameter, bool)`` pairs describing the
    ``#if``/``#ifdef``/``#else`` nesting in effect on a non-directive
    line; lines outside any conditional contribute the empty tuple.
    ``#ifdef X`` is recorded as ``defined(X)``.

    Syntax errors in the file are logged and terminate the process with
    exit status 1; any other exception is logged and re-raised.
    """
    conditions = set()
    condition_stack = []
    linecount = 0
    # Use a context manager so the file is closed even when an error
    # (or sys.exit) interrupts the scan.
    with open(filename) as f:
        for line in f:
            linecount += 1
            try:
                m = match_condition(line)
                if m is not None:
                    split = m.group(1).strip().split(' ', 1)
                    directive = split[0].strip()
                    if len(split) == 1:
                        parameter = None
                        assert directive in ('else', 'endif'), directive
                    else:
                        parameter = split[1].strip()
                        assert directive in ('if', 'ifdef'), directive
                    if directive == 'ifdef':
                        # Normalize "#ifdef X" to "#if defined(X)".
                        directive = 'if'
                        parameter = 'defined(%s)' % parameter
                    if directive == 'if':
                        condition_stack.append((parameter, True))
                    elif directive == 'else':
                        if not condition_stack:
                            raise SyntaxError('Unexpected "#else"')
                        last_cond, true = condition_stack.pop()
                        assert true is True, true
                        condition_stack.append((last_cond, not true))
                    elif directive == 'endif':
                        if not condition_stack:
                            raise SyntaxError('Unexpected "#endif"')
                        condition_stack.pop()
                    else:
                        raise AssertionError('Internal error')
                else:
                    conditions.add(tuple(condition_stack))
            except BaseException as ex:
                log('%s:%s: %s', filename, linecount, ex)
                if isinstance(ex, SyntaxError):
                    sys.exit(1)
                else:
                    raise
    return conditions
def flat_tuple(x):
    """Flatten one level of nesting in *x* and return the items as a tuple."""
    return tuple(subitem for item in x for subitem in item)
def get_selections(items):
    """
    Return the set of flattened tuples obtained from every way of picking
    ``len(items)`` elements of *items* (with repetition); each pick is
    de-duplicated and sorted before being flattened.
    """
    picks = product(items, repeat=len(items))
    return {flat_tuple(sorted(set(pick))) for pick in picks}
def is_impossible(configuration):
    """
    Return ``True`` if *configuration* (an iterable of ``(cond, flag)``
    pairs) lists the same condition with two different flags; otherwise
    fall through and return ``None``.
    """
    seen = {}
    for cond, flag in configuration:
        # setdefault records the first flag seen for a condition and
        # hands back the recorded one on later occurrences.
        if seen.setdefault(cond, flag) != flag:
            return True
def get_configurations(filename):
    """
    Return the set of complete configurations for *filename*.

    Every possible (non-contradictory) selection of the file's conditions
    is extended with ``(cond, False)`` for each condition it does not
    mention, then stored as a sorted tuple.
    """
    conditions = get_conditions(filename)
    possible = [
        selection
        for selection in get_selections(conditions)
        if not is_impossible(selection)
    ]
    allconds = set(cond for selection in possible for cond, _flag in selection)

    result = set()
    for configuration in possible:
        mentioned = set(entry[0] for entry in configuration)
        for cond in allconds - mentioned:
            configuration = configuration + ((cond, False), )
        result.add(tuple(sorted(configuration)))
    return result
def log(message, *args):
try:
string = message % args
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment