#!/usr/bin/env python
# Copyright (C) 2011-2012 Denis Bilenko (http://denisbilenko.com)
# Copyright (C) 2015-2016 gevent contributors
from __future__ import print_function
5 6
import sys
import os
7
import os.path
8 9 10 11 12
import re
import traceback
import datetime
import difflib
from hashlib import md5
13
from itertools import combinations, product
Jason Madden's avatar
Jason Madden committed
14 15 16 17 18 19 20 21 22 23 24
import subprocess
import multiprocessing
import tempfile
import shutil

import threading

class Thread(threading.Thread):
    """A thread that remembers what its target returned.

    Once the thread has run, the return value of the target callable is
    available as ``value``. It stays ``None`` while the target has not
    completed (or if it raised).
    """
    value = None

    def run(self):
        # Python 3 keeps the target under single-underscore attributes,
        # Python 2 under name-mangled ones.
        target = getattr(self, '_target', None) # Py3
        if target is None:
            target = getattr(self, '_Thread__target')
            args = getattr(self, '_Thread__args')
            kwargs = getattr(self, '_Thread__kwargs')
        else:
            args = self._args
            kwargs = self._kwargs
        # Forward keyword arguments too, like threading.Thread.run does.
        self.value = target(*args, **kwargs)
32

Denis Bilenko's avatar
Denis Bilenko committed
33
# do_exec(co, loc) executes the code object *co* using *loc* as its namespace.
# The two syntaxes are mutually incompatible at compile time, so the
# definition that matches the running interpreter is created from a string.
do_exec = None
if sys.version_info >= (3, 0):
    exec("def do_exec(co, loc): exec(co, loc)\n")
else:
    exec("def do_exec(co, loc): exec co in loc\n")

39

Denis Bilenko's avatar
Denis Bilenko committed
40
# Cython executable to run; $CYTHON overrides the default.
CYTHON = os.environ.get('CYTHON') or 'cython'
# Truthy when $CYTHONPP_DEBUG is set to a non-empty string.
DEBUG = os.environ.get('CYTHONPP_DEBUG', False)
WRITE_OUTPUT = False

if os.getenv('READTHEDOCS'):
    # Sometimes RTD fails to put our virtualenv bin directory
    # on the PATH, meaning we can't run cython. Fix that.
    # PATH may be unset or empty; only join the entries that exist so we
    # neither raise KeyError nor add an empty component.
    path_entries = [entry
                    for entry in (os.environ.get('PATH'),
                                  os.path.dirname(sys.executable))
                    if entry]
    os.environ['PATH'] = os.pathsep.join(path_entries)

50
# Parameter name in macros must match this regex:
param_name_re = re.compile(r'^[a-zA-Z_]\w*$')

# First line of a definition of a new macro.
# Groups: (name, parenthesised parameter list or None, value).
define_re = re.compile(r'^#define\s+([a-zA-Z_]\w*)(\((?:[^,)]+,)*[^,)]+\))?\s+(.*)$')


# cython header; group 1 is the "Generated by Cython <version>" part:
cython_header_re = re.compile(r'^/\* (generated by cython [^\s*]+)[^*]+\*/$', re.I)
#assert cython_header_re.match('/* Generated by Cython 0.21.1 */').group(1) == 'Generated by Cython 0.21.1'
#assert cython_header_re.match('/* Generated by Cython 0.19 on 55-555-555 */').group(1) == 'Generated by Cython 0.19'

62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401
class Configuration(frozenset):
    """
    A set of CPP conditions that apply to a given sequence
    of lines. Sometimes referred to as a "tag".

    Instances are interned: building a configuration from the same
    conditions returns the same object. Configurations are iterated in
    sorted order for consistency across runs.
    """
    __slots__ = ('_sorted',)
    _cache = {}

    def __new__(cls, iterable):
        """
        Return the interned configuration for the conditions in *iterable*.

        Raises TypeError if an element is not a Condition or if
        *iterable* is empty.
        """
        sorted_iterable = tuple(sorted(frozenset(iterable)))
        if sorted_iterable not in cls._cache:
            if not all(isinstance(x, Condition) for x in sorted_iterable):
                raise TypeError("Must be iterable of conditions")
            if not sorted_iterable:
                raise TypeError("Empty configurations not allowed")
            self = frozenset.__new__(cls, sorted_iterable)
            self._sorted = sorted_iterable
            cls._cache[sorted_iterable] = self

        return cls._cache[sorted_iterable]

    def union(self, other):
        return Configuration(frozenset.union(self, other))

    def __add__(self, conditions):
        return self.union(conditions)

    def difference(self, other):
        return Configuration(frozenset.difference(self, other))

    def __sub__(self, other):
        return self.difference(other)

    def __iter__(self):
        return iter(self._sorted)

    def format_tag(self):
        "Return the conditions joined with ' && ', as a CPP expression."
        return ' && '.join([x.format_cond() for x in self])

    def __repr__(self):
        return "Configuration({" + ', '.join((repr(x) for x in self)) + '})'

    @property
    def all_directives(self):
        "All the directives in the conditions of this configuration"
        return set(x.directive for x in self)

    def is_impossible(self):
        """
        Return whether the configuration (a Configuration) contradicts itself,
        i.e. holds the same directive with two different settings.
        """
        conds = {}
        for cond_name, cond_setting in self:
            if conds.setdefault(cond_name, cond_setting) != cond_setting:
                return True
        return False

    def is_condition_true(self, directive):
        """
        Return whether the ``#if``/``#ifdef`` line *directive* holds
        in this configuration.

        Raises AssertionError for any other kind of line.
        """
        # Split on any run of whitespace: the directive regex accepts tabs
        # and repeated spaces between the keyword and its parameter.
        parts = directive.split(None, 1)
        if len(parts) != 2 or parts[0] not in ('#if', '#ifdef'):
            raise AssertionError('Invalid directive: %r' % directive)
        keyword, parameter = parts
        parameter = parameter.strip()
        if keyword == '#ifdef':
            parameter = 'defined(%s)' % parameter
        cond = (parameter, True)
        return cond in self

    def attach_tags(self, text):
        """
        Split *text* into lines and return them as a list of Str,
        each ending in a newline and tagged with this configuration.
        """
        result = text.split('\n')
        if result and not result[-1]:
            del result[-1]
        return [Str(x + '\n', self) for x in result]

    @classmethod
    def get_configurations(cls, filename):
        """
        Returns a set of Configuration objects representing
        the configurations seen in the file.

        A syntax error in the directives is logged and terminates the
        process with exit status 1.
        """
        conditions = set()
        condition_stack = []
        linecount = 0
        match_condition = Condition.match_condition
        with open(filename) as f:
            for line in f:
                linecount += 1
                try:
                    m = match_condition(line)
                    if m is None:
                        # An ordinary line: record the conditions it sits under.
                        if condition_stack: # added
                            conditions.add(cls(condition_stack))
                        continue

                    # Split on any whitespace so that tabs and repeated
                    # spaces after the keyword are handled.
                    split = m.group(1).split(None, 1)
                    directive = split[0]
                    if len(split) == 1:
                        parameter = None
                        assert directive in ('else', 'endif'), directive
                    else:
                        parameter = split[1].strip()
                        assert directive in ('if', 'ifdef'), directive

                    if directive == 'ifdef':
                        directive = 'if'
                        parameter = 'defined(%s)' % parameter

                    if directive == 'if':
                        condition_stack.append(Condition(parameter, True))
                    elif directive == 'else':
                        if not condition_stack:
                            raise SyntaxError('Unexpected "#else"')
                        last_cond, true = condition_stack.pop()
                        assert true is True, true
                        condition_stack.append(Condition(last_cond, not true))
                    elif directive == 'endif':
                        if not condition_stack:
                            raise SyntaxError('Unexpected "#endif"')
                        condition_stack.pop()
                    else:
                        raise AssertionError('Internal error')
                except BaseException as ex:
                    log('%s:%s: %s', filename, linecount, ex)
                    if isinstance(ex, SyntaxError):
                        sys.exit(1)
                    else:
                        raise
        dbg("Found conditions %s", conditions)
        return conditions


    @classmethod
    def get_permutations_of_configurations(cls, items):
        """
        Returns a set of Configuration objects representing all the
        possible permutations of the given list of configuration
        objects. Impossible configurations are excluded.

        Each result is the union of the conditions of one non-empty subset
        of *items*; an empty *items* yields an empty set.
        """
        items = list(set(items))
        possible_configurations = set()
        # Enumerate the non-empty subsets directly. This yields the same
        # unions as product(items, repeat=len(items)) but visits 2**n - 1
        # candidates instead of n**n.
        for size in range(1, len(items) + 1):
            for subset in combinations(items, size):
                conditions = []
                for configuration in subset:
                    conditions.extend(configuration)
                flattened = cls(conditions)
                if not flattened.is_impossible():
                    possible_configurations.add(flattened)

        return possible_configurations

    @classmethod
    def get_permutations_of_configurations_in_file(cls, filename):
        """
        Returns a sorted list of unique configurations possible in the given
        file.
        """
        # Comparing frozensets with "<" tests for subsets, which is not a
        # total order; sort on the ordered tuple of conditions instead.
        return sorted(cls.get_permutations_of_configurations(cls.get_configurations(filename)),
                      key=tuple)

    @classmethod
    def get_complete_configurations(cls, filename):
        """
        Return a sorted list of the set of unique configurations possible
        in the given file; each configuration will have the all the conditions
        it specifies, plus the implicit conditions that it does not specify.
        """
        configurations = cls.get_permutations_of_configurations_in_file(filename)
        all_cond_names = set()
        for config in configurations:
            all_cond_names |= config.all_directives

        result = set()
        for configuration in configurations:
            cond_names_not_in_configuration = all_cond_names - configuration.all_directives
            if cond_names_not_in_configuration:
                # Directives this configuration does not mention are
                # implicitly false.
                configuration = configuration + [
                    Condition(missing_cond_name, False)
                    for missing_cond_name in sorted(cond_names_not_in_configuration)
                ]
            result.add(configuration)

        # XXX: Previously, this produced eight configurations for gevent/corecext.ppyx
        # (containing all the possible permutations).
        # But two of them produced identical results and were hashed as such
        # by run_cython_on_files. We're now producing just the 6 results that
        # are distinct in that case. I'm not exactly sure why
        assert all(isinstance(x, Configuration) for x in result)
        return sorted(result, key=tuple)

class Condition(tuple):
    """
    One CPP condition, stored as the pair ``(name, True|False)``.

    Instances are interned, so equal conditions are the same object.
    Accepts either ``Condition(name, value)`` or ``Condition((name, value))``.
    """
    # Conditional directive:
    condition_re = re.compile(r'^#(ifdef\s+.+|if\s+.+|else\s*|endif\s*)$')

    _cache = {}

    __slots__ = ()

    def __new__(cls, *args):
        argcount = len(args)
        if argcount == 1:
            pair = args[0]
        elif argcount == 2:
            # name, value; from literal constructor
            pair = args
        else:
            raise TypeError("wrong argument number", args)

        try:
            return cls._cache[pair]
        except KeyError:
            pass

        # First time this pair is seen: validate it, then intern it.
        if len(pair) != 2:
            raise TypeError("Must be len 2", pair)
        if not (isinstance(pair[0], str) and isinstance(pair[1], bool)):
            raise TypeError("Must be (str, bool)")
        instance = tuple.__new__(cls, pair)
        cls._cache[pair] = instance
        return instance

    def __repr__(self):
        return "Condition" + tuple.__repr__(self)

    @property
    def directive(self):
        "The name part of the condition."
        return self[0]

    @property
    def value(self):
        "The boolean part of the condition."
        return self[1]

    def format_cond(self):
        "Return the directive, prefixed with '!' when the value is false."
        prefix = '' if self.value else '!'
        return prefix + self.directive

    def inverted(self):
        "Return the condition with the same directive and the opposite value."
        return Condition(self.directive, not self.value)

    @classmethod
    def match_condition(cls, line):
        """
        Return the regex match if *line* is a conditional directive, else None.

        Lines ending in a colon are never treated as directives.
        """
        candidate = line.strip()
        if candidate.endswith(':'):
            return None
        return cls.condition_re.match(candidate)

class ConfigurationGroups(tuple):
    """
    A sequence of Configurations that apply to the given line.

    These are maintained in sorted order, and instances are interned:
    the same tags (in any order) give the same object.
    """

    _cache = {}

    def __new__(cls, tags):
        """
        Return the interned group for *tags*.

        Raises TypeError if an element is not a Configuration.
        """
        tags = tuple(tags)
        if not all(isinstance(x, Configuration) for x in tags):
            raise TypeError("Must be a Configuration", tags)

        # "<" on frozensets is the subset test, which is only a partial
        # order, so a plain sorted() does not produce a canonical sequence.
        # Sort on each configuration's ordered tuple of conditions instead.
        sorted_tags = tuple(sorted(tags, key=tuple))
        try:
            return cls._cache[sorted_tags]
        except KeyError:
            self = tuple.__new__(cls, sorted_tags)
            self._simplified = False
            cls._cache[sorted_tags] = self
            return self

    def __repr__(self):
        return "ConfigurationGroups" + tuple.__repr__(self)

    def __add__(self, other):
        l = list(self)
        l.extend(other)
        return ConfigurationGroups(l)

    def exact_reverse(self, tags2):
        """
        Return True when this group and *tags2* each consist of a single
        one-condition configuration, for the same directive, with
        opposite values. Return False otherwise.

        Raises TypeError if *tags2* is non-empty and not a tuple.
        """
        if not self or not tags2:
            return False
        if not isinstance(tags2, tuple):
            raise TypeError(repr(tags2))
        if len(self) != 1 or len(tags2) != 1:
            return False

        tag1 = self[0]
        tag2 = tags2[0]
        assert isinstance(tag1, Configuration), tag1
        assert isinstance(tag2, Configuration), tag2
        if len(tag1) != 1 or len(tag2) != 1:
            return False

        cond1 = list(tag1)[0]
        cond2 = list(tag2)[0]
        if cond1[0] != cond2[0]:
            return False
        return sorted([cond1[1], cond2[1]]) == [False, True]

    def format_tags(self):
        "Return the configurations as parenthesised terms joined with ' || '."
        return ' || '.join('(%s)' % x.format_tag() for x in sorted(self, key=tuple))


    def simplify_tags(self):
        """
        Return an equivalent group with redundant configurations removed.

        Duplicate configurations are collapsed. The result is memoised per
        instance through ``_simplified``.
        """
        if self._simplified:
            return self

        for tag1, tag2 in combinations(self, 2):
            if tag1 == tag2:
                tags = list(self)
                tags.remove(tag1)
                return ConfigurationGroups(tags).simplify_tags()

            for condition in tag1:
                inverted_condition = condition.inverted()
                if inverted_condition in tag2:
                    # NOTE(review): this looks intended to merge
                    # (X && c) || (X && !c) into (X), but subtracting
                    # inverted_condition from tag1 leaves tag1 unchanged
                    # (unless tag1 holds both c and !c), so the equality
                    # below can only hold when tag2 itself contains both
                    # c and !c. Left as is on purpose: enabling the merge
                    # would change the generated output. Confirm intent.
                    tag1_copy = tag1 - {inverted_condition}
                    tag2_copy = tag2 - {inverted_condition}

                    assert isinstance(tag1_copy, Configuration), tag1_copy
                    assert isinstance(tag2_copy, Configuration), tag2_copy

                    if tag1_copy == tag2_copy:
                        tags = list(self)
                        tags.remove(tag1)
                        tags.remove(tag2)
                        tags.append(tag1_copy)
                        return ConfigurationGroups(tags).simplify_tags()

        self._simplified = True
        return self
Denis Bilenko's avatar
Denis Bilenko committed
402

403

404 405
# Placeholder standing in for a newline inside a single line of text;
# generate_merged() replaces every occurrence with '\n' in the final output.
newline_token = ' <cythonpp.py: REPLACE WITH NEWLINE!> '

Jason Madden's avatar
Jason Madden committed
406 407 408 409 410 411 412
def _run_cython_on_file(configuration, pyx_filename,
                        py_banner, banner,
                        output_filename,
                        counter, lines,
                        cache=None):
    value = ''.join(lines)
    sourcehash = md5(value.encode("utf-8")).hexdigest()
413
    comment = configuration.format_tag() + " hash:" + str(sourcehash)
Jason Madden's avatar
Jason Madden committed
414 415 416 417
    if os.path.isabs(output_filename):
        raise ValueError("output cannot be absolute")
    # We can't change the actual name of the pyx file because
    # cython generates function names based in that string.
418 419
    # XXX: Note that this causes cython to generate
    # a "corecext" name instead of "gevent.corecext"
Jason Madden's avatar
Jason Madden committed
420
    tempdir = tempfile.mkdtemp()
421 422 423 424
    #unique_pyx_filename = pyx_filename
    #unique_output_filename = output_filename
    unique_pyx_filename = os.path.join(tempdir, pyx_filename)
    unique_output_filename = os.path.join(tempdir, output_filename)
425

Jason Madden's avatar
Jason Madden committed
426
    dirname = os.path.dirname(unique_pyx_filename) # output must be in same dir
427
    dbg("Output filename %s", unique_output_filename)
428
    if dirname and not os.path.exists(dirname):
429
        dbg("Making dir %s", dirname)
Jason Madden's avatar
Jason Madden committed
430 431 432 433 434 435 436 437 438 439 440
        os.makedirs(dirname)
    try:
        atomic_write(unique_pyx_filename, py_banner + value)
        if WRITE_OUTPUT:
            atomic_write(unique_pyx_filename + '.deb', '# %s (%s)\n%s' % (banner, comment, value))
        output = run_cython(unique_pyx_filename, sourcehash, unique_output_filename, banner, comment,
                            cache)
        if WRITE_OUTPUT:
            atomic_write(unique_output_filename + '.deb', output)
    finally:
        shutil.rmtree(tempdir, True)
441

442
    return configuration.attach_tags(output), configuration, sourcehash
Jason Madden's avatar
Jason Madden committed
443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460


def _run_cython_on_files(pyx_filename, py_banner, banner, output_filename, preprocessed):
    counter = 0
    threads = []
    cache = {}
    for configuration, lines in sorted(preprocessed.items()):
        counter += 1
        threads.append(Thread(target=_run_cython_on_file,
                              args=(configuration, pyx_filename,
                                    py_banner, banner, output_filename,
                                    counter, lines,
                                    cache)))
        threads[-1].start()

    for t in threads:
        t.join()

461
    same_results = {} # {sourcehash: tagged_str}
Jason Madden's avatar
Jason Madden committed
462 463 464 465 466 467 468 469 470 471 472 473
    for t in threads:
        sourcehash = t.value[2]
        tagged_output = t.value[0]
        if sourcehash not in same_results:
            same_results[sourcehash] = tagged_output
        else:
            # Nice, something to combine with tags
            other_tagged_output = same_results[sourcehash]
            assert len(tagged_output) == len(other_tagged_output)
            combined_lines = []
            for line_a, line_b in zip(tagged_output, other_tagged_output):
                combined_tags = line_a.tags + line_b.tags
474
                combined_lines.append(Str(line_a, combined_tags.simplify_tags()))
Jason Madden's avatar
Jason Madden committed
475 476
            same_results[sourcehash] = combined_lines

477 478 479 480 481
    # Order them as they were processed for repeatability
    ordered_results = []
    for t in threads:
        if t.value[0] not in ordered_results:
            ordered_results.append(same_results[t.value[2]])
Jason Madden's avatar
Jason Madden committed
482

483
    return ordered_results
484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508

def process_filename(filename, output_filename=None):
    """Process the .ppyx file with preprocessor and compile it with cython.

    The algorithm is as following:

        1) Identify all possible preprocessor conditions in *filename*.
        2) Run preprocess_filename(*filename*) for each of these conditions.
        3) Process the output of preprocessor with Cython (as many times as
           there are different sources generated for different preprocessor
           definitions).
        4) Merge the output of different Cython runs using preprocessor conditions
           identified in (1).

    The merged result is written to *output_filename* (by default *filename*
    with its extension replaced by ``.c``), and the source preprocessed
    without any configuration is saved next to *filename* as ``.pyx``.
    """
    # splitext only looks at the last path component, so a dot in a
    # directory name is not mistaken for the start of the extension.
    base = os.path.splitext(filename)[0]
    if output_filename is None:
        output_filename = base + '.c'

    pyx_filename = base + '.pyx'
    assert pyx_filename != filename

    timestamp = str(datetime.datetime.now().replace(microsecond=0))
    banner = 'Generated by cythonpp.py on %s' % timestamp
    py_banner = '# %s\n' % banner

    preprocessed = {}
    for configuration in Configuration.get_complete_configurations(filename):
        dbg("Processing %s", configuration)
        preprocessed[configuration] = preprocess_filename(filename, configuration)
    # The None key holds the source with no configuration applied; it
    # becomes the saved .pyx and is not compiled.
    preprocessed[None] = preprocess_filename(filename, None)

    preprocessed = expand_to_match(preprocessed.items())
    reference_pyx = preprocessed.pop(None)

    sources = _run_cython_on_files(pyx_filename, py_banner, banner, output_filename,
                                   preprocessed)

    log('Generating %s ',  output_filename)
    result = generate_merged(sources)
    # The hash skips the first four lines of the result.
    result_hash = md5(''.join(result.split('\n')[4:]).encode("utf-8")).hexdigest()
    atomic_write(output_filename, result)
    log('%s bytes of hash %s\n', len(result), result_hash)

    if filename != pyx_filename:
        log('Saving %s', pyx_filename)
        atomic_write(pyx_filename, py_banner + ''.join(reference_pyx))


Jason Madden's avatar
Jason Madden committed
531
def generate_merged(sources):
    """Merge the tagged *sources* and return the result as a single string.

    The merged lines are passed through produce_preprocessor(), and each
    newline placeholder is replaced with a real newline.
    """
    return ''.join(line.replace(newline_token, '\n')
                   for line in produce_preprocessor(merge(sources)))
536 537 538 539 540 541 542 543 544 545 546 547 548 549


def preprocess_filename(filename, config):
    """Process given .ppyx file with preprocessor.

    This does the following
        1) Resolves "#if"s and "#ifdef"s using config
        2) Expands macro definitions (#define)

    When *config* is None the conditional directives are not resolved:
    they are kept in the output like any other line starting with "#".

    Returns the resulting lines, each created with Str_sourceline() and the
    zero-based number of the input line it came from. A syntax error is
    logged and terminates the process with exit status 1.
    """
    linecount = 0
    current_name = None  # macro whose multi-line definition is being read
    definitions = {}
    result = []
    # One entry per open "#if"/"#ifdef": a pair
    # (enclosing sections are included, current branch is included).
    # Tracking the enclosing state keeps a nested "#if" inside an excluded
    # section excluded, whatever its own condition evaluates to.
    including_section = []
    with open(filename) as f:
        for line in f:
            linecount += 1
            rstripped = line.rstrip()
            stripped = rstripped.lstrip()
            try:
                if current_name is not None:
                    # Continuation line of a multi-line #define.
                    name = current_name
                    value = rstripped
                    if value.endswith('\\'):
                        value = value[:-1].rstrip()
                    else:
                        current_name = None
                    definitions[name]['lines'].append(value)
                    continue

                including = not including_section or all(including_section[-1])

                # Macro definitions only count inside included sections.
                m = define_re.match(stripped) if including else None
                if m is not None:
                    name, params, value = m.groups()
                    value = value.strip()
                    if value.endswith('\\'):
                        value = value[:-1].rstrip()
                        current_name = name
                    definitions[name] = {'lines': [value]}
                    if params is None:
                        dbg('Adding definition for %r', name)
                    else:
                        definitions[name]['params'] = parse_parameter_names(params)
                        dbg('Adding definition for %r: %s', name, definitions[name]['params'])
                    continue

                m = Condition.match_condition(stripped)
                if m is not None and config is not None:
                    if stripped == '#else':
                        if not including_section:
                            raise SyntaxError('unexpected "#else"')
                        enclosing, branch = including_section.pop()
                        including_section.append((enclosing, not branch))
                    elif stripped == '#endif':
                        if not including_section:
                            raise SyntaxError('unexpected "#endif"')
                        including_section.pop()
                    else:
                        including_section.append(
                            (including, config.is_condition_true(stripped)))
                    continue

                if not including:
                    continue  # skip this line because an enclosing "#if" was false

                if stripped.startswith('#'):
                    # leave comments as is
                    result.append(Str_sourceline(line, linecount - 1))
                else:
                    lines = expand_definitions(line, definitions).split('\n')
                    if lines and not lines[-1]:
                        del lines[-1]
                    result.extend(Str_sourceline(x + '\n', linecount - 1) for x in lines)
            except BaseException as ex:
                log('%s:%s: %s', filename, linecount, ex)
                if isinstance(ex, SyntaxError):
                    sys.exit(1)
                else:
                    raise
    return result


def merge(sources):
    """Merge different sources into a single one. Each line of the result
    is a subclass of string that maintains the information for each configuration
    it should appear in the result.

    *sources* is an iterable of lists of tagged lines. Sources are merged
    pairwise until one remains; lines that are equal in both sources of a
    pair are emitted once carrying the tags of both. An empty *sources*
    gives an empty list.
    """
    sources = list(sources) # own copy
    if not sources:
        return []

    dbg("Merging %s", len(sources))
    if len(sources) == 1:
        return [Str(str(x), x.tags.simplify_tags()) for x in sources[0]]

    if not DEBUG:
        pool = multiprocessing.Pool()
    else:
        class SerialPool(object):
            "In-process stand-in for the parts of Pool used below."

            def imap(self, func, arg_list):
                return [func(*args) for args in arg_list]

            def apply(self, func, args):
                return func(*args)

            def close(self):
                pass

            def join(self):
                pass
        pool = SerialPool()

    try:
        groups = []

        while len(sources) >= 2:
            one, two = sources.pop(), sources.pop()
            groups.append((one, two))

        dbg("Merge groups %s", len(groups))
        # len sources == 0 or 1
        for merged in pool.imap(_merge, groups):
            dbg("Completed a merge in %s", os.getpid())
            sources.append(merged)
            # len sources == 1 or 2

            if len(sources) == 2:
                one, two = sources.pop(), sources.pop()
                sources.append(pool.apply(_merge, (one, two)))
                # len sources == 1
    finally:
        # Shut the worker processes down; a new pool is created for
        # every call that has something to merge.
        pool.close()
        pool.join()

    # len sources should now be 1
    dbg("Now merging %s", len(sources))
    return merge(sources)


def _merge(*args):
    """Merge two sources into a list of lines.

    The pair may be passed as two positional arguments or as a single
    tuple of two.
    """
    first = args[0]
    pair = first if isinstance(first, tuple) else args
    a, b = pair
    return list(_imerge(a, b))

def _imerge(a, b):
    # caching the tags speeds up serialization and future merges
    tag_cache = {}
687 688 689
    for tag, i1, i2, j1, j2 in difflib.SequenceMatcher(None, a, b).get_opcodes():
        if tag == 'equal':
            for line_a, line_b in zip(a[i1:i2], b[j1:j2]):
Jason Madden's avatar
Jason Madden committed
690
                # tags is a tuple of frozensets
691 692 693
                line_a_tags = line_a.tags #getattr(line_a, 'tags', ())
                line_b_tags = line_b.tags #getattr(line_b, 'tags', ())
                key = (line_a_tags, line_b_tags)
Jason Madden's avatar
Jason Madden committed
694
                tags = tag_cache.setdefault(key, line_a_tags + line_b_tags)
695
                assert isinstance(tags, ConfigurationGroups)
696
                yield Str(line_a, tags)
697 698 699 700 701 702 703 704 705 706 707 708 709
        else:
            for line in a[i1:i2]:
                yield line
            for line in b[j1:j2]:
                yield line


def expand_to_match(items):
    """Insert empty lines so that all sources has matching line numbers for the same code

    *items* is an iterable of ``(configuration, lines)`` pairs where every
    line has a ``sourceline`` attribute. Returns a dict mapping each
    configuration to a new list of lines padded with ``'\\n'`` entries.
    The input lists are not modified.
    """
    # Materialise once: the pairs are walked repeatedly below.
    items = list(items)
    cfg2newlines = {}  # maps configuration -> list
    for configuration, _ in items:
        cfg2newlines[configuration] = []

    # One [configuration, lines, next index] cursor per source. Advancing
    # an index avoids repeatedly deleting from the front of the caller's lists.
    cursors = [[configuration, lines, 0] for configuration, lines in items]

    while True:
        pending = [lines[position].sourceline
                   for _, lines, position in cursors
                   if position < len(lines)]
        if not pending:
            break
        minimalsourceline = min(pending)

        # Take at most one line per source in each round.
        for cursor in cursors:
            configuration, lines, position = cursor
            if position < len(lines) and lines[position].sourceline <= minimalsourceline:
                cfg2newlines[configuration].append(lines[position])
                cursor[2] = position + 1

        number_of_lines = max(len(x) for x in cfg2newlines.values())

        for newlines in cfg2newlines.values():
            add = (number_of_lines - len(newlines))
            newlines.extend(['\n'] * add)

    return cfg2newlines


def produce_preprocessor(iterable):
    """Yield the lines of ``iterable`` with preprocessor directives added.

    Each input line must have a ``tags`` attribute.  Whenever the tags of
    the current line differ from those of the previous one, ``#if``,
    ``#else`` or ``#endif`` lines are emitted before it; a final ``#endif``
    closes the last open group.  When ``DEBUG`` is set every emitted line is
    also reported through ``dbg`` with a running line number.
    """
    if DEBUG:
        counter = [0]

        def wrap(line):
            counter[0] += 1
            dbg('%5d: %s', counter[0], repr(str(line))[1:-1])
            return line
    else:
        def wrap(line):
            return line

    state = None
    for line in iterable:
        key = line.tags
        if not (key == state):
            if key.exact_reverse(state):
                # The new tags are the exact opposite of the open group.
                yield wrap('#else /* %s */\n' % state.format_tags())
            else:
                if state:
                    yield wrap('#endif /* %s */\n' % state.format_tags())
                if key:
                    yield wrap('#if %s\n' % key.format_tags())
            state = key
        yield wrap(line)

    if state:
        yield wrap('#endif /* %s */\n' % state.format_tags())
764

765 766 767 768 769 770
class Str(str):
    """A ``str`` subclass that carries a ``tags`` attribute.

    Used for merging the outputs.  The operations overridden below return
    a new instance of the same class with the same tags.
    """

    def __new__(cls, string, tags):
        if not isinstance(string, str):
            raise TypeError('string must be str: %s' % (type(string), ))
        if isinstance(tags, Configuration):
            # A single configuration is stored as a one-element group.
            tags = ConfigurationGroups((tags,))
        elif not isinstance(tags, ConfigurationGroups):
            raise TypeError("Must be tags or tag groups: %r" % (tags,))

        self = str.__new__(cls, string)
        self.tags = tags
        return self

    def __getnewargs__(self):
        # Arguments for __new__ when the object is copied or unpickled.
        return str(self), self.tags

    def __repr__(self):
        class_name = self.__class__.__name__
        text = str.__repr__(self)
        return '%s(%s, %r)' % (class_name, text, self.tags)

    def __add__(self, other):
        if isinstance(other, str):
            return self.__class__(str.__add__(self, other), self.tags)
        raise TypeError

    def __radd__(self, other):
        if isinstance(other, str):
            return self.__class__(str.__add__(other, self), self.tags)
        raise TypeError

    # The str methods named here are re-defined so that their result is
    # wrapped in the same class with the same tags.
    methods = ['__getslice__', '__getitem__', '__mul__', '__rmod__', '__rmul__',
               'join', 'replace', 'upper', 'lower']

    for method in methods:
        do_exec('''def %s(self, *args):
    return self.__class__(str.%s(self, *args), self.tags)''' % (method, method), locals())
805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832




def parse_parameter_names(x):
    """Split a parenthesised, comma-separated parameter list into names.

    ``x`` must start with ``(`` and end with ``)``.  Each name is stripped
    of surrounding whitespace and checked against ``param_name_re``.

    Raises SyntaxError for the first name that does not match.
    """
    assert x.startswith('(') and x.endswith(')'), repr(x)
    names = [piece.strip() for piece in x[1:-1].split(',')]
    for name in names:
        if not param_name_re.match(name):
            raise SyntaxError('Invalid parameter name: %r' % name)
    return names


def parse_parameter_values(x):
    """Split a parenthesised, comma-separated argument list into values.

    ``x`` must start with ``(`` and end with ``)``.  The text between the
    parentheses is split on every comma and each piece is stripped of
    surrounding whitespace; no validation of the pieces is done.
    """
    assert x.startswith('(') and x.endswith(')'), repr(x)
    inner = x[1:-1]
    return [piece.strip() for piece in inner.split(',')]


def expand_definitions(code, definitions):
    """Expand macro invocations in ``code`` and return the resulting text.

    ``definitions`` maps a macro name to a dict with a ``'lines'`` entry
    (list of strings, joined with newlines to form the replacement) and an
    optional ``'params'`` entry (list of parameter names).  A macro with
    parameters must be invoked as ``NAME(arg, ...)`` with the same number of
    arguments.  ``##`` next to a macro name is consumed by the expansion.

    Substitutions are applied one at a time until nothing changes.

    Raises SyntaxError on a wrong number of arguments, on a substitution
    that leaves the text unchanged, or after too many substitutions.
    """
    if not definitions:
        return code

    # Longest names first so that a macro whose name is a prefix of another
    # one cannot shadow it in the alternation.
    keys = sorted(definitions, key=lambda x: (-len(x), x))
    keys = '|'.join(re.escape(key) for key in keys)

    # This regex defines a macro invocation: the name must be delimited on
    # both sides by a non-word character, '##', or the start/end of the text
    # (or be followed by a parenthesised argument list).
    re_macro = re.compile(r'(^|##|[^\w])(%s)(\([^)]+\)|$|##|[^\w])' % keys)

    def repl(m):
        token = m.group(2)
        definition = definitions[token]

        params = definition.get('params', [])

        if params:
            arguments = m.group(3)
            if arguments.startswith('(') and arguments.endswith(')'):
                arguments = parse_parameter_values(arguments)
            else:
                arguments = None
            if arguments and len(params) == len(arguments):
                local_definitions = {}
                dbg('Macro %r params=%r arguments=%r source=%r', token, params, arguments, m.groups())
                for key, value in zip(params, arguments):
                    dbg('Adding argument %r=%r', key, value)
                    local_definitions[key] = {'lines': [value]}
                # Substitute the arguments into the macro body.
                result = expand_definitions('\n'.join(definition['lines']), local_definitions)
            else:
                msg = 'Invalid number of arguments for macro %s: expected %s, got %s'
                msg = msg % (token, len(params), len(arguments or []))
                raise SyntaxError(msg)
        else:
            result = '\n'.join(definition['lines'])
            # Put back the delimiter that followed the name, unless it was
            # the token-pasting marker.
            if m.group(3) != '##':
                result += m.group(3)
        if m.group(1) != '##':
            result = m.group(1) + result
        dbg('Replace %r with %r', m.group(0), result)
        return result

    for _ in range(20000):
        newcode, count = re_macro.subn(repl, code, count=1)
        if code == newcode:
            if count > 0:
                # A macro expanded to exactly the text it replaced.
                raise SyntaxError('Infinite recursion')
            return newcode
        code = newcode
    raise SyntaxError('Too many substitutions or internal error.')


class Str_sourceline(str):
    """A ``str`` subclass that carries a ``sourceline`` attribute."""

    def __new__(cls, source, sourceline):
        self = str.__new__(cls, source)
        self.sourceline = sourceline
        return self

    def __getnewargs__(self):
        # Arguments for __new__ when the object is copied or unpickled.
        return str(self), self.sourceline
891 892

def atomic_write(filename, data):
    """Write ``data`` to ``filename`` by way of a temporary file.

    The data is written to a temporary file in the same directory, flushed
    and fsync'ed, and then moved over ``filename``.  If anything fails the
    temporary file is removed and the exception propagates.
    """
    dirname = os.path.dirname(os.path.abspath(filename))
    tmpfd, tmpname = tempfile.mkstemp(dir=dirname, text=True)
    renamed = False
    try:
        with os.fdopen(tmpfd, 'w') as f:
            f.write(data)
            f.flush()
            os.fsync(f.fileno())

        dbg("Renaming %s to %s", tmpname, filename)
        try:
            if hasattr(os, 'replace'):
                # Overwrites an existing destination in a single step, so
                # there is no moment at which ``filename`` is missing.
                os.replace(tmpname, filename)
            else:
                # Python 2 has no os.replace; os.rename refuses to overwrite
                # on Windows, so the destination has to go first there.
                if os.name == 'nt' and os.path.exists(filename):
                    os.unlink(filename)
                os.rename(tmpname, filename)
        except Exception:
            log("Failed to rename '%s' to '%s'", tmpname, filename)
            raise
        renamed = True
    finally:
        # Do not leave the temporary file behind on failure.
        if not renamed and os.path.exists(tmpname):
            os.unlink(tmpname)
    dbg('Wrote %s bytes to %s', len(data), filename)


Jason Madden's avatar
Jason Madden committed
912
def run_cython(filename, sourcehash, output_filename, banner, comment, cache=None):
    """Run Cython on ``filename`` and return the post-processed output.

    If ``cache`` is given and already holds an entry for ``sourcehash``,
    that entry is returned and Cython is not run.  Otherwise the command is
    executed through ``system``, the generated file is passed to
    ``postprocess_cython_output`` together with ``banner``, and the result
    is stored in ``cache`` (when given) under ``sourcehash``.
    """
    dbg("Cython output to %s hash %s", output_filename, sourcehash)

    # Use an array for the argument so that filename arguments are properly
    # quoted according to local convention
    include_dirs = [
        os.path.join('src', 'gevent', 'libev'),
        os.path.join('src', 'gevent'),  # python.pxd, shared with c-ares
    ]
    command = [CYTHON, '-o', output_filename]
    for include_dir in include_dirs:
        command.extend(['-I', include_dir])
    command.append(filename)

    cached = None if cache is None else cache.get(sourcehash)
    if cached is not None:
        log('Reusing %s  # %s', command, comment)
        return cached

    system(command, comment)
    output = postprocess_cython_output(output_filename, banner)
    if cache is not None:
        cache[sourcehash] = output
    return output


def system(command, comment):
    """Run ``command`` (a list of arguments) and log what is being run.

    ``comment`` is only used in the log messages.  If the command exits
    with a non-zero status, or cannot be started at all, the current PATH
    and the contents of the interpreter's directory are logged to help with
    debugging and the exception is re-raised.
    """
    command_str = ' '.join(command)
    log('Running %s  # %s', command_str, comment)
    try:
        subprocess.check_call(command)
    except (subprocess.CalledProcessError, OSError):
        # debugging code; OSError covers the case where the executable
        # could not be found or started.
        log("Path: %s", os.getenv("PATH"))
        bin_dir = os.path.dirname(sys.executable)
        # Guard the listing so a problem here cannot replace the real error.
        bin_files = sorted(os.listdir(bin_dir)) if os.path.isdir(bin_dir) else []
        log("Bin: %s files: %s", bin_dir, ' '.join(bin_files))
        raise
    dbg('\tDone running %s # %s', command_str, comment)
945 946 947 948 949 950 951 952 953 954 955


def postprocess_cython_output(filename, banner):
    """Read the file ``filename`` and return its cleaned-up text.

    This does a few things:
    1) turns multiline C-style (/**/) comments into a single line by
       replacing \\n with newline_token
    2) adds our header (``banner``) as the first line
    3) removes the timestamp in cython's header so that different
       timestamps do not confuse the merger

    Trailing whitespace is also stripped from every line after the first.
    """
    output = ['/* %s */\n' % (banner)]

    with open(filename) as finput:
        header = finput.readline()
        header_match = cython_header_re.match(header.strip())
        # Keep only the captured part of a recognised cython header.
        output.append(('/* %s */' % header_match.group(1)) if header_match else header)

        inside_comment = False
        for line in finput:
            if line.endswith('\n'):
                line = line[:-1].rstrip() + '\n'

            if inside_comment:
                if '*/' in line:
                    # Last line of the comment keeps its real newline.
                    inside_comment = False
                else:
                    line = line.replace('\n', newline_token)
            elif line.lstrip().startswith('/* ') and '*/' not in line:
                # cython adds space before /* for some reason
                line = line.lstrip().replace('\n', newline_token)
                inside_comment = True

            output.append(line)

    return ''.join(output)

def log(message, *args):
    """Print ``message % args`` to stderr.

    If the formatting itself fails, the failure is reported on stderr
    instead of being raised: first the call stack with the formatting
    error, then the repr of the message and its arguments.
    """
    try:
        string = message % args
    except Exception:
        try:
            # Show where log() was called from, underline the offending
            # call, and append the formatting error itself.
            prefix = 'Traceback (most recent call last):\n'
            lines = traceback.format_stack()[:-1]
            error_lines = traceback.format_exc().replace(prefix, '')
            last_length = len(lines[-1].strip().rsplit('    ', 1)[-1])
            last_length = min(80, last_length)
            last_length = max(5, last_length)
            msg = '%s%s    %s\n%s' % (prefix, ''.join(lines), '^' * last_length, error_lines)
            sys.stderr.write(msg)
        except Exception:
            traceback.print_exc()
        try:
            message = '%r %% %r\n\n' % (message, args)
        except Exception:
            # Best effort: fall back to writing the raw message below.
            pass
        try:
            sys.stderr.write(message)
        except Exception:
            traceback.print_exc()
    else:
        print(string, file=sys.stderr)
1012 1013 1014 1015 1016 1017 1018 1019


def dbg(*args):
    """Pass ``args`` on to ``log`` when ``DEBUG`` is set; otherwise do nothing."""
    if DEBUG:
        return log(*args)
    return None


Jason Madden's avatar
Jason Madden committed
1020
def main():
    """Parse the command line and run the requested action on the input file.

    ``--list-cond``, ``--list`` and ``--ignore-cond`` each perform their own
    action; when none of them is given the input is handed to
    ``process_filename`` together with the ``--output-file`` value.
    """
    global DEBUG, WRITE_OUTPUT

    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--debug', action='store_true')
    parser.add_argument('--list', action='store_true', help='Show the list of different conditions')
    parser.add_argument('--list-cond', action='store_true')
    parser.add_argument('--ignore-cond', action='store_true', help='Ignore conditional directives (only expand definitions)')
    parser.add_argument('--write-intermediate', action='store_true', help='Save intermediate files produced by preprocessor and Cython')
    parser.add_argument('-o', '--output-file', help='Specify name of generated C file')
    parser.add_argument("input")
    options = parser.parse_args()
    filename = options.input

    if options.debug:
        DEBUG = True
    if options.write_intermediate:
        WRITE_OUTPUT = True

    if options.list_cond:
        for x in Configuration.get_configurations(filename):
            print("* ", x)

    if options.list:
        for x in Configuration.get_complete_configurations(filename):
            print("* ", x)

    if options.ignore_cond:
        class FakeConfig(object):
            # Reports every condition as false.
            def is_condition_true(*args):
                return False

        sys.stdout.write(preprocess_filename(filename, FakeConfig()))

    # The full run happens only when no listing/preview option was used.
    if not (options.list_cond or options.list or options.ignore_cond):
        process_filename(filename, options.output_file)
Jason Madden's avatar
Jason Madden committed
1064 1065 1066 1067


# Run the command-line entry point only when executed as a script.
if __name__ == '__main__':
    main()