reformat Plex code files

5607fabd · Stefan Behnel · 727e57d9
Commit 5607fabd authored Oct 10, 2014 by Stefan Behnel
9 changed files
--- a/Cython/Plex/Actions.py
+++ b/Cython/Plex/Actions.py
@@ -7,98 +7,101 @@
 #=======================================================================

 class Action(object):
+    def perform(self, token_stream, text):
+        pass  # abstract

-  def perform(self, token_stream, text):
-    pass # abstract
-
-  def same_as(self, other):
-    return self is other
+    def same_as(self, other):
+        return self is other


 class Return(Action):
-  """
-  Internal Plex action which causes |value| to
-  be returned as the value of the associated token
-  """
+    """
+    Internal Plex action which causes |value| to
+    be returned as the value of the associated token
+    """

-  def __init__(self, value):
-    self.value = value
+    def __init__(self, value):
+        self.value = value

-  def perform(self, token_stream, text):
-    return self.value
+    def perform(self, token_stream, text):
+        return self.value

-  def same_as(self, other):
-    return isinstance(other, Return) and self.value == other.value
+    def same_as(self, other):
+        return isinstance(other, Return) and self.value == other.value

-  def __repr__(self):
-    return "Return(%s)" % repr(self.value)
+    def __repr__(self):
+        return "Return(%s)" % repr(self.value)


 class Call(Action):
-  """
-  Internal Plex action which causes a function to be called.
-  """
+    """
+    Internal Plex action which causes a function to be called.
+    """

-  def __init__(self, function):
-    self.function = function
+    def __init__(self, function):
+        self.function = function

-  def perform(self, token_stream, text):
-    return self.function(token_stream, text)
+    def perform(self, token_stream, text):
+        return self.function(token_stream, text)

-  def __repr__(self):
-    return "Call(%s)" % self.function.__name__
+    def __repr__(self):
+        return "Call(%s)" % self.function.__name__

-  def same_as(self, other):
-    return isinstance(other, Call) and self.function is other.function
+    def same_as(self, other):
+        return isinstance(other, Call) and self.function is other.function


 class Begin(Action):
-  """
-  Begin(state_name) is a Plex action which causes the Scanner to
-  enter the state |state_name|. See the docstring of Plex.Lexicon
-  for more information.
-  """
+    """
+    Begin(state_name) is a Plex action which causes the Scanner to
+    enter the state |state_name|. See the docstring of Plex.Lexicon
+    for more information.
+    """

-  def __init__(self, state_name):
-    self.state_name = state_name
+    def __init__(self, state_name):
+        self.state_name = state_name

-  def perform(self, token_stream, text):
-    token_stream.begin(self.state_name)
+    def perform(self, token_stream, text):
+        token_stream.begin(self.state_name)

-  def __repr__(self):
-    return "Begin(%s)" % self.state_name
+    def __repr__(self):
+        return "Begin(%s)" % self.state_name

-  def same_as(self, other):
-    return isinstance(other, Begin) and self.state_name == other.state_name
+    def same_as(self, other):
+        return isinstance(other, Begin) and self.state_name == other.state_name


 class Ignore(Action):
-  """
-  IGNORE is a Plex action which causes its associated token
-  to be ignored. See the docstring of Plex.Lexicon  for more
-  information.
-  """
-  def perform(self, token_stream, text):
-    return None
+    """
+    IGNORE is a Plex action which causes its associated token
+    to be ignored. See the docstring of Plex.Lexicon  for more
+    information.
+    """
+
+    def perform(self, token_stream, text):
+        return None
+
+    def __repr__(self):
+        return "IGNORE"

-  def __repr__(self):
-    return "IGNORE"

 IGNORE = Ignore()
 #IGNORE.__doc__ = Ignore.__doc__

+
 class Text(Action):
-  """
-  TEXT is a Plex action which causes the text of a token to
-  be returned as the value of the token. See the docstring of
-  Plex.Lexicon  for more information.
-  """
+    """
+    TEXT is a Plex action which causes the text of a token to
+    be returned as the value of the token. See the docstring of
+    Plex.Lexicon  for more information.
+    """
+
+    def perform(self, token_stream, text):
+        return text

-  def perform(self, token_stream, text):
-    return text
+    def __repr__(self):
+        return "TEXT"

-  def __repr__(self):
-    return "TEXT"

 TEXT = Text()
 #TEXT.__doc__ = Text.__doc__

--- a/Cython/Plex/DFA.py
+++ b/Cython/Plex/DFA.py
--- a/Cython/Plex/Errors.py
+++ b/Cython/Plex/Errors.py
@@ -6,45 +6,49 @@
 #
 #=======================================================================

+
 class PlexError(Exception):
-  message = ""
+    message = ""
+

 class PlexTypeError(PlexError, TypeError):
-  pass
+    pass
+

 class PlexValueError(PlexError, ValueError):
-  pass
+    pass
+

 class InvalidRegex(PlexError):
-  pass
+    pass
+

 class InvalidToken(PlexError):
+    def __init__(self, token_number, message):
+        PlexError.__init__(self, "Token number %d: %s" % (token_number, message))

-  def __init__(self, token_number, message):
-    PlexError.__init__(self, "Token number %d: %s" % (token_number, message))

 class InvalidScanner(PlexError):
-  pass
-
-class AmbiguousAction(PlexError):
-  message = "Two tokens with different actions can match the same string"
-
-  def __init__(self):
    pass

-class UnrecognizedInput(PlexError):
-  scanner = None
-  position = None
-  state_name = None

-  def __init__(self, scanner, state_name):
-    self.scanner = scanner
-    self.position = scanner.get_position()
-    self.state_name = state_name
-
-  def __str__(self):
-    return ("'%s', line %d, char %d: Token not recognised in state %s"
-            % (self.position + (repr(self.state_name),)))
+class AmbiguousAction(PlexError):
+    message = "Two tokens with different actions can match the same string"

+    def __init__(self):
+        pass


+class UnrecognizedInput(PlexError):
+    scanner = None
+    position = None
+    state_name = None
+
+    def __init__(self, scanner, state_name):
+        self.scanner = scanner
+        self.position = scanner.get_position()
+        self.state_name = state_name
+
+    def __str__(self):
+        return ("'%s', line %d, char %d: Token not recognised in state %s" % (
+            self.position + (repr(self.state_name),)))
--- a/Cython/Plex/Lexicons.py
+++ b/Cython/Plex/Lexicons.py
--- a/Cython/Plex/Machines.py
+++ b/Cython/Plex/Machines.py
--- a/Cython/Plex/Regexps.py
+++ b/Cython/Plex/Regexps.py
@@ -42,14 +42,15 @@ def chars_to_ranges(s):
    while i < n:
        code1 = ord(char_list[i])
        code2 = code1 + 1
-        i = i + 1
+        i += 1
        while i < n and code2 >= ord(char_list[i]):
-            code2 = code2 + 1
-            i = i + 1
+            code2 += 1
+            i += 1
        result.append(code1)
        result.append(code2)
    return result

+
 def uppercase_range(code1, code2):
    """
    If the range of characters from code1 to code2-1 includes any
@@ -63,6 +64,7 @@ def uppercase_range(code1, code2):
    else:
        return None

+
 def lowercase_range(code1, code2):
    """
    If the range of characters from code1 to code2-1 includes any
@@ -76,6 +78,7 @@ def lowercase_range(code1, code2):
    else:
        return None

+
 def CodeRanges(code_list):
    """
    Given a list of codes as returned by chars_to_ranges, return
@@ -86,6 +89,7 @@ def CodeRanges(code_list):
        re_list.append(CodeRange(code_list[i], code_list[i + 1]))
    return Alt(*re_list)

+
 def CodeRange(code1, code2):
    """
    CodeRange(code1, code2) is an RE which matches any character
@@ -93,11 +97,12 @@ def CodeRange(code1, code2):
    """
    if code1 <= nl_code < code2:
        return Alt(RawCodeRange(code1, nl_code),
-                             RawNewline,
-                             RawCodeRange(nl_code + 1, code2))
+                   RawNewline,
+                   RawCodeRange(nl_code + 1, code2))
    else:
        return RawCodeRange(code1, code2)

+
 #
 #     Abstract classes
 #
@@ -110,12 +115,12 @@ class RE(object):
         re1 | re2         is an RE which matches either |re1| or |re2|
    """

-    nullable = 1 # True if this RE can match 0 input symbols
-    match_nl = 1 # True if this RE can match a string ending with '\n'
-    str = None     # Set to a string to override the class's __str__ result
+    nullable = 1  # True if this RE can match 0 input symbols
+    match_nl = 1  # True if this RE can match a string ending with '\n'
+    str = None    # Set to a string to override the class's __str__ result

    def build_machine(self, machine, initial_state, final_state,
-                                        match_bol, nocase):
+                      match_bol, nocase):
        """
        This method should add states to |machine| to implement this
        RE, starting at |initial_state| and ending at |final_state|.
@@ -124,7 +129,7 @@ class RE(object):
        letters should be treated as equivalent.
        """
        raise NotImplementedError("%s.build_machine not implemented" %
-            self.__class__.__name__)
+                                  self.__class__.__name__)

    def build_opt(self, m, initial_state, c):
        """
@@ -160,18 +165,18 @@ class RE(object):
        self.check_string(num, value)
        if len(value) != 1:
            raise Errors.PlexValueError("Invalid value for argument %d of Plex.%s."
-                "Expected a string of length 1, got: %s" % (
-                    num, self.__class__.__name__, repr(value)))
+                                        "Expected a string of length 1, got: %s" % (
+                                            num, self.__class__.__name__, repr(value)))

    def wrong_type(self, num, value, expected):
        if type(value) == types.InstanceType:
-                got = "%s.%s instance" % (
-                    value.__class__.__module__, value.__class__.__name__)
+            got = "%s.%s instance" % (
+                value.__class__.__module__, value.__class__.__name__)
        else:
            got = type(value).__name__
        raise Errors.PlexTypeError("Invalid type for argument %d of Plex.%s "
-                                        "(expected %s, got %s" % (
-                                            num, self.__class__.__name__, expected, got))
+                                   "(expected %s, got %s" % (
+                                       num, self.__class__.__name__, expected, got))

 #
 #     Primitive RE constructors
@@ -211,6 +216,7 @@ class RE(object):
 ##     def calc_str(self):
 ##         return "Char(%s)" % repr(self.char)

+
 def Char(c):
    """
    Char(c) is an RE which matches the character |c|.
@@ -222,6 +228,7 @@ def Char(c):
    result.str = "Char(%s)" % repr(c)
    return result

+
 class RawCodeRange(RE):
    """
    RawCodeRange(code1, code2) is a low-level RE which matches any character
@@ -230,9 +237,9 @@ class RawCodeRange(RE):
    """
    nullable = 0
    match_nl = 0
-    range = None                     # (code, code)
-    uppercase_range = None # (code, code) or None
-    lowercase_range = None # (code, code) or None
+    range = None            # (code, code)
+    uppercase_range = None  # (code, code) or None
+    lowercase_range = None  # (code, code) or None

    def __init__(self, code1, code2):
        self.range = (code1, code2)
@@ -252,6 +259,7 @@ class RawCodeRange(RE):
    def calc_str(self):
        return "CodeRange(%d,%d)" % (self.code1, self.code2)

+
 class _RawNewline(RE):
    """
    RawNewline is a low-level RE which matches a newline character.
@@ -266,6 +274,7 @@ class _RawNewline(RE):
        s = self.build_opt(m, initial_state, EOL)
        s.add_transition((nl_code, nl_code + 1), final_state)

+
 RawNewline = _RawNewline()


@@ -304,7 +313,7 @@ class Seq(RE):
        i = len(re_list)
        match_nl = 0
        while i:
-            i = i - 1
+            i -= 1
            re = re_list[i]
            if re.match_nl:
                match_nl = 1
@@ -354,7 +363,7 @@ class Alt(RE):
                non_nullable_res.append(re)
            if re.match_nl:
                match_nl = 1
-            i = i + 1
+            i += 1
        self.nullable_res = nullable_res
        self.non_nullable_res = non_nullable_res
        self.nullable = nullable
@@ -411,7 +420,7 @@ class SwitchCase(RE):

    def build_machine(self, m, initial_state, final_state, match_bol, nocase):
        self.re.build_machine(m, initial_state, final_state, match_bol,
-                                                    self.nocase)
+                              self.nocase)

    def calc_str(self):
        if self.nocase:
@@ -434,6 +443,7 @@ Empty.__doc__ = \
    """
 Empty.str = "Empty"

+
 def Str1(s):
    """
    Str1(s) is an RE which matches the literal string |s|.
@@ -442,6 +452,7 @@ def Str1(s):
    result.str = "Str(%s)" % repr(s)
    return result

+
 def Str(*strs):
    """
    Str(s) is an RE which matches the literal string |s|.
@@ -454,6 +465,7 @@ def Str(*strs):
        result.str = "Str(%s)" % ','.join(map(repr, strs))
        return result

+
 def Any(s):
    """
    Any(s) is an RE which matches any character in the string |s|.
@@ -463,6 +475,7 @@ def Any(s):
    result.str = "Any(%s)" % repr(s)
    return result

+
 def AnyBut(s):
    """
    AnyBut(s) is an RE which matches any character (including
@@ -475,6 +488,7 @@ def AnyBut(s):
    result.str = "AnyBut(%s)" % repr(s)
    return result

+
 AnyChar = AnyBut("")
 AnyChar.__doc__ = \
    """
@@ -482,7 +496,8 @@ AnyChar.__doc__ = \
    """
 AnyChar.str = "AnyChar"

-def Range(s1, s2 = None):
+
+def Range(s1, s2=None):
    """
    Range(c1, c2) is an RE which matches any single character in the range
    |c1| to |c2| inclusive.
@@ -495,11 +510,12 @@ def Range(s1, s2 = None):
    else:
        ranges = []
        for i in range(0, len(s1), 2):
-            ranges.append(CodeRange(ord(s1[i]), ord(s1[i+1]) + 1))
+            ranges.append(CodeRange(ord(s1[i]), ord(s1[i + 1]) + 1))
        result = Alt(*ranges)
        result.str = "Range(%s)" % repr(s1)
    return result

+
 def Opt(re):
    """
    Opt(re) is an RE which matches either |re| or the empty string.
@@ -508,6 +524,7 @@ def Opt(re):
    result.str = "Opt(%s)" % re
    return result

+
 def Rep(re):
    """
    Rep(re) is an RE which matches zero or more repetitions of |re|.
@@ -516,12 +533,14 @@ def Rep(re):
    result.str = "Rep(%s)" % re
    return result

+
 def NoCase(re):
    """
    NoCase(re) is an RE which matches the same strings as RE, but treating
    upper and lower case letters as equivalent.
    """
-    return SwitchCase(re, nocase = 1)
+    return SwitchCase(re, nocase=1)
+

 def Case(re):
    """
@@ -529,7 +548,7 @@ def Case(re):
    upper and lower case letters as distinct, i.e. it cancels the effect
    of any enclosing NoCase().
    """
-    return SwitchCase(re, nocase = 0)
+    return SwitchCase(re, nocase=0)

 #
 #     RE Constants

--- a/Cython/Plex/Scanners.py
+++ b/Cython/Plex/Scanners.py
--- a/Cython/Plex/Traditional.py
+++ b/Cython/Plex/Traditional.py
@@ -13,147 +13,146 @@ from .Errors import PlexError


 class RegexpSyntaxError(PlexError):
-  pass
+    pass


 def re(s):
-  """
-  Convert traditional string representation of regular expression |s|
-  into Plex representation.
-  """
-  return REParser(s).parse_re()
+    """
+    Convert traditional string representation of regular expression |s|
+    into Plex representation.
+    """
+    return REParser(s).parse_re()


 class REParser(object):
-
-  def __init__(self, s):
-    self.s = s
-    self.i = -1
-    self.end = 0
-    self.next()
-
-  def parse_re(self):
-    re = self.parse_alt()
-    if not self.end:
-      self.error("Unexpected %s" % repr(self.c))
-    return re
-
-  def parse_alt(self):
-    """Parse a set of alternative regexps."""
-    re = self.parse_seq()
-    if self.c == '|':
-      re_list = [re]
-      while self.c == '|':
+    def __init__(self, s):
+        self.s = s
+        self.i = -1
+        self.end = 0
        self.next()
-        re_list.append(self.parse_seq())
-      re = Alt(*re_list)
-    return re
-
-  def parse_seq(self):
-    """Parse a sequence of regexps."""
-    re_list = []
-    while not self.end and not self.c in "|)":
-      re_list.append(self.parse_mod())
-    return Seq(*re_list)
-
-  def parse_mod(self):
-    """Parse a primitive regexp followed by *, +, ? modifiers."""
-    re = self.parse_prim()
-    while not self.end and self.c in "*+?":
-      if self.c == '*':
-        re = Rep(re)
-      elif self.c == '+':
-        re = Rep1(re)
-      else: # self.c == '?'
-        re = Opt(re)
-      self.next()
-    return re
-
-  def parse_prim(self):
-    """Parse a primitive regexp."""
-    c = self.get()
-    if c == '.':
-      re = AnyBut("\n")
-    elif c == '^':
-      re = Bol
-    elif c == '$':
-      re = Eol
-    elif c == '(':
-      re = self.parse_alt()
-      self.expect(')')
-    elif c == '[':
-      re = self.parse_charset()
-      self.expect(']')
-    else:
-      if c == '\\':
+
+    def parse_re(self):
+        re = self.parse_alt()
+        if not self.end:
+            self.error("Unexpected %s" % repr(self.c))
+        return re
+
+    def parse_alt(self):
+        """Parse a set of alternative regexps."""
+        re = self.parse_seq()
+        if self.c == '|':
+            re_list = [re]
+            while self.c == '|':
+                self.next()
+                re_list.append(self.parse_seq())
+            re = Alt(*re_list)
+        return re
+
+    def parse_seq(self):
+        """Parse a sequence of regexps."""
+        re_list = []
+        while not self.end and not self.c in "|)":
+            re_list.append(self.parse_mod())
+        return Seq(*re_list)
+
+    def parse_mod(self):
+        """Parse a primitive regexp followed by *, +, ? modifiers."""
+        re = self.parse_prim()
+        while not self.end and self.c in "*+?":
+            if self.c == '*':
+                re = Rep(re)
+            elif self.c == '+':
+                re = Rep1(re)
+            else:  # self.c == '?'
+                re = Opt(re)
+            self.next()
+        return re
+
+    def parse_prim(self):
+        """Parse a primitive regexp."""
        c = self.get()
-      re = Char(c)
-    return re
-
-  def parse_charset(self):
-    """Parse a charset. Does not include the surrounding []."""
-    char_list = []
-    invert = 0
-    if self.c == '^':
-      invert = 1
-      self.next()
-    if self.c == ']':
-      char_list.append(']')
-      self.next()
-    while not self.end and self.c != ']':
-      c1 = self.get()
-      if self.c == '-' and self.lookahead(1) != ']':
+        if c == '.':
+            re = AnyBut("\n")
+        elif c == '^':
+            re = Bol
+        elif c == '$':
+            re = Eol
+        elif c == '(':
+            re = self.parse_alt()
+            self.expect(')')
+        elif c == '[':
+            re = self.parse_charset()
+            self.expect(']')
+        else:
+            if c == '\\':
+                c = self.get()
+            re = Char(c)
+        return re
+
+    def parse_charset(self):
+        """Parse a charset. Does not include the surrounding []."""
+        char_list = []
+        invert = 0
+        if self.c == '^':
+            invert = 1
+            self.next()
+        if self.c == ']':
+            char_list.append(']')
+            self.next()
+        while not self.end and self.c != ']':
+            c1 = self.get()
+            if self.c == '-' and self.lookahead(1) != ']':
+                self.next()
+                c2 = self.get()
+                for a in xrange(ord(c1), ord(c2) + 1):
+                    char_list.append(chr(a))
+            else:
+                char_list.append(c1)
+        chars = ''.join(char_list)
+        if invert:
+            return AnyBut(chars)
+        else:
+            return Any(chars)
+
+    def next(self):
+        """Advance to the next char."""
+        s = self.s
+        i = self.i = self.i + 1
+        if i < len(s):
+            self.c = s[i]
+        else:
+            self.c = ''
+            self.end = 1
+
+    def get(self):
+        if self.end:
+            self.error("Premature end of string")
+        c = self.c
        self.next()
-        c2 = self.get()
-        for a in xrange(ord(c1), ord(c2) + 1):
-          char_list.append(chr(a))
-      else:
-        char_list.append(c1)
-    chars = ''.join(char_list)
-    if invert:
-      return AnyBut(chars)
-    else:
-      return Any(chars)
-
-  def next(self):
-    """Advance to the next char."""
-    s = self.s
-    i = self.i = self.i + 1
-    if i < len(s):
-      self.c = s[i]
-    else:
-      self.c = ''
-      self.end = 1
-
-  def get(self):
-    if self.end:
-      self.error("Premature end of string")
-    c = self.c
-    self.next()
-    return c
-
-  def lookahead(self, n):
-    """Look ahead n chars."""
-    j = self.i + n
-    if j < len(self.s):
-      return self.s[j]
-    else:
-      return ''
-
-  def expect(self, c):
-    """
-    Expect to find character |c| at current position.
-    Raises an exception otherwise.
-    """
-    if self.c == c:
-      self.next()
-    else:
-      self.error("Missing %s" % repr(c))
-
-  def error(self, mess):
-    """Raise exception to signal syntax error in regexp."""
-    raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
-      repr(self.s), self.i, mess))
+        return c
+
+    def lookahead(self, n):
+        """Look ahead n chars."""
+        j = self.i + n
+        if j < len(self.s):
+            return self.s[j]
+        else:
+            return ''
+
+    def expect(self, c):
+        """
+        Expect to find character |c| at current position.
+        Raises an exception otherwise.
+        """
+        if self.c == c:
+            self.next()
+        else:
+            self.error("Missing %s" % repr(c))
+
+    def error(self, mess):
+        """Raise exception to signal syntax error in regexp."""
+        raise RegexpSyntaxError("Syntax error in regexp %s at position %d: %s" % (
+            repr(self.s), self.i, mess))



--- a/Cython/Plex/Transitions.py
+++ b/Cython/Plex/Transitions.py