Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Gwenaël Samain
cython
Commits
5607fabd
Commit
5607fabd
authored
Oct 10, 2014
by
Stefan Behnel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
reformat Plex code files
parent
727e57d9
Changes
9
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
1329 additions
and
1292 deletions
+1329
-1292
Cython/Plex/Actions.py
Cython/Plex/Actions.py
+62
-59
Cython/Plex/DFA.py
Cython/Plex/DFA.py
+137
-132
Cython/Plex/Errors.py
Cython/Plex/Errors.py
+28
-24
Cython/Plex/Lexicons.py
Cython/Plex/Lexicons.py
+166
-164
Cython/Plex/Machines.py
Cython/Plex/Machines.py
+238
-237
Cython/Plex/Regexps.py
Cython/Plex/Regexps.py
+45
-26
Cython/Plex/Scanners.py
Cython/Plex/Scanners.py
+298
-296
Cython/Plex/Traditional.py
Cython/Plex/Traditional.py
+131
-132
Cython/Plex/Transitions.py
Cython/Plex/Transitions.py
+224
-222
No files found.
Cython/Plex/Actions.py
View file @
5607fabd
...
@@ -7,98 +7,101 @@
...
@@ -7,98 +7,101 @@
#=======================================================================
#=======================================================================
class
Action
(
object
):
class
Action
(
object
):
def
perform
(
self
,
token_stream
,
text
):
pass
# abstract
def
perform
(
self
,
token_stream
,
text
):
def
same_as
(
self
,
other
):
pass
# abstract
return
self
is
other
def
same_as
(
self
,
other
):
return
self
is
other
class
Return
(
Action
):
class
Return
(
Action
):
"""
"""
Internal Plex action which causes |value| to
Internal Plex action which causes |value| to
be returned as the value of the associated token
be returned as the value of the associated token
"""
"""
def
__init__
(
self
,
value
):
def
__init__
(
self
,
value
):
self
.
value
=
value
self
.
value
=
value
def
perform
(
self
,
token_stream
,
text
):
def
perform
(
self
,
token_stream
,
text
):
return
self
.
value
return
self
.
value
def
same_as
(
self
,
other
):
def
same_as
(
self
,
other
):
return
isinstance
(
other
,
Return
)
and
self
.
value
==
other
.
value
return
isinstance
(
other
,
Return
)
and
self
.
value
==
other
.
value
def
__repr__
(
self
):
def
__repr__
(
self
):
return
"Return(%s)"
%
repr
(
self
.
value
)
return
"Return(%s)"
%
repr
(
self
.
value
)
class
Call
(
Action
):
class
Call
(
Action
):
"""
"""
Internal Plex action which causes a function to be called.
Internal Plex action which causes a function to be called.
"""
"""
def
__init__
(
self
,
function
):
def
__init__
(
self
,
function
):
self
.
function
=
function
self
.
function
=
function
def
perform
(
self
,
token_stream
,
text
):
def
perform
(
self
,
token_stream
,
text
):
return
self
.
function
(
token_stream
,
text
)
return
self
.
function
(
token_stream
,
text
)
def
__repr__
(
self
):
def
__repr__
(
self
):
return
"Call(%s)"
%
self
.
function
.
__name__
return
"Call(%s)"
%
self
.
function
.
__name__
def
same_as
(
self
,
other
):
def
same_as
(
self
,
other
):
return
isinstance
(
other
,
Call
)
and
self
.
function
is
other
.
function
return
isinstance
(
other
,
Call
)
and
self
.
function
is
other
.
function
class
Begin
(
Action
):
class
Begin
(
Action
):
"""
"""
Begin(state_name) is a Plex action which causes the Scanner to
Begin(state_name) is a Plex action which causes the Scanner to
enter the state |state_name|. See the docstring of Plex.Lexicon
enter the state |state_name|. See the docstring of Plex.Lexicon
for more information.
for more information.
"""
"""
def
__init__
(
self
,
state_name
):
def
__init__
(
self
,
state_name
):
self
.
state_name
=
state_name
self
.
state_name
=
state_name
def
perform
(
self
,
token_stream
,
text
):
def
perform
(
self
,
token_stream
,
text
):
token_stream
.
begin
(
self
.
state_name
)
token_stream
.
begin
(
self
.
state_name
)
def
__repr__
(
self
):
def
__repr__
(
self
):
return
"Begin(%s)"
%
self
.
state_name
return
"Begin(%s)"
%
self
.
state_name
def
same_as
(
self
,
other
):
def
same_as
(
self
,
other
):
return
isinstance
(
other
,
Begin
)
and
self
.
state_name
==
other
.
state_name
return
isinstance
(
other
,
Begin
)
and
self
.
state_name
==
other
.
state_name
class
Ignore
(
Action
):
class
Ignore
(
Action
):
"""
"""
IGNORE is a Plex action which causes its associated token
IGNORE is a Plex action which causes its associated token
to be ignored. See the docstring of Plex.Lexicon for more
to be ignored. See the docstring of Plex.Lexicon for more
information.
information.
"""
"""
def
perform
(
self
,
token_stream
,
text
):
return
None
def
perform
(
self
,
token_stream
,
text
):
return
None
def
__repr__
(
self
):
return
"IGNORE"
def
__repr__
(
self
):
return
"IGNORE"
IGNORE
=
Ignore
()
IGNORE
=
Ignore
()
#IGNORE.__doc__ = Ignore.__doc__
#IGNORE.__doc__ = Ignore.__doc__
class
Text
(
Action
):
class
Text
(
Action
):
"""
"""
TEXT is a Plex action which causes the text of a token to
TEXT is a Plex action which causes the text of a token to
be returned as the value of the token. See the docstring of
be returned as the value of the token. See the docstring of
Plex.Lexicon for more information.
Plex.Lexicon for more information.
"""
"""
def
perform
(
self
,
token_stream
,
text
):
return
text
def
perform
(
self
,
token_stream
,
text
):
def
__repr__
(
self
):
return
text
return
"TEXT"
def
__repr__
(
self
):
return
"TEXT"
TEXT
=
Text
()
TEXT
=
Text
()
#TEXT.__doc__ = Text.__doc__
#TEXT.__doc__ = Text.__doc__
...
...
Cython/Plex/DFA.py
View file @
5607fabd
This diff is collapsed.
Click to expand it.
Cython/Plex/Errors.py
View file @
5607fabd
...
@@ -6,45 +6,49 @@
...
@@ -6,45 +6,49 @@
#
#
#=======================================================================
#=======================================================================
class
PlexError
(
Exception
):
class
PlexError
(
Exception
):
message
=
""
message
=
""
class
PlexTypeError
(
PlexError
,
TypeError
):
class
PlexTypeError
(
PlexError
,
TypeError
):
pass
pass
class
PlexValueError
(
PlexError
,
ValueError
):
class
PlexValueError
(
PlexError
,
ValueError
):
pass
pass
class
InvalidRegex
(
PlexError
):
class
InvalidRegex
(
PlexError
):
pass
pass
class
InvalidToken
(
PlexError
):
class
InvalidToken
(
PlexError
):
def
__init__
(
self
,
token_number
,
message
):
PlexError
.
__init__
(
self
,
"Token number %d: %s"
%
(
token_number
,
message
))
def
__init__
(
self
,
token_number
,
message
):
PlexError
.
__init__
(
self
,
"Token number %d: %s"
%
(
token_number
,
message
))
class
InvalidScanner
(
PlexError
):
class
InvalidScanner
(
PlexError
):
pass
class
AmbiguousAction
(
PlexError
):
message
=
"Two tokens with different actions can match the same string"
def
__init__
(
self
):
pass
pass
class
UnrecognizedInput
(
PlexError
):
scanner
=
None
position
=
None
state_name
=
None
def
__init__
(
self
,
scanner
,
state_name
):
class
AmbiguousAction
(
PlexError
):
self
.
scanner
=
scanner
message
=
"Two tokens with different actions can match the same string"
self
.
position
=
scanner
.
get_position
()
self
.
state_name
=
state_name
def
__str__
(
self
):
return
(
"'%s', line %d, char %d: Token not recognised in state %s"
%
(
self
.
position
+
(
repr
(
self
.
state_name
),)))
def
__init__
(
self
):
pass
class
UnrecognizedInput
(
PlexError
):
scanner
=
None
position
=
None
state_name
=
None
def
__init__
(
self
,
scanner
,
state_name
):
self
.
scanner
=
scanner
self
.
position
=
scanner
.
get_position
()
self
.
state_name
=
state_name
def
__str__
(
self
):
return
(
"'%s', line %d, char %d: Token not recognised in state %s"
%
(
self
.
position
+
(
repr
(
self
.
state_name
),)))
Cython/Plex/Lexicons.py
View file @
5607fabd
This diff is collapsed.
Click to expand it.
Cython/Plex/Machines.py
View file @
5607fabd
This diff is collapsed.
Click to expand it.
Cython/Plex/Regexps.py
View file @
5607fabd
...
@@ -42,14 +42,15 @@ def chars_to_ranges(s):
...
@@ -42,14 +42,15 @@ def chars_to_ranges(s):
while
i
<
n
:
while
i
<
n
:
code1
=
ord
(
char_list
[
i
])
code1
=
ord
(
char_list
[
i
])
code2
=
code1
+
1
code2
=
code1
+
1
i
=
i
+
1
i
+=
1
while
i
<
n
and
code2
>=
ord
(
char_list
[
i
]):
while
i
<
n
and
code2
>=
ord
(
char_list
[
i
]):
code2
=
code2
+
1
code2
+=
1
i
=
i
+
1
i
+=
1
result
.
append
(
code1
)
result
.
append
(
code1
)
result
.
append
(
code2
)
result
.
append
(
code2
)
return
result
return
result
def
uppercase_range
(
code1
,
code2
):
def
uppercase_range
(
code1
,
code2
):
"""
"""
If the range of characters from code1 to code2-1 includes any
If the range of characters from code1 to code2-1 includes any
...
@@ -63,6 +64,7 @@ def uppercase_range(code1, code2):
...
@@ -63,6 +64,7 @@ def uppercase_range(code1, code2):
else
:
else
:
return
None
return
None
def
lowercase_range
(
code1
,
code2
):
def
lowercase_range
(
code1
,
code2
):
"""
"""
If the range of characters from code1 to code2-1 includes any
If the range of characters from code1 to code2-1 includes any
...
@@ -76,6 +78,7 @@ def lowercase_range(code1, code2):
...
@@ -76,6 +78,7 @@ def lowercase_range(code1, code2):
else
:
else
:
return
None
return
None
def
CodeRanges
(
code_list
):
def
CodeRanges
(
code_list
):
"""
"""
Given a list of codes as returned by chars_to_ranges, return
Given a list of codes as returned by chars_to_ranges, return
...
@@ -86,6 +89,7 @@ def CodeRanges(code_list):
...
@@ -86,6 +89,7 @@ def CodeRanges(code_list):
re_list
.
append
(
CodeRange
(
code_list
[
i
],
code_list
[
i
+
1
]))
re_list
.
append
(
CodeRange
(
code_list
[
i
],
code_list
[
i
+
1
]))
return
Alt
(
*
re_list
)
return
Alt
(
*
re_list
)
def
CodeRange
(
code1
,
code2
):
def
CodeRange
(
code1
,
code2
):
"""
"""
CodeRange(code1, code2) is an RE which matches any character
CodeRange(code1, code2) is an RE which matches any character
...
@@ -93,11 +97,12 @@ def CodeRange(code1, code2):
...
@@ -93,11 +97,12 @@ def CodeRange(code1, code2):
"""
"""
if
code1
<=
nl_code
<
code2
:
if
code1
<=
nl_code
<
code2
:
return
Alt
(
RawCodeRange
(
code1
,
nl_code
),
return
Alt
(
RawCodeRange
(
code1
,
nl_code
),
RawNewline
,
RawNewline
,
RawCodeRange
(
nl_code
+
1
,
code2
))
RawCodeRange
(
nl_code
+
1
,
code2
))
else
:
else
:
return
RawCodeRange
(
code1
,
code2
)
return
RawCodeRange
(
code1
,
code2
)
#
#
# Abstract classes
# Abstract classes
#
#
...
@@ -110,12 +115,12 @@ class RE(object):
...
@@ -110,12 +115,12 @@ class RE(object):
re1 | re2 is an RE which matches either |re1| or |re2|
re1 | re2 is an RE which matches either |re1| or |re2|
"""
"""
nullable
=
1
# True if this RE can match 0 input symbols
nullable
=
1
# True if this RE can match 0 input symbols
match_nl
=
1
# True if this RE can match a string ending with '\n'
match_nl
=
1
# True if this RE can match a string ending with '\n'
str
=
None
# Set to a string to override the class's __str__ result
str
=
None
# Set to a string to override the class's __str__ result
def
build_machine
(
self
,
machine
,
initial_state
,
final_state
,
def
build_machine
(
self
,
machine
,
initial_state
,
final_state
,
match_bol
,
nocase
):
match_bol
,
nocase
):
"""
"""
This method should add states to |machine| to implement this
This method should add states to |machine| to implement this
RE, starting at |initial_state| and ending at |final_state|.
RE, starting at |initial_state| and ending at |final_state|.
...
@@ -124,7 +129,7 @@ class RE(object):
...
@@ -124,7 +129,7 @@ class RE(object):
letters should be treated as equivalent.
letters should be treated as equivalent.
"""
"""
raise
NotImplementedError
(
"%s.build_machine not implemented"
%
raise
NotImplementedError
(
"%s.build_machine not implemented"
%
self
.
__class__
.
__name__
)
self
.
__class__
.
__name__
)
def
build_opt
(
self
,
m
,
initial_state
,
c
):
def
build_opt
(
self
,
m
,
initial_state
,
c
):
"""
"""
...
@@ -160,18 +165,18 @@ class RE(object):
...
@@ -160,18 +165,18 @@ class RE(object):
self
.
check_string
(
num
,
value
)
self
.
check_string
(
num
,
value
)
if
len
(
value
)
!=
1
:
if
len
(
value
)
!=
1
:
raise
Errors
.
PlexValueError
(
"Invalid value for argument %d of Plex.%s."
raise
Errors
.
PlexValueError
(
"Invalid value for argument %d of Plex.%s."
"Expected a string of length 1, got: %s"
%
(
"Expected a string of length 1, got: %s"
%
(
num
,
self
.
__class__
.
__name__
,
repr
(
value
)))
num
,
self
.
__class__
.
__name__
,
repr
(
value
)))
def
wrong_type
(
self
,
num
,
value
,
expected
):
def
wrong_type
(
self
,
num
,
value
,
expected
):
if
type
(
value
)
==
types
.
InstanceType
:
if
type
(
value
)
==
types
.
InstanceType
:
got
=
"%s.%s instance"
%
(
got
=
"%s.%s instance"
%
(
value
.
__class__
.
__module__
,
value
.
__class__
.
__name__
)
value
.
__class__
.
__module__
,
value
.
__class__
.
__name__
)
else
:
else
:
got
=
type
(
value
).
__name__
got
=
type
(
value
).
__name__
raise
Errors
.
PlexTypeError
(
"Invalid type for argument %d of Plex.%s "
raise
Errors
.
PlexTypeError
(
"Invalid type for argument %d of Plex.%s "
"(expected %s, got %s"
%
(
"(expected %s, got %s"
%
(
num
,
self
.
__class__
.
__name__
,
expected
,
got
))
num
,
self
.
__class__
.
__name__
,
expected
,
got
))
#
#
# Primitive RE constructors
# Primitive RE constructors
...
@@ -211,6 +216,7 @@ class RE(object):
...
@@ -211,6 +216,7 @@ class RE(object):
## def calc_str(self):
## def calc_str(self):
## return "Char(%s)" % repr(self.char)
## return "Char(%s)" % repr(self.char)
def
Char
(
c
):
def
Char
(
c
):
"""
"""
Char(c) is an RE which matches the character |c|.
Char(c) is an RE which matches the character |c|.
...
@@ -222,6 +228,7 @@ def Char(c):
...
@@ -222,6 +228,7 @@ def Char(c):
result
.
str
=
"Char(%s)"
%
repr
(
c
)
result
.
str
=
"Char(%s)"
%
repr
(
c
)
return
result
return
result
class
RawCodeRange
(
RE
):
class
RawCodeRange
(
RE
):
"""
"""
RawCodeRange(code1, code2) is a low-level RE which matches any character
RawCodeRange(code1, code2) is a low-level RE which matches any character
...
@@ -230,9 +237,9 @@ class RawCodeRange(RE):
...
@@ -230,9 +237,9 @@ class RawCodeRange(RE):
"""
"""
nullable
=
0
nullable
=
0
match_nl
=
0
match_nl
=
0
range
=
None
# (code, code)
range
=
None
# (code, code)
uppercase_range
=
None
# (code, code) or None
uppercase_range
=
None
# (code, code) or None
lowercase_range
=
None
# (code, code) or None
lowercase_range
=
None
# (code, code) or None
def
__init__
(
self
,
code1
,
code2
):
def
__init__
(
self
,
code1
,
code2
):
self
.
range
=
(
code1
,
code2
)
self
.
range
=
(
code1
,
code2
)
...
@@ -252,6 +259,7 @@ class RawCodeRange(RE):
...
@@ -252,6 +259,7 @@ class RawCodeRange(RE):
def
calc_str
(
self
):
def
calc_str
(
self
):
return
"CodeRange(%d,%d)"
%
(
self
.
code1
,
self
.
code2
)
return
"CodeRange(%d,%d)"
%
(
self
.
code1
,
self
.
code2
)
class
_RawNewline
(
RE
):
class
_RawNewline
(
RE
):
"""
"""
RawNewline is a low-level RE which matches a newline character.
RawNewline is a low-level RE which matches a newline character.
...
@@ -266,6 +274,7 @@ class _RawNewline(RE):
...
@@ -266,6 +274,7 @@ class _RawNewline(RE):
s
=
self
.
build_opt
(
m
,
initial_state
,
EOL
)
s
=
self
.
build_opt
(
m
,
initial_state
,
EOL
)
s
.
add_transition
((
nl_code
,
nl_code
+
1
),
final_state
)
s
.
add_transition
((
nl_code
,
nl_code
+
1
),
final_state
)
RawNewline
=
_RawNewline
()
RawNewline
=
_RawNewline
()
...
@@ -304,7 +313,7 @@ class Seq(RE):
...
@@ -304,7 +313,7 @@ class Seq(RE):
i
=
len
(
re_list
)
i
=
len
(
re_list
)
match_nl
=
0
match_nl
=
0
while
i
:
while
i
:
i
=
i
-
1
i
-=
1
re
=
re_list
[
i
]
re
=
re_list
[
i
]
if
re
.
match_nl
:
if
re
.
match_nl
:
match_nl
=
1
match_nl
=
1
...
@@ -354,7 +363,7 @@ class Alt(RE):
...
@@ -354,7 +363,7 @@ class Alt(RE):
non_nullable_res
.
append
(
re
)
non_nullable_res
.
append
(
re
)
if
re
.
match_nl
:
if
re
.
match_nl
:
match_nl
=
1
match_nl
=
1
i
=
i
+
1
i
+=
1
self
.
nullable_res
=
nullable_res
self
.
nullable_res
=
nullable_res
self
.
non_nullable_res
=
non_nullable_res
self
.
non_nullable_res
=
non_nullable_res
self
.
nullable
=
nullable
self
.
nullable
=
nullable
...
@@ -411,7 +420,7 @@ class SwitchCase(RE):
...
@@ -411,7 +420,7 @@ class SwitchCase(RE):
def
build_machine
(
self
,
m
,
initial_state
,
final_state
,
match_bol
,
nocase
):
def
build_machine
(
self
,
m
,
initial_state
,
final_state
,
match_bol
,
nocase
):
self
.
re
.
build_machine
(
m
,
initial_state
,
final_state
,
match_bol
,
self
.
re
.
build_machine
(
m
,
initial_state
,
final_state
,
match_bol
,
self
.
nocase
)
self
.
nocase
)
def
calc_str
(
self
):
def
calc_str
(
self
):
if
self
.
nocase
:
if
self
.
nocase
:
...
@@ -434,6 +443,7 @@ Empty.__doc__ = \
...
@@ -434,6 +443,7 @@ Empty.__doc__ = \
"""
"""
Empty
.
str
=
"Empty"
Empty
.
str
=
"Empty"
def
Str1
(
s
):
def
Str1
(
s
):
"""
"""
Str1(s) is an RE which matches the literal string |s|.
Str1(s) is an RE which matches the literal string |s|.
...
@@ -442,6 +452,7 @@ def Str1(s):
...
@@ -442,6 +452,7 @@ def Str1(s):
result
.
str
=
"Str(%s)"
%
repr
(
s
)
result
.
str
=
"Str(%s)"
%
repr
(
s
)
return
result
return
result
def
Str
(
*
strs
):
def
Str
(
*
strs
):
"""
"""
Str(s) is an RE which matches the literal string |s|.
Str(s) is an RE which matches the literal string |s|.
...
@@ -454,6 +465,7 @@ def Str(*strs):
...
@@ -454,6 +465,7 @@ def Str(*strs):
result
.
str
=
"Str(%s)"
%
','
.
join
(
map
(
repr
,
strs
))
result
.
str
=
"Str(%s)"
%
','
.
join
(
map
(
repr
,
strs
))
return
result
return
result
def
Any
(
s
):
def
Any
(
s
):
"""
"""
Any(s) is an RE which matches any character in the string |s|.
Any(s) is an RE which matches any character in the string |s|.
...
@@ -463,6 +475,7 @@ def Any(s):
...
@@ -463,6 +475,7 @@ def Any(s):
result
.
str
=
"Any(%s)"
%
repr
(
s
)
result
.
str
=
"Any(%s)"
%
repr
(
s
)
return
result
return
result
def
AnyBut
(
s
):
def
AnyBut
(
s
):
"""
"""
AnyBut(s) is an RE which matches any character (including
AnyBut(s) is an RE which matches any character (including
...
@@ -475,6 +488,7 @@ def AnyBut(s):
...
@@ -475,6 +488,7 @@ def AnyBut(s):
result
.
str
=
"AnyBut(%s)"
%
repr
(
s
)
result
.
str
=
"AnyBut(%s)"
%
repr
(
s
)
return
result
return
result
AnyChar
=
AnyBut
(
""
)
AnyChar
=
AnyBut
(
""
)
AnyChar
.
__doc__
=
\
AnyChar
.
__doc__
=
\
"""
"""
...
@@ -482,7 +496,8 @@ AnyChar.__doc__ = \
...
@@ -482,7 +496,8 @@ AnyChar.__doc__ = \
"""
"""
AnyChar
.
str
=
"AnyChar"
AnyChar
.
str
=
"AnyChar"
def
Range
(
s1
,
s2
=
None
):
def
Range
(
s1
,
s2
=
None
):
"""
"""
Range(c1, c2) is an RE which matches any single character in the range
Range(c1, c2) is an RE which matches any single character in the range
|c1| to |c2| inclusive.
|c1| to |c2| inclusive.
...
@@ -495,11 +510,12 @@ def Range(s1, s2 = None):
...
@@ -495,11 +510,12 @@ def Range(s1, s2 = None):
else
:
else
:
ranges
=
[]
ranges
=
[]
for
i
in
range
(
0
,
len
(
s1
),
2
):
for
i
in
range
(
0
,
len
(
s1
),
2
):
ranges
.
append
(
CodeRange
(
ord
(
s1
[
i
]),
ord
(
s1
[
i
+
1
])
+
1
))
ranges
.
append
(
CodeRange
(
ord
(
s1
[
i
]),
ord
(
s1
[
i
+
1
])
+
1
))
result
=
Alt
(
*
ranges
)
result
=
Alt
(
*
ranges
)
result
.
str
=
"Range(%s)"
%
repr
(
s1
)
result
.
str
=
"Range(%s)"
%
repr
(
s1
)
return
result
return
result
def
Opt
(
re
):
def
Opt
(
re
):
"""
"""
Opt(re) is an RE which matches either |re| or the empty string.
Opt(re) is an RE which matches either |re| or the empty string.
...
@@ -508,6 +524,7 @@ def Opt(re):
...
@@ -508,6 +524,7 @@ def Opt(re):
result
.
str
=
"Opt(%s)"
%
re
result
.
str
=
"Opt(%s)"
%
re
return
result
return
result
def
Rep
(
re
):
def
Rep
(
re
):
"""
"""
Rep(re) is an RE which matches zero or more repetitions of |re|.
Rep(re) is an RE which matches zero or more repetitions of |re|.
...
@@ -516,12 +533,14 @@ def Rep(re):
...
@@ -516,12 +533,14 @@ def Rep(re):
result
.
str
=
"Rep(%s)"
%
re
result
.
str
=
"Rep(%s)"
%
re
return
result
return
result
def
NoCase
(
re
):
def
NoCase
(
re
):
"""
"""
NoCase(re) is an RE which matches the same strings as RE, but treating
NoCase(re) is an RE which matches the same strings as RE, but treating
upper and lower case letters as equivalent.
upper and lower case letters as equivalent.
"""
"""
return
SwitchCase
(
re
,
nocase
=
1
)
return
SwitchCase
(
re
,
nocase
=
1
)
def
Case
(
re
):
def
Case
(
re
):
"""
"""
...
@@ -529,7 +548,7 @@ def Case(re):
...
@@ -529,7 +548,7 @@ def Case(re):
upper and lower case letters as distinct, i.e. it cancels the effect
upper and lower case letters as distinct, i.e. it cancels the effect
of any enclosing NoCase().
of any enclosing NoCase().
"""
"""
return
SwitchCase
(
re
,
nocase
=
0
)
return
SwitchCase
(
re
,
nocase
=
0
)
#
#
# RE Constants
# RE Constants
...
...
Cython/Plex/Scanners.py
View file @
5607fabd
This diff is collapsed.
Click to expand it.
Cython/Plex/Traditional.py
View file @
5607fabd
...
@@ -13,147 +13,146 @@ from .Errors import PlexError
...
@@ -13,147 +13,146 @@ from .Errors import PlexError
class
RegexpSyntaxError
(
PlexError
):
class
RegexpSyntaxError
(
PlexError
):
pass
pass
def
re
(
s
):
def
re
(
s
):
"""
"""
Convert traditional string representation of regular expression |s|
Convert traditional string representation of regular expression |s|
into Plex representation.
into Plex representation.
"""
"""
return
REParser
(
s
).
parse_re
()
return
REParser
(
s
).
parse_re
()
class
REParser
(
object
):
class
REParser
(
object
):
def
__init__
(
self
,
s
):
def
__init__
(
self
,
s
):
self
.
s
=
s
self
.
s
=
s
self
.
i
=
-
1
self
.
i
=
-
1
self
.
end
=
0
self
.
end
=
0
self
.
next
()
def
parse_re
(
self
):
re
=
self
.
parse_alt
()
if
not
self
.
end
:
self
.
error
(
"Unexpected %s"
%
repr
(
self
.
c
))
return
re
def
parse_alt
(
self
):
"""Parse a set of alternative regexps."""
re
=
self
.
parse_seq
()
if
self
.
c
==
'|'
:
re_list
=
[
re
]
while
self
.
c
==
'|'
:
self
.
next
()
self
.
next
()
re_list
.
append
(
self
.
parse_seq
())
re
=
Alt
(
*
re_list
)
def
parse_re
(
self
):
return
re
re
=
self
.
parse_alt
()
if
not
self
.
end
:
def
parse_seq
(
self
):
self
.
error
(
"Unexpected %s"
%
repr
(
self
.
c
))
"""Parse a sequence of regexps."""
return
re
re_list
=
[]
while
not
self
.
end
and
not
self
.
c
in
"|)"
:
def
parse_alt
(
self
):
re_list
.
append
(
self
.
parse_mod
())
"""Parse a set of alternative regexps."""
return
Seq
(
*
re_list
)
re
=
self
.
parse_seq
()
if
self
.
c
==
'|'
:
def
parse_mod
(
self
):
re_list
=
[
re
]
"""Parse a primitive regexp followed by *, +, ? modifiers."""
while
self
.
c
==
'|'
:
re
=
self
.
parse_prim
()
self
.
next
()
while
not
self
.
end
and
self
.
c
in
"*+?"
:
re_list
.
append
(
self
.
parse_seq
())
if
self
.
c
==
'*'
:
re
=
Alt
(
*
re_list
)
re
=
Rep
(
re
)
return
re
elif
self
.
c
==
'+'
:
re
=
Rep1
(
re
)
def
parse_seq
(
self
):
else
:
# self.c == '?'
"""Parse a sequence of regexps."""
re
=
Opt
(
re
)
re_list
=
[]
self
.
next
()
while
not
self
.
end
and
not
self
.
c
in
"|)"
:
return
re
re_list
.
append
(
self
.
parse_mod
())
return
Seq
(
*
re_list
)
def
parse_prim
(
self
):
"""Parse a primitive regexp."""
def
parse_mod
(
self
):
c
=
self
.
get
()
"""Parse a primitive regexp followed by *, +, ? modifiers."""
if
c
==
'.'
:
re
=
self
.
parse_prim
()
re
=
AnyBut
(
"
\
n
"
)
while
not
self
.
end
and
self
.
c
in
"*+?"
:
elif
c
==
'^'
:
if
self
.
c
==
'*'
:
re
=
Bol
re
=
Rep
(
re
)
elif
c
==
'$'
:
elif
self
.
c
==
'+'
:
re
=
Eol
re
=
Rep1
(
re
)
elif
c
==
'('
:
else
:
# self.c == '?'
re
=
self
.
parse_alt
()
re
=
Opt
(
re
)
self
.
expect
(
')'
)
self
.
next
()
elif
c
==
'['
:
return
re
re
=
self
.
parse_charset
()
self
.
expect
(
']'
)
def
parse_prim
(
self
):
else
:
"""Parse a primitive regexp."""
if
c
==
'
\
\
'
:
c
=
self
.
get
()
c
=
self
.
get
()
re
=
Char
(
c
)
if
c
==
'.'
:
return
re
re
=
AnyBut
(
"
\
n
"
)
elif
c
==
'^'
:
def
parse_charset
(
self
):
re
=
Bol
"""Parse a charset. Does not include the surrounding []."""
elif
c
==
'$'
:
char_list
=
[]
re
=
Eol
invert
=
0
elif
c
==
'('
:
if
self
.
c
==
'^'
:
re
=
self
.
parse_alt
()
invert
=
1
self
.
expect
(
')'
)
self
.
next
()
elif
c
==
'['
:
if
self
.
c
==
']'
:
re
=
self
.
parse_charset
()
char_list
.
append
(
']'
)
self
.
expect
(
']'
)
self
.
next
()
else
:
while
not
self
.
end
and
self
.
c
!=
']'
:
if
c
==
'
\
\
'
:
c1
=
self
.
get
()
c
=
self
.
get
()
if
self
.
c
==
'-'
and
self
.
lookahead
(
1
)
!=
']'
:
re
=
Char
(
c
)
return
re
def
parse_charset
(
self
):
"""Parse a charset. Does not include the surrounding []."""
char_list
=
[]
invert
=
0
if
self
.
c
==
'^'
:
invert
=
1
self
.
next
()
if
self
.
c
==
']'
:
char_list
.
append
(
']'
)
self
.
next
()
while
not
self
.
end
and
self
.
c
!=
']'
:
c1
=
self
.
get
()
if
self
.
c
==
'-'
and
self
.
lookahead
(
1
)
!=
']'
:
self
.
next
()
c2
=
self
.
get
()
for
a
in
xrange
(
ord
(
c1
),
ord
(
c2
)
+
1
):
char_list
.
append
(
chr
(
a
))
else
:
char_list
.
append
(
c1
)
chars
=
''
.
join
(
char_list
)
if
invert
:
return
AnyBut
(
chars
)
else
:
return
Any
(
chars
)
def
next
(
self
):
"""Advance to the next char."""
s
=
self
.
s
i
=
self
.
i
=
self
.
i
+
1
if
i
<
len
(
s
):
self
.
c
=
s
[
i
]
else
:
self
.
c
=
''
self
.
end
=
1
def
get
(
self
):
if
self
.
end
:
self
.
error
(
"Premature end of string"
)
c
=
self
.
c
self
.
next
()
self
.
next
()
c2
=
self
.
get
()
return
c
for
a
in
xrange
(
ord
(
c1
),
ord
(
c2
)
+
1
):
char_list
.
append
(
chr
(
a
))
def
lookahead
(
self
,
n
):
else
:
"""Look ahead n chars."""
char_list
.
append
(
c1
)
j
=
self
.
i
+
n
chars
=
''
.
join
(
char_list
)
if
j
<
len
(
self
.
s
):
if
invert
:
return
self
.
s
[
j
]
return
AnyBut
(
chars
)
else
:
else
:
return
''
return
Any
(
chars
)
def
expect
(
self
,
c
):
def
next
(
self
):
"""
"""Advance to the next char."""
Expect to find character |c| at current position.
s
=
self
.
s
Raises an exception otherwise.
i
=
self
.
i
=
self
.
i
+
1
"""
if
i
<
len
(
s
):
if
self
.
c
==
c
:
self
.
c
=
s
[
i
]
self
.
next
()
else
:
else
:
self
.
c
=
''
self
.
error
(
"Missing %s"
%
repr
(
c
))
self
.
end
=
1
def
error
(
self
,
mess
):
def
get
(
self
):
"""Raise exception to signal syntax error in regexp."""
if
self
.
end
:
raise
RegexpSyntaxError
(
"Syntax error in regexp %s at position %d: %s"
%
(
self
.
error
(
"Premature end of string"
)
repr
(
self
.
s
),
self
.
i
,
mess
))
c
=
self
.
c
self
.
next
()
return
c
def
lookahead
(
self
,
n
):
"""Look ahead n chars."""
j
=
self
.
i
+
n
if
j
<
len
(
self
.
s
):
return
self
.
s
[
j
]
else
:
return
''
def
expect
(
self
,
c
):
"""
Expect to find character |c| at current position.
Raises an exception otherwise.
"""
if
self
.
c
==
c
:
self
.
next
()
else
:
self
.
error
(
"Missing %s"
%
repr
(
c
))
def
error
(
self
,
mess
):
"""Raise exception to signal syntax error in regexp."""
raise
RegexpSyntaxError
(
"Syntax error in regexp %s at position %d: %s"
%
(
repr
(
self
.
s
),
self
.
i
,
mess
))
Cython/Plex/Transitions.py
View file @
5607fabd
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment