Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
C
cython
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Gwenaël Samain
cython
Commits
5607fabd
Commit
5607fabd
authored
Oct 10, 2014
by
Stefan Behnel
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
reformat Plex code files
parent
727e57d9
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
1329 additions
and
1292 deletions
+1329
-1292
Cython/Plex/Actions.py
Cython/Plex/Actions.py
+62
-59
Cython/Plex/DFA.py
Cython/Plex/DFA.py
+137
-132
Cython/Plex/Errors.py
Cython/Plex/Errors.py
+28
-24
Cython/Plex/Lexicons.py
Cython/Plex/Lexicons.py
+166
-164
Cython/Plex/Machines.py
Cython/Plex/Machines.py
+238
-237
Cython/Plex/Regexps.py
Cython/Plex/Regexps.py
+45
-26
Cython/Plex/Scanners.py
Cython/Plex/Scanners.py
+298
-296
Cython/Plex/Traditional.py
Cython/Plex/Traditional.py
+131
-132
Cython/Plex/Transitions.py
Cython/Plex/Transitions.py
+224
-222
No files found.
Cython/Plex/Actions.py
View file @
5607fabd
...
@@ -7,7 +7,6 @@
...
@@ -7,7 +7,6 @@
#=======================================================================
#=======================================================================
class
Action
(
object
):
class
Action
(
object
):
def
perform
(
self
,
token_stream
,
text
):
def
perform
(
self
,
token_stream
,
text
):
pass
# abstract
pass
# abstract
...
@@ -78,15 +77,18 @@ class Ignore(Action):
...
@@ -78,15 +77,18 @@ class Ignore(Action):
to be ignored. See the docstring of Plex.Lexicon for more
to be ignored. See the docstring of Plex.Lexicon for more
information.
information.
"""
"""
def
perform
(
self
,
token_stream
,
text
):
def
perform
(
self
,
token_stream
,
text
):
return
None
return
None
def
__repr__
(
self
):
def
__repr__
(
self
):
return
"IGNORE"
return
"IGNORE"
IGNORE
=
Ignore
()
IGNORE
=
Ignore
()
#IGNORE.__doc__ = Ignore.__doc__
#IGNORE.__doc__ = Ignore.__doc__
class
Text
(
Action
):
class
Text
(
Action
):
"""
"""
TEXT is a Plex action which causes the text of a token to
TEXT is a Plex action which causes the text of a token to
...
@@ -100,6 +102,7 @@ class Text(Action):
...
@@ -100,6 +102,7 @@ class Text(Action):
def
__repr__
(
self
):
def
__repr__
(
self
):
return
"TEXT"
return
"TEXT"
TEXT
=
Text
()
TEXT
=
Text
()
#TEXT.__doc__ = Text.__doc__
#TEXT.__doc__ = Text.__doc__
...
...
Cython/Plex/DFA.py
View file @
5607fabd
...
@@ -13,7 +13,7 @@ from .Machines import LOWEST_PRIORITY
...
@@ -13,7 +13,7 @@ from .Machines import LOWEST_PRIORITY
from
.Transitions
import
TransitionMap
from
.Transitions
import
TransitionMap
def
nfa_to_dfa
(
old_machine
,
debug
=
None
):
def
nfa_to_dfa
(
old_machine
,
debug
=
None
):
"""
"""
Given a nondeterministic Machine, return a new equivalent
Given a nondeterministic Machine, return a new equivalent
Machine which is deterministic.
Machine which is deterministic.
...
@@ -50,6 +50,7 @@ def nfa_to_dfa(old_machine, debug = None):
...
@@ -50,6 +50,7 @@ def nfa_to_dfa(old_machine, debug = None):
state_map
.
dump
(
debug
)
state_map
.
dump
(
debug
)
return
new_machine
return
new_machine
def
set_epsilon_closure
(
state_set
):
def
set_epsilon_closure
(
state_set
):
"""
"""
Given a set of states, return the union of the epsilon
Given a set of states, return the union of the epsilon
...
@@ -61,6 +62,7 @@ def set_epsilon_closure(state_set):
...
@@ -61,6 +62,7 @@ def set_epsilon_closure(state_set):
result
[
state2
]
=
1
result
[
state2
]
=
1
return
result
return
result
def
epsilon_closure
(
state
):
def
epsilon_closure
(
state
):
"""
"""
Return the set of states reachable from the given state
Return the set of states reachable from the given state
...
@@ -74,6 +76,7 @@ def epsilon_closure(state):
...
@@ -74,6 +76,7 @@ def epsilon_closure(state):
add_to_epsilon_closure
(
result
,
state
)
add_to_epsilon_closure
(
result
,
state
)
return
result
return
result
def
add_to_epsilon_closure
(
state_set
,
state
):
def
add_to_epsilon_closure
(
state_set
,
state
):
"""
"""
Recursively add to |state_set| states reachable from the given state
Recursively add to |state_set| states reachable from the given state
...
@@ -86,6 +89,7 @@ def add_to_epsilon_closure(state_set, state):
...
@@ -86,6 +89,7 @@ def add_to_epsilon_closure(state_set, state):
for
state2
in
state_set_2
:
for
state2
in
state_set_2
:
add_to_epsilon_closure
(
state_set
,
state2
)
add_to_epsilon_closure
(
state_set
,
state2
)
class
StateMap
(
object
):
class
StateMap
(
object
):
"""
"""
Helper class used by nfa_to_dfa() to map back and forth between
Helper class used by nfa_to_dfa() to map back and forth between
...
@@ -98,7 +102,7 @@ class StateMap(object):
...
@@ -98,7 +102,7 @@ class StateMap(object):
def
__init__
(
self
,
new_machine
):
def
__init__
(
self
,
new_machine
):
self
.
new_machine
=
new_machine
self
.
new_machine
=
new_machine
self
.
old_to_new_dict
=
{}
self
.
old_to_new_dict
=
{}
self
.
new_to_old_dict
=
{}
self
.
new_to_old_dict
=
{}
def
old_to_new
(
self
,
old_state_set
):
def
old_to_new
(
self
,
old_state_set
):
"""
"""
...
@@ -129,12 +133,12 @@ class StateMap(object):
...
@@ -129,12 +133,12 @@ class StateMap(object):
best_priority
=
priority
best_priority
=
priority
return
best_action
return
best_action
# def old_to_new_set(self, old_state_set):
# def old_to_new_set(self, old_state_set):
# """
# """
# Return the new state corresponding to a set of old states as
# Return the new state corresponding to a set of old states as
# a singleton set.
# a singleton set.
# """
# """
# return {self.old_to_new(old_state_set):1}
# return {self.old_to_new(old_state_set):1}
def
new_to_old
(
self
,
new_state
):
def
new_to_old
(
self
,
new_state
):
"""Given a new state, return a set of corresponding old states."""
"""Given a new state, return a set of corresponding old states."""
...
@@ -151,6 +155,7 @@ class StateMap(object):
...
@@ -151,6 +155,7 @@ class StateMap(object):
def
dump
(
self
,
file
):
def
dump
(
self
,
file
):
from
.Transitions
import
state_set_str
from
.Transitions
import
state_set_str
for
new_state
in
self
.
new_machine
.
states
:
for
new_state
in
self
.
new_machine
.
states
:
old_state_set
=
self
.
new_to_old_dict
[
id
(
new_state
)]
old_state_set
=
self
.
new_to_old_dict
[
id
(
new_state
)]
file
.
write
(
" State %s <-- %s
\
n
"
%
(
file
.
write
(
" State %s <-- %s
\
n
"
%
(
...
...
Cython/Plex/Errors.py
View file @
5607fabd
...
@@ -6,32 +6,39 @@
...
@@ -6,32 +6,39 @@
#
#
#=======================================================================
#=======================================================================
class
PlexError
(
Exception
):
class
PlexError
(
Exception
):
message
=
""
message
=
""
class
PlexTypeError
(
PlexError
,
TypeError
):
class
PlexTypeError
(
PlexError
,
TypeError
):
pass
pass
class
PlexValueError
(
PlexError
,
ValueError
):
class
PlexValueError
(
PlexError
,
ValueError
):
pass
pass
class
InvalidRegex
(
PlexError
):
class
InvalidRegex
(
PlexError
):
pass
pass
class
InvalidToken
(
PlexError
):
class
InvalidToken
(
PlexError
):
def
__init__
(
self
,
token_number
,
message
):
def
__init__
(
self
,
token_number
,
message
):
PlexError
.
__init__
(
self
,
"Token number %d: %s"
%
(
token_number
,
message
))
PlexError
.
__init__
(
self
,
"Token number %d: %s"
%
(
token_number
,
message
))
class
InvalidScanner
(
PlexError
):
class
InvalidScanner
(
PlexError
):
pass
pass
class
AmbiguousAction
(
PlexError
):
class
AmbiguousAction
(
PlexError
):
message
=
"Two tokens with different actions can match the same string"
message
=
"Two tokens with different actions can match the same string"
def
__init__
(
self
):
def
__init__
(
self
):
pass
pass
class
UnrecognizedInput
(
PlexError
):
class
UnrecognizedInput
(
PlexError
):
scanner
=
None
scanner
=
None
position
=
None
position
=
None
...
@@ -43,8 +50,5 @@ class UnrecognizedInput(PlexError):
...
@@ -43,8 +50,5 @@ class UnrecognizedInput(PlexError):
self
.
state_name
=
state_name
self
.
state_name
=
state_name
def
__str__
(
self
):
def
__str__
(
self
):
return
(
"'%s', line %d, char %d: Token not recognised in state %s"
return
(
"'%s', line %d, char %d: Token not recognised in state %s"
%
(
%
(
self
.
position
+
(
repr
(
self
.
state_name
),)))
self
.
position
+
(
repr
(
self
.
state_name
),)))
Cython/Plex/Lexicons.py
View file @
5607fabd
...
@@ -38,6 +38,7 @@ class State(object):
...
@@ -38,6 +38,7 @@ class State(object):
self
.
name
=
name
self
.
name
=
name
self
.
tokens
=
tokens
self
.
tokens
=
tokens
class
Lexicon
(
object
):
class
Lexicon
(
object
):
"""
"""
Lexicon(specification) builds a lexical analyser from the given
Lexicon(specification) builds a lexical analyser from the given
...
@@ -113,11 +114,12 @@ class Lexicon(object):
...
@@ -113,11 +114,12 @@ class Lexicon(object):
machine
=
None
# Machine
machine
=
None
# Machine
tables
=
None
# StateTableMachine
tables
=
None
# StateTableMachine
def
__init__
(
self
,
specifications
,
debug
=
None
,
debug_flags
=
7
,
timings
=
None
):
def
__init__
(
self
,
specifications
,
debug
=
None
,
debug_flags
=
7
,
timings
=
None
):
if
type
(
specifications
)
!=
types
.
ListType
:
if
type
(
specifications
)
!=
types
.
ListType
:
raise
Errors
.
InvalidScanner
(
"Scanner definition is not a list"
)
raise
Errors
.
InvalidScanner
(
"Scanner definition is not a list"
)
if
timings
:
if
timings
:
from
.Timing
import
time
from
.Timing
import
time
total_time
=
0.0
total_time
=
0.0
time1
=
time
()
time1
=
time
()
nfa
=
Machines
.
Machine
()
nfa
=
Machines
.
Machine
()
...
@@ -129,11 +131,11 @@ class Lexicon(object):
...
@@ -129,11 +131,11 @@ class Lexicon(object):
for
token
in
spec
.
tokens
:
for
token
in
spec
.
tokens
:
self
.
add_token_to_machine
(
self
.
add_token_to_machine
(
nfa
,
user_initial_state
,
token
,
token_number
)
nfa
,
user_initial_state
,
token
,
token_number
)
token_number
=
token_number
+
1
token_number
+=
1
elif
type
(
spec
)
==
types
.
TupleType
:
elif
type
(
spec
)
==
types
.
TupleType
:
self
.
add_token_to_machine
(
self
.
add_token_to_machine
(
nfa
,
default_initial_state
,
spec
,
token_number
)
nfa
,
default_initial_state
,
spec
,
token_number
)
token_number
=
token_number
+
1
token_number
+=
1
else
:
else
:
raise
Errors
.
InvalidToken
(
raise
Errors
.
InvalidToken
(
token_number
,
token_number
,
...
@@ -145,7 +147,7 @@ class Lexicon(object):
...
@@ -145,7 +147,7 @@ class Lexicon(object):
if
debug
and
(
debug_flags
&
1
):
if
debug
and
(
debug_flags
&
1
):
debug
.
write
(
"
\
n
============= NFA ===========
\
n
"
)
debug
.
write
(
"
\
n
============= NFA ===========
\
n
"
)
nfa
.
dump
(
debug
)
nfa
.
dump
(
debug
)
dfa
=
DFA
.
nfa_to_dfa
(
nfa
,
debug
=
(
debug_flags
&
3
)
==
3
and
debug
)
dfa
=
DFA
.
nfa_to_dfa
(
nfa
,
debug
=
(
debug_flags
&
3
)
==
3
and
debug
)
if
timings
:
if
timings
:
time4
=
time
()
time4
=
time
()
total_time
=
total_time
+
(
time4
-
time3
)
total_time
=
total_time
+
(
time4
-
time3
)
...
@@ -176,8 +178,8 @@ class Lexicon(object):
...
@@ -176,8 +178,8 @@ class Lexicon(object):
action
=
Actions
.
Call
(
action_spec
)
action
=
Actions
.
Call
(
action_spec
)
final_state
=
machine
.
new_state
()
final_state
=
machine
.
new_state
()
re
.
build_machine
(
machine
,
initial_state
,
final_state
,
re
.
build_machine
(
machine
,
initial_state
,
final_state
,
match_bol
=
1
,
nocase
=
0
)
match_bol
=
1
,
nocase
=
0
)
final_state
.
set_action
(
action
,
priority
=
-
token_number
)
final_state
.
set_action
(
action
,
priority
=
-
token_number
)
except
Errors
.
PlexError
,
e
:
except
Errors
.
PlexError
,
e
:
raise
e
.
__class__
(
"Token number %d: %s"
%
(
token_number
,
e
))
raise
e
.
__class__
(
"Token number %d: %s"
%
(
token_number
,
e
))
...
...
Cython/Plex/Machines.py
View file @
5607fabd
...
@@ -59,6 +59,7 @@ class Machine(object):
...
@@ -59,6 +59,7 @@ class Machine(object):
for
s
in
self
.
states
:
for
s
in
self
.
states
:
s
.
dump
(
file
)
s
.
dump
(
file
)
class
Node
(
object
):
class
Node
(
object
):
"""A state of an NFA or DFA."""
"""A state of an NFA or DFA."""
transitions
=
None
# TransitionMap
transitions
=
None
# TransitionMap
...
@@ -111,7 +112,7 @@ class Node(object):
...
@@ -111,7 +112,7 @@ class Node(object):
# Header
# Header
file
.
write
(
" State %d:
\
n
"
%
self
.
number
)
file
.
write
(
" State %d:
\
n
"
%
self
.
number
)
# Transitions
# Transitions
# self.dump_transitions(file)
# self.dump_transitions(file)
self
.
transitions
.
dump
(
file
)
self
.
transitions
.
dump
(
file
)
# Action
# Action
action
=
self
.
action
action
=
self
.
action
...
@@ -122,21 +123,21 @@ class Node(object):
...
@@ -122,21 +123,21 @@ class Node(object):
def
__lt__
(
self
,
other
):
def
__lt__
(
self
,
other
):
return
self
.
number
<
other
.
number
return
self
.
number
<
other
.
number
class
FastMachine
(
object
):
class
FastMachine
(
object
):
"""
"""
FastMachine is a deterministic machine represented in a way that
FastMachine is a deterministic machine represented in a way that
allows fast scanning.
allows fast scanning.
"""
"""
initial_states
=
None
# {state_name:state}
initial_states
=
None
# {state_name:state}
states
=
None
# [state]
states
=
None
# [state] where state = {event:state, 'else':state, 'action':Action}
# where state = {event:state, 'else':state, 'action':Action}
next_number
=
1
# for debugging
next_number
=
1
# for debugging
new_state_template
=
{
new_state_template
=
{
''
:
None
,
'bol'
:
None
,
'eol'
:
None
,
'eof'
:
None
,
'else'
:
None
''
:
None
,
'bol'
:
None
,
'eol'
:
None
,
'eof'
:
None
,
'else'
:
None
}
}
def
__init__
(
self
,
old_machine
=
None
):
def
__init__
(
self
,
old_machine
=
None
):
self
.
initial_states
=
initial_states
=
{}
self
.
initial_states
=
initial_states
=
{}
self
.
states
=
[]
self
.
states
=
[]
if
old_machine
:
if
old_machine
:
...
@@ -159,7 +160,7 @@ class FastMachine(object):
...
@@ -159,7 +160,7 @@ class FastMachine(object):
for
state
in
self
.
states
:
for
state
in
self
.
states
:
state
.
clear
()
state
.
clear
()
def
new_state
(
self
,
action
=
None
):
def
new_state
(
self
,
action
=
None
):
number
=
self
.
next_number
number
=
self
.
next_number
self
.
next_number
=
number
+
1
self
.
next_number
=
number
+
1
result
=
self
.
new_state_template
.
copy
()
result
=
self
.
new_state_template
.
copy
()
...
@@ -179,7 +180,7 @@ class FastMachine(object):
...
@@ -179,7 +180,7 @@ class FastMachine(object):
elif
code1
!=
maxint
:
elif
code1
!=
maxint
:
while
code0
<
code1
:
while
code0
<
code1
:
state
[
unichr
(
code0
)]
=
new_state
state
[
unichr
(
code0
)]
=
new_state
code0
=
code0
+
1
code0
+=
1
else
:
else
:
state
[
event
]
=
new_state
state
[
event
]
=
new_state
...
@@ -241,10 +242,10 @@ class FastMachine(object):
...
@@ -241,10 +242,10 @@ class FastMachine(object):
while
i
<
n
:
while
i
<
n
:
c1
=
ord
(
char_list
[
i
])
c1
=
ord
(
char_list
[
i
])
c2
=
c1
c2
=
c1
i
=
i
+
1
i
+=
1
while
i
<
n
and
ord
(
char_list
[
i
])
==
c2
+
1
:
while
i
<
n
and
ord
(
char_list
[
i
])
==
c2
+
1
:
i
=
i
+
1
i
+=
1
c2
=
c2
+
1
c2
+=
1
result
.
append
((
chr
(
c1
),
chr
(
c2
)))
result
.
append
((
chr
(
c1
),
chr
(
c2
)))
return
tuple
(
result
)
return
tuple
(
result
)
...
...
Cython/Plex/Regexps.py
View file @
5607fabd
...
@@ -42,14 +42,15 @@ def chars_to_ranges(s):
...
@@ -42,14 +42,15 @@ def chars_to_ranges(s):
while
i
<
n
:
while
i
<
n
:
code1
=
ord
(
char_list
[
i
])
code1
=
ord
(
char_list
[
i
])
code2
=
code1
+
1
code2
=
code1
+
1
i
=
i
+
1
i
+=
1
while
i
<
n
and
code2
>=
ord
(
char_list
[
i
]):
while
i
<
n
and
code2
>=
ord
(
char_list
[
i
]):
code2
=
code2
+
1
code2
+=
1
i
=
i
+
1
i
+=
1
result
.
append
(
code1
)
result
.
append
(
code1
)
result
.
append
(
code2
)
result
.
append
(
code2
)
return
result
return
result
def
uppercase_range
(
code1
,
code2
):
def
uppercase_range
(
code1
,
code2
):
"""
"""
If the range of characters from code1 to code2-1 includes any
If the range of characters from code1 to code2-1 includes any
...
@@ -63,6 +64,7 @@ def uppercase_range(code1, code2):
...
@@ -63,6 +64,7 @@ def uppercase_range(code1, code2):
else
:
else
:
return
None
return
None
def
lowercase_range
(
code1
,
code2
):
def
lowercase_range
(
code1
,
code2
):
"""
"""
If the range of characters from code1 to code2-1 includes any
If the range of characters from code1 to code2-1 includes any
...
@@ -76,6 +78,7 @@ def lowercase_range(code1, code2):
...
@@ -76,6 +78,7 @@ def lowercase_range(code1, code2):
else
:
else
:
return
None
return
None
def
CodeRanges
(
code_list
):
def
CodeRanges
(
code_list
):
"""
"""
Given a list of codes as returned by chars_to_ranges, return
Given a list of codes as returned by chars_to_ranges, return
...
@@ -86,6 +89,7 @@ def CodeRanges(code_list):
...
@@ -86,6 +89,7 @@ def CodeRanges(code_list):
re_list
.
append
(
CodeRange
(
code_list
[
i
],
code_list
[
i
+
1
]))
re_list
.
append
(
CodeRange
(
code_list
[
i
],
code_list
[
i
+
1
]))
return
Alt
(
*
re_list
)
return
Alt
(
*
re_list
)
def
CodeRange
(
code1
,
code2
):
def
CodeRange
(
code1
,
code2
):
"""
"""
CodeRange(code1, code2) is an RE which matches any character
CodeRange(code1, code2) is an RE which matches any character
...
@@ -98,6 +102,7 @@ def CodeRange(code1, code2):
...
@@ -98,6 +102,7 @@ def CodeRange(code1, code2):
else
:
else
:
return
RawCodeRange
(
code1
,
code2
)
return
RawCodeRange
(
code1
,
code2
)
#
#
# Abstract classes
# Abstract classes
#
#
...
@@ -211,6 +216,7 @@ class RE(object):
...
@@ -211,6 +216,7 @@ class RE(object):
## def calc_str(self):
## def calc_str(self):
## return "Char(%s)" % repr(self.char)
## return "Char(%s)" % repr(self.char)
def
Char
(
c
):
def
Char
(
c
):
"""
"""
Char(c) is an RE which matches the character |c|.
Char(c) is an RE which matches the character |c|.
...
@@ -222,6 +228,7 @@ def Char(c):
...
@@ -222,6 +228,7 @@ def Char(c):
result
.
str
=
"Char(%s)"
%
repr
(
c
)
result
.
str
=
"Char(%s)"
%
repr
(
c
)
return
result
return
result
class
RawCodeRange
(
RE
):
class
RawCodeRange
(
RE
):
"""
"""
RawCodeRange(code1, code2) is a low-level RE which matches any character
RawCodeRange(code1, code2) is a low-level RE which matches any character
...
@@ -252,6 +259,7 @@ class RawCodeRange(RE):
...
@@ -252,6 +259,7 @@ class RawCodeRange(RE):
def
calc_str
(
self
):
def
calc_str
(
self
):
return
"CodeRange(%d,%d)"
%
(
self
.
code1
,
self
.
code2
)
return
"CodeRange(%d,%d)"
%
(
self
.
code1
,
self
.
code2
)
class
_RawNewline
(
RE
):
class
_RawNewline
(
RE
):
"""
"""
RawNewline is a low-level RE which matches a newline character.
RawNewline is a low-level RE which matches a newline character.
...
@@ -266,6 +274,7 @@ class _RawNewline(RE):
...
@@ -266,6 +274,7 @@ class _RawNewline(RE):
s
=
self
.
build_opt
(
m
,
initial_state
,
EOL
)
s
=
self
.
build_opt
(
m
,
initial_state
,
EOL
)
s
.
add_transition
((
nl_code
,
nl_code
+
1
),
final_state
)
s
.
add_transition
((
nl_code
,
nl_code
+
1
),
final_state
)
RawNewline
=
_RawNewline
()
RawNewline
=
_RawNewline
()
...
@@ -304,7 +313,7 @@ class Seq(RE):
...
@@ -304,7 +313,7 @@ class Seq(RE):
i
=
len
(
re_list
)
i
=
len
(
re_list
)
match_nl
=
0
match_nl
=
0
while
i
:
while
i
:
i
=
i
-
1
i
-=
1
re
=
re_list
[
i
]
re
=
re_list
[
i
]
if
re
.
match_nl
:
if
re
.
match_nl
:
match_nl
=
1
match_nl
=
1
...
@@ -354,7 +363,7 @@ class Alt(RE):
...
@@ -354,7 +363,7 @@ class Alt(RE):
non_nullable_res
.
append
(
re
)
non_nullable_res
.
append
(
re
)
if
re
.
match_nl
:
if
re
.
match_nl
:
match_nl
=
1
match_nl
=
1
i
=
i
+
1
i
+=
1
self
.
nullable_res
=
nullable_res
self
.
nullable_res
=
nullable_res
self
.
non_nullable_res
=
non_nullable_res
self
.
non_nullable_res
=
non_nullable_res
self
.
nullable
=
nullable
self
.
nullable
=
nullable
...
@@ -434,6 +443,7 @@ Empty.__doc__ = \
...
@@ -434,6 +443,7 @@ Empty.__doc__ = \
"""
"""
Empty
.
str
=
"Empty"
Empty
.
str
=
"Empty"
def
Str1
(
s
):
def
Str1
(
s
):
"""
"""
Str1(s) is an RE which matches the literal string |s|.
Str1(s) is an RE which matches the literal string |s|.
...
@@ -442,6 +452,7 @@ def Str1(s):
...
@@ -442,6 +452,7 @@ def Str1(s):
result
.
str
=
"Str(%s)"
%
repr
(
s
)
result
.
str
=
"Str(%s)"
%
repr
(
s
)
return
result
return
result
def
Str
(
*
strs
):
def
Str
(
*
strs
):
"""
"""
Str(s) is an RE which matches the literal string |s|.
Str(s) is an RE which matches the literal string |s|.
...
@@ -454,6 +465,7 @@ def Str(*strs):
...
@@ -454,6 +465,7 @@ def Str(*strs):
result
.
str
=
"Str(%s)"
%
','
.
join
(
map
(
repr
,
strs
))
result
.
str
=
"Str(%s)"
%
','
.
join
(
map
(
repr
,
strs
))
return
result
return
result
def
Any
(
s
):
def
Any
(
s
):
"""
"""
Any(s) is an RE which matches any character in the string |s|.
Any(s) is an RE which matches any character in the string |s|.
...
@@ -463,6 +475,7 @@ def Any(s):
...
@@ -463,6 +475,7 @@ def Any(s):
result
.
str
=
"Any(%s)"
%
repr
(
s
)
result
.
str
=
"Any(%s)"
%
repr
(
s
)
return
result
return
result
def
AnyBut
(
s
):
def
AnyBut
(
s
):
"""
"""
AnyBut(s) is an RE which matches any character (including
AnyBut(s) is an RE which matches any character (including
...
@@ -475,6 +488,7 @@ def AnyBut(s):
...
@@ -475,6 +488,7 @@ def AnyBut(s):
result
.
str
=
"AnyBut(%s)"
%
repr
(
s
)
result
.
str
=
"AnyBut(%s)"
%
repr
(
s
)
return
result
return
result
AnyChar
=
AnyBut
(
""
)
AnyChar
=
AnyBut
(
""
)
AnyChar
.
__doc__
=
\
AnyChar
.
__doc__
=
\
"""
"""
...
@@ -482,7 +496,8 @@ AnyChar.__doc__ = \
...
@@ -482,7 +496,8 @@ AnyChar.__doc__ = \
"""
"""
AnyChar
.
str
=
"AnyChar"
AnyChar
.
str
=
"AnyChar"
def
Range
(
s1
,
s2
=
None
):
def
Range
(
s1
,
s2
=
None
):
"""
"""
Range(c1, c2) is an RE which matches any single character in the range
Range(c1, c2) is an RE which matches any single character in the range
|c1| to |c2| inclusive.
|c1| to |c2| inclusive.
...
@@ -495,11 +510,12 @@ def Range(s1, s2 = None):
...
@@ -495,11 +510,12 @@ def Range(s1, s2 = None):
else
:
else
:
ranges
=
[]
ranges
=
[]
for
i
in
range
(
0
,
len
(
s1
),
2
):
for
i
in
range
(
0
,
len
(
s1
),
2
):
ranges
.
append
(
CodeRange
(
ord
(
s1
[
i
]),
ord
(
s1
[
i
+
1
])
+
1
))
ranges
.
append
(
CodeRange
(
ord
(
s1
[
i
]),
ord
(
s1
[
i
+
1
])
+
1
))
result
=
Alt
(
*
ranges
)
result
=
Alt
(
*
ranges
)
result
.
str
=
"Range(%s)"
%
repr
(
s1
)
result
.
str
=
"Range(%s)"
%
repr
(
s1
)
return
result
return
result
def
Opt
(
re
):
def
Opt
(
re
):
"""
"""
Opt(re) is an RE which matches either |re| or the empty string.
Opt(re) is an RE which matches either |re| or the empty string.
...
@@ -508,6 +524,7 @@ def Opt(re):
...
@@ -508,6 +524,7 @@ def Opt(re):
result
.
str
=
"Opt(%s)"
%
re
result
.
str
=
"Opt(%s)"
%
re
return
result
return
result
def
Rep
(
re
):
def
Rep
(
re
):
"""
"""
Rep(re) is an RE which matches zero or more repetitions of |re|.
Rep(re) is an RE which matches zero or more repetitions of |re|.
...
@@ -516,12 +533,14 @@ def Rep(re):
...
@@ -516,12 +533,14 @@ def Rep(re):
result
.
str
=
"Rep(%s)"
%
re
result
.
str
=
"Rep(%s)"
%
re
return
result
return
result
def
NoCase
(
re
):
def
NoCase
(
re
):
"""
"""
NoCase(re) is an RE which matches the same strings as RE, but treating
NoCase(re) is an RE which matches the same strings as RE, but treating
upper and lower case letters as equivalent.
upper and lower case letters as equivalent.
"""
"""
return
SwitchCase
(
re
,
nocase
=
1
)
return
SwitchCase
(
re
,
nocase
=
1
)
def
Case
(
re
):
def
Case
(
re
):
"""
"""
...
@@ -529,7 +548,7 @@ def Case(re):
...
@@ -529,7 +548,7 @@ def Case(re):
upper and lower case letters as distinct, i.e. it cancels the effect
upper and lower case letters as distinct, i.e. it cancels the effect
of any enclosing NoCase().
of any enclosing NoCase().
"""
"""
return
SwitchCase
(
re
,
nocase
=
0
)
return
SwitchCase
(
re
,
nocase
=
0
)
#
#
# RE Constants
# RE Constants
...
...
Cython/Plex/Scanners.py
View file @
5607fabd
...
@@ -10,6 +10,7 @@
...
@@ -10,6 +10,7 @@
from
__future__
import
absolute_import
from
__future__
import
absolute_import
import
cython
import
cython
cython
.
declare
(
BOL
=
object
,
EOL
=
object
,
EOF
=
object
,
NOT_FOUND
=
object
)
cython
.
declare
(
BOL
=
object
,
EOL
=
object
,
EOF
=
object
,
NOT_FOUND
=
object
)
from
.
import
Errors
from
.
import
Errors
...
@@ -50,25 +51,25 @@ class Scanner(object):
...
@@ -50,25 +51,25 @@ class Scanner(object):
"""
"""
# lexicon = None # Lexicon
# lexicon = None # Lexicon
# stream = None # file-like object
# stream = None # file-like object
# name = ''
# name = ''
# buffer = ''
# buffer = ''
# buf_start_pos = 0 # position in input of start of buffer
# buf_start_pos = 0 # position in input of start of buffer
# next_pos = 0 # position in input of next char to read
# next_pos = 0 # position in input of next char to read
# cur_pos = 0 # position in input of current char
# cur_pos = 0 # position in input of current char
# cur_line = 1 # line number of current char
# cur_line = 1 # line number of current char
# cur_line_start = 0 # position in input of start of current line
# cur_line_start = 0 # position in input of start of current line
# start_pos = 0 # position in input of start of token
# start_pos = 0 # position in input of start of token
# start_line = 0 # line number of start of token
# start_line = 0 # line number of start of token
# start_col = 0 # position in line of start of token
# start_col = 0 # position in line of start of token
# text = None # text of last token read
# text = None # text of last token read
# initial_state = None # Node
# initial_state = None # Node
# state_name = '' # Name of initial state
# state_name = '' # Name of initial state
# queue = None # list of tokens to be returned
# queue = None # list of tokens to be returned
# trace = 0
# trace = 0
def
__init__
(
self
,
lexicon
,
stream
,
name
=
''
,
initial_pos
=
None
):
def
__init__
(
self
,
lexicon
,
stream
,
name
=
''
,
initial_pos
=
None
):
"""
"""
Scanner(lexicon, stream, name = '')
Scanner(lexicon, stream, name = '')
...
@@ -143,7 +144,8 @@ class Scanner(object):
...
@@ -143,7 +144,8 @@ class Scanner(object):
if
self
.
trace
:
if
self
.
trace
:
print
(
"Scanner: read: Performing %s %d:%d"
%
(
print
(
"Scanner: read: Performing %s %d:%d"
%
(
action
,
self
.
start_pos
,
self
.
cur_pos
))
action
,
self
.
start_pos
,
self
.
cur_pos
))
text
=
self
.
buffer
[
self
.
start_pos
-
self
.
buf_start_pos
:
text
=
self
.
buffer
[
self
.
start_pos
-
self
.
buf_start_pos
:
self
.
cur_pos
-
self
.
buf_start_pos
]
self
.
cur_pos
-
self
.
buf_start_pos
]
return
(
text
,
action
)
return
(
text
,
action
)
else
:
else
:
...
@@ -198,19 +200,19 @@ class Scanner(object):
...
@@ -198,19 +200,19 @@ class Scanner(object):
buf_index
=
next_pos
-
buf_start_pos
buf_index
=
next_pos
-
buf_start_pos
if
buf_index
<
buf_len
:
if
buf_index
<
buf_len
:
c
=
buffer
[
buf_index
]
c
=
buffer
[
buf_index
]
next_pos
=
next_pos
+
1
next_pos
+=
1
else
:
else
:
discard
=
self
.
start_pos
-
buf_start_pos
discard
=
self
.
start_pos
-
buf_start_pos
data
=
self
.
stream
.
read
(
0x1000
)
data
=
self
.
stream
.
read
(
0x1000
)
buffer
=
self
.
buffer
[
discard
:]
+
data
buffer
=
self
.
buffer
[
discard
:]
+
data
self
.
buffer
=
buffer
self
.
buffer
=
buffer
buf_start_pos
=
buf_start_pos
+
discard
buf_start_pos
+=
discard
self
.
buf_start_pos
=
buf_start_pos
self
.
buf_start_pos
=
buf_start_pos
buf_len
=
len
(
buffer
)
buf_len
=
len
(
buffer
)
buf_index
=
buf_index
-
discard
buf_index
-=
discard
if
data
:
if
data
:
c
=
buffer
[
buf_index
]
c
=
buffer
[
buf_index
]
next_pos
=
next_pos
+
1
next_pos
+=
1
else
:
else
:
c
=
u''
c
=
u''
# End inlined: c = self.read_char()
# End inlined: c = self.read_char()
...
@@ -226,7 +228,7 @@ class Scanner(object):
...
@@ -226,7 +228,7 @@ class Scanner(object):
cur_char
=
u'
\
n
'
cur_char
=
u'
\
n
'
input_state
=
3
input_state
=
3
elif
input_state
==
3
:
elif
input_state
==
3
:
cur_line
=
cur_line
+
1
cur_line
+=
1
cur_line_start
=
cur_pos
=
next_pos
cur_line_start
=
cur_pos
=
next_pos
cur_char
=
BOL
cur_char
=
BOL
input_state
=
1
input_state
=
1
...
@@ -263,7 +265,7 @@ class Scanner(object):
...
@@ -263,7 +265,7 @@ class Scanner(object):
def
next_char
(
self
):
def
next_char
(
self
):
input_state
=
self
.
input_state
input_state
=
self
.
input_state
if
self
.
trace
:
if
self
.
trace
:
print
(
"Scanner: next: %s [%d] %d"
%
(
" "
*
20
,
input_state
,
self
.
cur_pos
))
print
(
"Scanner: next: %s [%d] %d"
%
(
" "
*
20
,
input_state
,
self
.
cur_pos
))
if
input_state
==
1
:
if
input_state
==
1
:
self
.
cur_pos
=
self
.
next_pos
self
.
cur_pos
=
self
.
next_pos
c
=
self
.
read_char
()
c
=
self
.
read_char
()
...
@@ -279,7 +281,7 @@ class Scanner(object):
...
@@ -279,7 +281,7 @@ class Scanner(object):
self
.
cur_char
=
u'
\
n
'
self
.
cur_char
=
u'
\
n
'
self
.
input_state
=
3
self
.
input_state
=
3
elif
input_state
==
3
:
elif
input_state
==
3
:
self
.
cur_line
=
self
.
cur_line
+
1
self
.
cur_line
+=
1
self
.
cur_line_start
=
self
.
cur_pos
=
self
.
next_pos
self
.
cur_line_start
=
self
.
cur_pos
=
self
.
next_pos
self
.
cur_char
=
BOL
self
.
cur_char
=
BOL
self
.
input_state
=
1
self
.
input_state
=
1
...
@@ -313,7 +315,7 @@ class Scanner(object):
...
@@ -313,7 +315,7 @@ class Scanner(object):
self
.
lexicon
.
get_initial_state
(
state_name
))
self
.
lexicon
.
get_initial_state
(
state_name
))
self
.
state_name
=
state_name
self
.
state_name
=
state_name
def
produce
(
self
,
value
,
text
=
None
):
def
produce
(
self
,
value
,
text
=
None
):
"""
"""
Called from an action procedure, causes |value| to be returned
Called from an action procedure, causes |value| to be returned
as the token value from read(). If |text| is supplied, it is
as the token value from read(). If |text| is supplied, it is
...
...
Cython/Plex/Traditional.py
View file @
5607fabd
...
@@ -25,7 +25,6 @@ def re(s):
...
@@ -25,7 +25,6 @@ def re(s):
class
REParser
(
object
):
class
REParser
(
object
):
def
__init__
(
self
,
s
):
def
__init__
(
self
,
s
):
self
.
s
=
s
self
.
s
=
s
self
.
i
=
-
1
self
.
i
=
-
1
...
...
Cython/Plex/Transitions.py
View file @
5607fabd
...
@@ -40,7 +40,7 @@ class TransitionMap(object):
...
@@ -40,7 +40,7 @@ class TransitionMap(object):
map
=
None
# The list of codes and states
map
=
None
# The list of codes and states
special
=
None
# Mapping for special events
special
=
None
# Mapping for special events
def
__init__
(
self
,
map
=
None
,
special
=
None
):
def
__init__
(
self
,
map
=
None
,
special
=
None
):
if
not
map
:
if
not
map
:
map
=
[
-
maxint
,
{},
maxint
]
map
=
[
-
maxint
,
{},
maxint
]
if
not
special
:
if
not
special
:
...
@@ -50,7 +50,7 @@ class TransitionMap(object):
...
@@ -50,7 +50,7 @@ class TransitionMap(object):
#self.check() ###
#self.check() ###
def
add
(
self
,
event
,
new_state
,
def
add
(
self
,
event
,
new_state
,
TupleType
=
tuple
):
TupleType
=
tuple
):
"""
"""
Add transition to |new_state| on |event|.
Add transition to |new_state| on |event|.
"""
"""
...
@@ -61,12 +61,12 @@ class TransitionMap(object):
...
@@ -61,12 +61,12 @@ class TransitionMap(object):
map
=
self
.
map
map
=
self
.
map
while
i
<
j
:
while
i
<
j
:
map
[
i
+
1
][
new_state
]
=
1
map
[
i
+
1
][
new_state
]
=
1
i
=
i
+
2
i
+=
2
else
:
else
:
self
.
get_special
(
event
)[
new_state
]
=
1
self
.
get_special
(
event
)[
new_state
]
=
1
def
add_set
(
self
,
event
,
new_set
,
def
add_set
(
self
,
event
,
new_set
,
TupleType
=
tuple
):
TupleType
=
tuple
):
"""
"""
Add transitions to the states in |new_set| on |event|.
Add transitions to the states in |new_set| on |event|.
"""
"""
...
@@ -77,19 +77,19 @@ class TransitionMap(object):
...
@@ -77,19 +77,19 @@ class TransitionMap(object):
map
=
self
.
map
map
=
self
.
map
while
i
<
j
:
while
i
<
j
:
map
[
i
+
1
].
update
(
new_set
)
map
[
i
+
1
].
update
(
new_set
)
i
=
i
+
2
i
+=
2
else
:
else
:
self
.
get_special
(
event
).
update
(
new_set
)
self
.
get_special
(
event
).
update
(
new_set
)
def
get_epsilon
(
self
,
def
get_epsilon
(
self
,
none
=
None
):
none
=
None
):
"""
"""
Return the mapping for epsilon, or None.
Return the mapping for epsilon, or None.
"""
"""
return
self
.
special
.
get
(
''
,
none
)
return
self
.
special
.
get
(
''
,
none
)
def
iteritems
(
self
,
def
iteritems
(
self
,
len
=
len
):
len
=
len
):
"""
"""
Return the mapping as an iterable of ((code1, code2), state_set) and
Return the mapping as an iterable of ((code1, code2), state_set) and
(special_event, state_set) pairs.
(special_event, state_set) pairs.
...
@@ -106,17 +106,18 @@ class TransitionMap(object):
...
@@ -106,17 +106,18 @@ class TransitionMap(object):
if
set
or
else_set
:
if
set
or
else_set
:
result
.
append
(((
code0
,
code1
),
set
))
result
.
append
(((
code0
,
code1
),
set
))
code0
=
code1
code0
=
code1
i
=
i
+
2
i
+=
2
for
event
,
set
in
self
.
special
.
iteritems
():
for
event
,
set
in
self
.
special
.
iteritems
():
if
set
:
if
set
:
result
.
append
((
event
,
set
))
result
.
append
((
event
,
set
))
return
iter
(
result
)
return
iter
(
result
)
items
=
iteritems
items
=
iteritems
# ------------------- Private methods --------------------
# ------------------- Private methods --------------------
def
split
(
self
,
code
,
def
split
(
self
,
code
,
len
=
len
,
maxint
=
maxint
):
len
=
len
,
maxint
=
maxint
):
"""
"""
Search the list for the position of the split point for |code|,
Search the list for the position of the split point for |code|,
inserting a new split point if necessary. Returns index |i| such
inserting a new split point if necessary. Returns index |i| such
...
@@ -173,10 +174,10 @@ class TransitionMap(object):
...
@@ -173,10 +174,10 @@ class TransitionMap(object):
else
:
else
:
code_str
=
str
(
code
)
code_str
=
str
(
code
)
map_strs
.
append
(
code_str
)
map_strs
.
append
(
code_str
)
i
=
i
+
1
i
+=
1
if
i
<
n
:
if
i
<
n
:
map_strs
.
append
(
state_set_str
(
map
[
i
]))
map_strs
.
append
(
state_set_str
(
map
[
i
]))
i
=
i
+
1
i
+=
1
special_strs
=
{}
special_strs
=
{}
for
event
,
set
in
self
.
special
.
iteritems
():
for
event
,
set
in
self
.
special
.
iteritems
():
special_strs
[
event
]
=
state_set_str
(
set
)
special_strs
[
event
]
=
state_set_str
(
set
)
...
@@ -199,7 +200,7 @@ class TransitionMap(object):
...
@@ -199,7 +200,7 @@ class TransitionMap(object):
n
=
len
(
map
)
-
1
n
=
len
(
map
)
-
1
while
i
<
n
:
while
i
<
n
:
self
.
dump_range
(
map
[
i
],
map
[
i
+
2
],
map
[
i
+
1
],
file
)
self
.
dump_range
(
map
[
i
],
map
[
i
+
2
],
map
[
i
+
1
],
file
)
i
=
i
+
2
i
+=
2
for
event
,
set
in
self
.
special
.
iteritems
():
for
event
,
set
in
self
.
special
.
iteritems
():
if
set
:
if
set
:
if
not
event
:
if
not
event
:
...
@@ -234,6 +235,7 @@ class TransitionMap(object):
...
@@ -234,6 +235,7 @@ class TransitionMap(object):
def
dump_set
(
self
,
set
):
def
dump_set
(
self
,
set
):
return
state_set_str
(
set
)
return
state_set_str
(
set
)
#
#
# State set manipulation functions
# State set manipulation functions
#
#
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment