Commit f6fdecac authored by Stefan Behnel

some more cythonisation in Plex scanner classes (15% faster for lxml)

parent 97ed7663
...@@ -6,28 +6,29 @@ cdef class Scanner: ...@@ -6,28 +6,29 @@ cdef class Scanner:
cdef public stream cdef public stream
cdef public name cdef public name
cdef public buffer cdef public buffer
cdef public long buf_start_pos cdef public Py_ssize_t buf_start_pos
cdef public long next_pos cdef public Py_ssize_t next_pos
cdef public long cur_pos cdef public Py_ssize_t cur_pos
cdef public long cur_line cdef public Py_ssize_t cur_line
cdef public long cur_line_start cdef public Py_ssize_t cur_line_start
cdef public long start_pos cdef public Py_ssize_t start_pos
cdef public long start_line cdef public Py_ssize_t start_line
cdef public long start_col cdef public Py_ssize_t start_col
cdef public text cdef public text
cdef public initial_state # int? cdef public initial_state # int?
cdef public state_name cdef public state_name
cdef public list queue cdef public list queue
cdef public bint trace cdef public bint trace
cdef public cur_char cdef public cur_char
cdef public input_state cdef public int input_state
cdef public level cdef public level
@cython.locals(input_state=long) @cython.locals(input_state=long)
cpdef next_char(self) cpdef next_char(self)
cpdef read(self) cpdef read(self)
cpdef position(self) cpdef tuple scan_a_token(self)
cpdef tuple position(self)
@cython.locals(cur_pos=cython.long, cur_line=cython.long, @cython.locals(cur_pos=cython.long, cur_line=cython.long,
cur_line_start=cython.long, input_state=cython.long, cur_line_start=cython.long, input_state=cython.long,
......
...@@ -75,6 +75,8 @@ class Scanner: ...@@ -75,6 +75,8 @@ class Scanner:
|name| is optional, and may be the name of the file being |name| is optional, and may be the name of the file being
scanned or any other identifying string. scanned or any other identifying string.
""" """
self.trace = 0
self.buffer = '' self.buffer = ''
self.buf_start_pos = 0 self.buf_start_pos = 0
self.next_pos = 0 self.next_pos = 0
...@@ -135,7 +137,7 @@ class Scanner: ...@@ -135,7 +137,7 @@ class Scanner:
# else: # else:
# action = self.run_machine_inlined() # action = self.run_machine_inlined()
action = self.run_machine_inlined() action = self.run_machine_inlined()
if action: if action is not None:
if self.trace: if self.trace:
print("Scanner: read: Performing %s %d:%d" % ( print("Scanner: read: Performing %s %d:%d" % (
action, self.start_pos, self.cur_pos)) action, self.start_pos, self.cur_pos))
...@@ -144,21 +146,11 @@ class Scanner: ...@@ -144,21 +146,11 @@ class Scanner:
return (text, action) return (text, action)
else: else:
if self.cur_pos == self.start_pos: if self.cur_pos == self.start_pos:
if self.cur_char == EOL: if self.cur_char is EOL:
self.next_char() self.next_char()
if not self.cur_char or self.cur_char == EOF: if self.cur_char is None or self.cur_char is EOF:
return ('', None) return ('', None)
raise Errors.UnrecognizedInput(self, self.state_name) raise Errors.UnrecognizedInput(self, self.state_name)
def run_machine(self):
"""
Run the machine until no more transitions are possible.
"""
self.state = self.initial_state
self.backup_state = None
while self.transition():
pass
return self.back_up()
def run_machine_inlined(self): def run_machine_inlined(self):
""" """
...@@ -183,7 +175,7 @@ class Scanner: ...@@ -183,7 +175,7 @@ class Scanner:
# Begin inlined self.save_for_backup() # Begin inlined self.save_for_backup()
#action = state.action #@slow #action = state.action #@slow
action = state['action'] #@fast action = state['action'] #@fast
if action: if action is not None:
backup_state = ( backup_state = (
action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos) action, cur_pos, cur_line, cur_line_start, cur_char, input_state, next_pos)
# End inlined self.save_for_backup() # End inlined self.save_for_backup()
...@@ -245,7 +237,7 @@ class Scanner: ...@@ -245,7 +237,7 @@ class Scanner:
if trace: #TRACE# if trace: #TRACE#
print("blocked") #TRACE# print("blocked") #TRACE#
# Begin inlined: action = self.back_up() # Begin inlined: action = self.back_up()
if backup_state: if backup_state is not None:
(action, cur_pos, cur_line, cur_line_start, (action, cur_pos, cur_line, cur_line_start,
cur_char, input_state, next_pos) = backup_state cur_char, input_state, next_pos) = backup_state
else: else:
...@@ -259,46 +251,9 @@ class Scanner: ...@@ -259,46 +251,9 @@ class Scanner:
self.input_state = input_state self.input_state = input_state
self.next_pos = next_pos self.next_pos = next_pos
if trace: #TRACE# if trace: #TRACE#
if action: #TRACE# if action is not None: #TRACE#
print("Doing " + action) #TRACE# print("Doing %s" % action) #TRACE#
return action return action
# def transition(self):
# self.save_for_backup()
# c = self.cur_char
# new_state = self.state.new_state(c)
# if new_state:
# if self.trace:
# print "Scanner: read: State %d: %s --> State %d" % (
# self.state.number, repr(c), new_state.number)
# self.state = new_state
# self.next_char()
# return 1
# else:
# if self.trace:
# print "Scanner: read: State %d: %s --> blocked" % (
# self.state.number, repr(c))
# return 0
# def save_for_backup(self):
# action = self.state.get_action()
# if action:
# if self.trace:
# print "Scanner: read: Saving backup point at", self.cur_pos
# self.backup_state = (
# action, self.cur_pos, self.cur_line, self.cur_line_start,
# self.cur_char, self.input_state, self.next_pos)
# def back_up(self):
# backup_state = self.backup_state
# if backup_state:
# (action, self.cur_pos, self.cur_line, self.cur_line_start,
# self.cur_char, self.input_state, self.next_pos) = backup_state
# if self.trace:
# print "Scanner: read: Backing up to", self.cur_pos
# return action
# else:
# return None
def next_char(self): def next_char(self):
input_state = self.input_state input_state = self.input_state
...@@ -330,26 +285,7 @@ class Scanner: ...@@ -330,26 +285,7 @@ class Scanner:
self.cur_char = '' self.cur_char = ''
if self.trace: if self.trace:
print("--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char))) print("--> [%d] %d %s" % (input_state, self.cur_pos, repr(self.cur_char)))
# def read_char(self):
# """
# Get the next input character, filling the buffer if necessary.
# Returns '' at end of file.
# """
# next_pos = self.next_pos
# buf_index = next_pos - self.buf_start_pos
# if buf_index == len(self.buffer):
# discard = self.start_pos - self.buf_start_pos
# data = self.stream.read(0x1000)
# self.buffer = self.buffer[discard:] + data
# self.buf_start_pos = self.buf_start_pos + discard
# buf_index = buf_index - discard
# if not data:
# return ''
# c = self.buffer[buf_index]
# self.next_pos = next_pos + 1
# return c
def position(self): def position(self):
""" """
Return a tuple (name, line, col) representing the location of Return a tuple (name, line, col) representing the location of
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment