Commit 9a8fe7c4 authored by Guido van Rossum

The cdata handling code for <script> and <style> could be confused by
buffer boundaries in the middle of the cdata stretch.

Fixed this, and added a clear_cdata_mode() callback after a successful
</endtag>.  (I hope this doesn't break other things.  This parser is
getting horribly ad-hoc. :-( )
parent 12d669c1
...@@ -14,7 +14,7 @@ import string ...@@ -14,7 +14,7 @@ import string
# Regular expressions used for parsing # Regular expressions used for parsing
interesting_normal = re.compile('[&<]') interesting_normal = re.compile('[&<]')
interesting_cdata = re.compile('</') interesting_cdata = re.compile(r'<(/|\Z)')
incomplete = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*|#[0-9]*)?') incomplete = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*|#[0-9]*)?')
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
...@@ -144,6 +144,9 @@ class HTMLParser: ...@@ -144,6 +144,9 @@ class HTMLParser:
def set_cdata_mode(self): def set_cdata_mode(self):
self.interesting = interesting_cdata self.interesting = interesting_cdata
def clear_cdata_mode(self):
self.interesting = interesting_normal
# Internal -- handle data as far as reasonable. May leave state # Internal -- handle data as far as reasonable. May leave state
# and data to be processed by a subsequent call. If 'end' is # and data to be processed by a subsequent call. If 'end' is
# true, force handling all data as if followed by EOF marker. # true, force handling all data as if followed by EOF marker.
...@@ -155,7 +158,6 @@ class HTMLParser: ...@@ -155,7 +158,6 @@ class HTMLParser:
match = self.interesting.search(rawdata, i) # < or & match = self.interesting.search(rawdata, i) # < or &
if match: if match:
j = match.start() j = match.start()
self.interesting = interesting_normal
else: else:
j = n j = n
if i < j: self.handle_data(rawdata[i:j]) if i < j: self.handle_data(rawdata[i:j])
...@@ -166,6 +168,8 @@ class HTMLParser: ...@@ -166,6 +168,8 @@ class HTMLParser:
k = self.parse_starttag(i) k = self.parse_starttag(i)
elif endtagopen.match(rawdata, i): # </ elif endtagopen.match(rawdata, i): # </
k = self.parse_endtag(i) k = self.parse_endtag(i)
if k >= 0:
self.clear_cdata_mode()
elif commentopen.match(rawdata, i): # <!-- elif commentopen.match(rawdata, i): # <!--
k = self.parse_comment(i) k = self.parse_comment(i)
elif piopen.match(rawdata, i): # <? elif piopen.match(rawdata, i): # <?
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment