Commit 9f1f8025 authored by Stefan Behnel

make encoding detection code a bit faster if the first two source file lines...

make encoding detection code a bit faster if the first two source file lines are longer than 250 bytes
parent ad356fd1
...@@ -189,6 +189,7 @@ def decode_filename(filename): ...@@ -189,6 +189,7 @@ def decode_filename(filename):
_match_file_encoding = re.compile(u"coding[:=]\s*([-\w.]+)").search _match_file_encoding = re.compile(u"coding[:=]\s*([-\w.]+)").search
def detect_file_encoding(source_filename): def detect_file_encoding(source_filename):
f = open_source_file(source_filename, encoding="UTF-8", error_handling='ignore') f = open_source_file(source_filename, encoding="UTF-8", error_handling='ignore')
try: try:
...@@ -196,18 +197,22 @@ def detect_file_encoding(source_filename): ...@@ -196,18 +197,22 @@ def detect_file_encoding(source_filename):
finally: finally:
f.close() f.close()
def detect_opened_file_encoding(f): def detect_opened_file_encoding(f):
# PEPs 263 and 3120 # PEPs 263 and 3120
# Most of the time the first two lines fall in the first 250 chars, # Most of the time the first two lines fall in the first 250 chars,
# and this bulk read/split is much faster. # and this bulk read/split is much faster.
lines = f.read(250).split("\n") lines = f.read(250).split(u"\n")
if len(lines) > 2: if len(lines) > 1:
m = _match_file_encoding(lines[0]) or _match_file_encoding(lines[1]) m = _match_file_encoding(lines[0])
if m:
return m.group(1)
elif len(lines) > 2:
m = _match_file_encoding(lines[1])
if m: if m:
return m.group(1) return m.group(1)
else: else:
return "UTF-8" return "UTF-8"
else:
# Fallback to one-char-at-a-time detection. # Fallback to one-char-at-a-time detection.
f.seek(0) f.seek(0)
chars = [] chars = []
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment