Commit f09b2346 authored by Jeremy Hylton

Add presumed speed optimization for pack().

Only open the file for unbuffered I/O after finishing the first phase
of pack.  The first phase gets its end-of-file position from the main
thread, so there's no possibility of reading a 'c' record.

Timings on Linux are inconclusive, but it seems like using buffered
I/O for the initial phase should be faster.
parent 6278b200
...@@ -416,14 +416,8 @@ class FileStoragePacker(FileStorageFormatter): ...@@ -416,14 +416,8 @@ class FileStoragePacker(FileStorageFormatter):
# progress after it). # progress after it).
def __init__(self, path, stop, la, lr, cla, clr, current_size): def __init__(self, path, stop, la, lr, cla, clr, current_size):
self._name = path self._name = path
# Caution: It's critical that the file be opened in unbuffered mode. self._file = open(path, "rb")
# The code used to leave off the trailing 0 argument, and then on self._path = path
# every platform except native Windows it was observed that we could
# read stale data from the tail end of the file -- keep in mind that
# transactions can still be in progress throughout much of packing,
# and are written to the same physical file but via a distinct Python
# file object.
self._file = open(path, "rb", 0)
self._stop = stop self._stop = stop
self.locked = 0 self.locked = 0
self.file_end = current_size self.file_end = current_size
...@@ -493,6 +487,19 @@ class FileStoragePacker(FileStorageFormatter): ...@@ -493,6 +487,19 @@ class FileStoragePacker(FileStorageFormatter):
self.locked = 1 self.locked = 1
self._lock_acquire() self._lock_acquire()
try: try:
# Re-open the file in unbuffered mode.
# The main thread may write new transactions to the file,
# which creates the possibility that we will read a status
# 'c' transaction into the pack thread's stdio buffer even
# though we're acquiring the commit lock. Transactions
# can still be in progress throughout much of packing, and
# are written to the same physical file but via a distinct
# Python file object. The code used to leave off the
# trailing 0 argument, and then on every platform except
# native Windows it was observed that we could read stale
# data from the tail end of the file.
self._file = open(self._path, "rb", 0)
self._file.seek(0, 2) self._file.seek(0, 2)
self.file_end = self._file.tell() self.file_end = self._file.tell()
finally: finally:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment