Commit f8262dd4 authored by Barry Warsaw

Add -z/--gzip option to store the backup files compressed. Right now, it just uses the gzip module's default compression level (i.e. 9).

No need to specify -z on recovery because the script will automatically uncompress gzip'd files. It does this by looking at the file extension, which will be .fsz or .deltafsz for compressed files.
parent 02c22abc
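
Aside (not part of the commit): a minimal standalone sketch of the extension-based auto-detection the message describes, assuming a hypothetical helper name open_backup. The recovery side can pick gzip.open() or plain open() purely from the suffix written at backup time, so no -z flag is needed when recovering:

import gzip

def open_backup(path):
    # Backups written with -z end in 'fsz' ('.fsz' for a full backup,
    # '.deltafsz' for an incremental); everything else is read as plain bytes.
    if path.endswith('fsz'):
        return gzip.open(path, 'rb')
    return open(path, 'rb')
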
@@ -43,6 +43,10 @@ Flags for --backup:
         significantly reduces the disk i/o at the (theoretical) cost of
         inconsistency.
 
+    -z / --gzip
+        Compress with gzip the backup files. Uses the default zlib
+        compression level.
+
 Flags for --recover:
     -D str
     --date=str
@@ -62,6 +66,7 @@ from __future__ import nested_scopes
 import os
 import sys
 import md5
+import gzip
 import time
 import errno
 import getopt
@@ -107,10 +112,10 @@ def log(msg, *args):
 def parseargs():
     global VERBOSE
     try:
-        opts, args = getopt.getopt(sys.argv[1:], 'BRvhf:r:FD:o:Q',
+        opts, args = getopt.getopt(sys.argv[1:], 'BRvhf:r:FD:o:Qz',
                                    ['backup', 'recover', 'verbose', 'help',
                                     'file=', 'repository=', 'full', 'date=',
-                                    'output=', 'quick'])
+                                    'output=', 'quick', 'gzip'])
     except getopt.error, msg:
         usage(1, msg)
 
@@ -122,6 +127,7 @@ def parseargs():
         date = None
         output = None
         quick = False
+        gzip = False
     options = Options()
@@ -150,6 +156,8 @@ def parseargs():
             options.date = arg
         elif opt in ('-o', '--output'):
             options.output = arg
+        elif opt in ('-z', '--gzip'):
+            options.gzip = True
 
     # Any other arguments are invalid
     if args:
@@ -177,12 +185,12 @@ def parseargs():
 # Do something with a run of bytes from a file
-def dofile(func, fp, n):
+def dofile(func, fp, n=None):
     bytesread = 0
     stop = False
     chunklen = READCHUNK
     while not stop:
-        if chunklen + bytesread > n:
+        if n is not None and chunklen + bytesread > n:
             chunklen = n - bytesread
             stop = True
         data = fp.read(chunklen)
@@ -202,12 +210,15 @@ def checksum(fp, n):
     return sum.hexdigest()
 
 
-def copyfile(src, dst, start, n):
+def copyfile(options, dst, start, n):
     # Copy bytes from file src, to file dst, starting at offset start, for n
     # length of bytes
     sum = md5.new()
-    ifp = open(src, 'rb')
+    ifp = open(options.file, 'rb')
     ifp.seek(start)
-    ofp = open(dst, 'wb')
+    if options.gzip:
+        ofp = gzip.open(dst, 'wb')
+    else:
+        ofp = open(dst, 'wb')
     def func(data):
         sum.update(data)
@@ -229,8 +240,12 @@ def concat(files, ofp=None):
             ofp.write(data)
     bytesread = 0
     for f in files:
-        ifp = open(f, 'rb')
-        bytesread += dofile(func, ifp, os.path.getsize(f))
+        # Auto uncompress
+        if f.endswith('fsz'):
+            ifp = gzip.open(f, 'rb')
+        else:
+            ifp = open(f, 'rb')
+        bytesread += dofile(func, ifp)
         ifp.close()
     if ofp:
         ofp.close()
@@ -243,6 +258,8 @@ def gen_filename(options, ext=None):
             ext = '.fs'
         else:
             ext = '.deltafs'
+        if options.gzip:
+            ext += 'z'
     t = time.gmtime()[:6] + (ext,)
     return '%04d-%02d-%02d-%02d-%02d-%02d%s' % t
@@ -265,7 +282,7 @@ def find_files(options):
         root, ext = os.path.splitext(file)
         if root <= when:
             needed.append(file)
-            if ext == '.fs':
+            if ext in ('.fs', '.fsz'):
                 break
     # Make the file names relative to the repository directory
     needed = [os.path.join(options.repository, f) for f in needed]
@@ -285,7 +302,6 @@ def scandat(repofiles):
     # Return the filename, startpos, endpos, and sum of the last incremental.
     # If all is a list, then append file name and md5sums to the list.
     fullfile = repofiles[0]
-    assert fullfile.endswith('.fs')
     datfile = os.path.splitext(fullfile)[0] + '.dat'
     # If the .dat file is missing, we have to do a full backup
     fn = startpos = endpos = sum = None
@@ -324,7 +340,7 @@ def do_full_backup(options):
         print >> sys.stderr, 'Cannot overwrite existing file:', dest
         sys.exit(2)
     log('writing full backup: %s bytes to %s', pos, dest)
-    sum = copyfile(options.file, dest, 0, pos)
+    sum = copyfile(options, dest, 0, pos)
     # Write the data file for this full backup
     datfile = os.path.splitext(dest)[0] + '.dat'
     fp = open(datfile, 'w')
@@ -348,12 +364,11 @@ def do_incremental_backup(options, reposz, repofiles):
         print >> sys.stderr, 'Cannot overwrite existing file:', dest
         sys.exit(2)
     log('writing incremental: %s bytes to %s', pos-reposz, dest)
-    sum = copyfile(options.file, dest, reposz, pos)
+    sum = copyfile(options, dest, reposz, pos)
     # The first file in repofiles points to the last full backup. Use this to
     # get the .dat file and append the information for this incrementatl to
     # that file.
     fullfile = repofiles[0]
-    assert fullfile.endswith('.fs')
     datfile = os.path.splitext(fullfile)[0] + '.dat'
     # This .dat file better exist. Let the exception percolate if not.
     fp = open(datfile, 'a')
...
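
Aside (not part of the commit): a small standalone sketch, with a hypothetical helper name backup_name, mirroring the gen_filename() change above. With --gzip the generated extension simply gains a trailing 'z', which is exactly the suffix that find_files() and concat() key on during recovery:

import time

def backup_name(full, use_gzip):
    # Mirrors gen_filename(): '.fs'/'.deltafs' normally,
    # '.fsz'/'.deltafsz' when the -z/--gzip flag is set.
    ext = '.fs' if full else '.deltafs'
    if use_gzip:
        ext += 'z'
    t = time.gmtime()[:6] + (ext,)
    return '%04d-%02d-%02d-%02d-%02d-%02d%s' % t

print(backup_name(True, True))    # e.g. 2002-11-01-12-30-05.fsz
print(backup_name(False, True))   # e.g. 2002-11-01-12-30-05.deltafsz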