Commit ac8463ad authored by Julien Muchembled's avatar Julien Muchembled

More refactoring and bugfixes

- Deprecate slapos.networkcachehelper
- Common internal method to query network cache, to reduce code duplication
  and fix downloading through SSL.
- Do not upload data that is already in SHACACHE. This reverts commit
  7bb5e112 partially, since it's not possible
  to hash on the fly. 'tempfile' module is reimported for non-seekable streams.
- New way to instantiate NetworkcacheClient, again to reduce code duplication,
  but also to use an instance for both upload & download.
  Old way is still there for compatibility until it is unused.
parent a2ee9fa6
......@@ -16,7 +16,7 @@ import hashlib
import httplib
import json
import os
import socket
import tempfile
import traceback
import urllib2
import urlparse
......@@ -29,40 +29,6 @@ TIMEOUT = 60
UPLOAD_TIMEOUT = 60 * 60
def urljoin(a, b):
  """Join *b* onto base URL *a*, inserting a single '/' separator."""
  return a + b if a.endswith('/') else a + '/' + b
class hashing_file(object):
  """File-like wrapper computing the SHA-512 of everything read through it."""

  def __init__(self, file):
    self._f = file
    self._h = hashlib.sha512()

  def read(self, *args, **kw):
    chunk = self._f.read(*args, **kw)
    self._h.update(chunk)
    return chunk

  def hexdigest(self):
    # The digest is only meaningful once the whole stream was consumed.
    assert not self._f.read(1)
    return self._h.hexdigest()

  def __len__(self):
    wrapped = self._f
    try:
      fd = wrapped.fileno()
    except AttributeError:
      # No OS-level descriptor: measure by seeking to the end, then
      # restore the current position.
      saved = wrapped.tell()
      try:
        wrapped.seek(0, 2)
        return wrapped.tell()
      finally:
        wrapped.seek(saved)
    return os.fstat(fd).st_size
class NetworkcacheClient(object):
'''
......@@ -73,74 +39,33 @@ class NetworkcacheClient(object):
'''
signature_private_key = None
  def parseUrl(self, url):
    """Split *url* into the connection attributes of the old-style API.

    Returns a dict with keys 'header_dict', 'path', 'host', 'scheme'
    and 'port'.  When the URL embeds credentials, an HTTP basic
    Authorization header is added to 'header_dict'.
    """
    return_dict = {}
    parsed_url = urlparse.urlparse(url)
    return_dict['header_dict'] = {'Content-Type': 'application/json'}
    user = parsed_url.username
    passwd = parsed_url.password
    if user is not None:
      # Base64-encode "user:password" for HTTP basic authentication.
      return_dict['header_dict']['Authorization'] = 'Basic %s' %\
        ('%s:%s' % (user, passwd)).encode('base64').strip()
    return_dict['path'] = parsed_url.path
    return_dict['host'] = parsed_url.hostname
    return_dict['scheme'] = parsed_url.scheme
    # Fall back to the scheme's well-known port (e.g. 80 for http)
    # when the URL does not specify one explicitly.
    return_dict['port'] = parsed_url.port or \
      socket.getservbyname(parsed_url.scheme)
    return return_dict
  def __init__(self, *args, **kw):
    """Initializes shacache object"""
    # Dispatch between the deprecated positional API and the new
    # config-dict API.  The condition is itself a conditional
    # expression: with positional args, use the old path when args[0]
    # is a URL string; with keywords only, use the old path when no
    # 'config' keyword is given.
    if isinstance(args[0], basestring) if args else 'config' not in kw:
      self.__old_init(*args, **kw) # BBB
    else:
      self.__new_init(*args, **kw)
def __init__(self, shacache, shadir, signature_private_key_file=None,
def __old_init(self, shacache, shadir, signature_private_key_file=None,
signature_certificate_list=None, shacache_key_file=None,
shacache_cert_file=None, shadir_key_file=None, shadir_cert_file=None):
"""Initializes shacache object.
Parameters:
shacache
URL to shacache.
Required.
shadir
URL to shadir.
Required.
signature_private_key_file
Path to private key file used for signing content.
Optional.
signature_certificate_list
List of strings of certificates to verify content.
Optional
shacache_key_file
Key file used to authenticate to shacache.
Optional.
shacache_cert_file
Certificate file used to authenticate to shacache.
Optional.
shadir_key_file
Key file used to authenticate to shadir.
Optional.
shadir_cert_file
Certificate file used to authenticate to shadir.
Optional.
"""
# ShaCache Properties
for k, v in self.parseUrl(shacache).iteritems():
setattr(self, 'shacache_%s' % k, v)
self.shacache_url = shacache
self.shadir_url = shadir
# ShaDir Properties
for k, v in self.parseUrl(shadir).iteritems():
setattr(self, 'shadir_%s' % k, v)
if signature_private_key_file:
with open(signature_private_key_file) as f:
self.__new_init({
'signature-private-key-file': signature_private_key_file,
'download-cache-url': shacache,
'upload-cache-url': shacache,
'shacache-cert-file': shacache_cert_file,
'shacache-key-file': shacache_key_file,
'download-dir-url': shadir,
'upload-dir-url': shadir,
'shadir-cert-file': shadir_cert_file,
'shadir-key-file': shadir_key_file,
}, signature_certificate_list)
def __new_init(self, config, signature_certificate_list=None):
self.config = config
path = config.get('signature-private-key-file')
if path:
with open(path) as f:
self.signature_private_key = crypto.load_privatekey(crypto.FILETYPE_PEM,
f.read())
if type(signature_certificate_list) is str:
......@@ -154,10 +79,64 @@ class NetworkcacheClient(object):
crypto.load_certificate(crypto.FILETYPE_PEM, certificate)
for certificate in signature_certificate_list or ()]
self.shacache_key_file = shacache_key_file
self.shacache_cert_file = shacache_cert_file
self.shadir_key_file = shadir_key_file
self.shadir_cert_file = shadir_cert_file
  def _request(self, where, name=None, data=None, headers=None):
    """Common HTTP query helper for both SHACACHE and SHADIR.

    where
      'cache' or 'dir': selects which URL and certificate/key pair is
      taken from self.config.
    name
      Optional extra path component appended to the base URL.
    data
      Request body.  If None a GET is issued; otherwise PUT when a name
      is given, POST when not.
    headers
      Optional dict of extra HTTP headers (mutated in place).

    Returns the httplib response object on a 2xx status; raises
    urllib2.HTTPError otherwise.
    """
    if data is None:
      method = 'GET'
      url = self.config['download-%s-url' % where]
      timeout = TIMEOUT
    else:
      method = 'PUT' if name else 'POST'
      url = self.config['upload-%s-url' % where]
      timeout = UPLOAD_TIMEOUT
    parsed_url = urlparse.urlsplit(url.rstrip('/') + ('/' + name if name else ''))
    if not headers:
      headers = {}
    if parsed_url.username:
      # HTTP basic authentication from credentials embedded in the URL.
      headers['Authorization'] = 'Basic %s' % ('%s:%s' % (
        parsed_url.username, parsed_url.password)).encode('base64').strip()
    headers["Connection"] = "close"
    if parsed_url.scheme == 'https':
      # Client certificate/key are only used for SSL connections.
      connection = httplib.HTTPSConnection(parsed_url.hostname, parsed_url.port,
        cert_file=self.config.get('sha%s-cert-file' % where),
        key_file=self.config.get('sha%s-key-file' % where),
        timeout=timeout)
    else:
      connection = httplib.HTTPConnection(parsed_url.hostname, parsed_url.port,
        timeout=timeout)
    try:
      connection.request(method, parsed_url.path, data, headers)
      r = connection.getresponse()
      if 200 <= r.status < 300:
        return r
    except:
      # Make sure the socket is released before propagating any error.
      connection.close()
      raise
    raise urllib2.HTTPError(url, r.status, r.reason, r.msg, r.fp)
@staticmethod
def archive(path):
# Don't create it to /tmp dir as it can be too small.
parent, name = os.path.split(path)
f = tempfile.TemporaryFile(dir=parent)
with tarfile.open(fileobj=f, mode="w:gz") as tar:
tar.add(path, arcname=name)
return f
@staticmethod
def extract(path, fileobj):
path = os.path.dirname(path)
f = None
try:
if not hasattr(fileobj, 'tell'):
# WKRD: gzip decompressor wants a seekable stream.
f = tempfile.TemporaryFile(dir=path)
shutil.copyfileobj(fileobj, f)
fileobj = f
f.seek(0)
with tarfile.open(fileobj=fileobj, mode="r:gz") as tar:
tar.extractall(path=path)
finally:
f is None or f.close()
def upload(self, file_descriptor, key=None, urlmd5=None, file_name=None,
valid_until=None, architecture=None, **kw):
......@@ -165,31 +144,46 @@ class NetworkcacheClient(object):
If key is None it must only upload to SHACACHE.
Otherwise, it must create a new entry on SHADIR.
'''
# do not trust, go to beginning of opened file
sha512sum = hashlib.sha512()
f = None
try:
try:
file_descriptor.seek(0)
file_descriptor = hashing_file(file_descriptor)
if self.shacache_scheme == 'https':
shacache_connection = httplib.HTTPSConnection(self.shacache_host,
self.shacache_port, key_file=self.shacache_key_file,
cert_file=self.shacache_cert_file, timeout=UPLOAD_TIMEOUT)
except StandardError:
f = tempfile.TemporaryFile()
while 1:
data = file_descriptor.read(65536)
if not data:
break
f.write(data)
sha512sum.update(data)
file_descriptor = f
else:
shacache_connection = httplib.HTTPConnection(self.shacache_host,
self.shacache_port, timeout=UPLOAD_TIMEOUT)
while 1:
data = file_descriptor.read(65536)
if not data:
break
sha512sum.update(data)
sha512sum = sha512sum.hexdigest()
try:
shacache_connection.request('POST', self.shacache_path, file_descriptor,
self.shacache_header_dict)
result = shacache_connection.getresponse()
sha512sum = result.read()
finally:
shacache_connection.close()
if result.status != 201 or sha512sum != file_descriptor.hexdigest():
self._request('cache', sha512sum).close()
except urllib2.HTTPError:
size = file_descriptor.tell()
file_descriptor.seek(0)
result = self._request('cache', data=file_descriptor, headers={
'Content-Length': str(size),
'Content-Type': 'application/octet-stream'})
data = result.read()
if result.status != 201 or data != sha512sum:
raise UploadError('Failed to upload the file to SHACACHE Server.'
'URL: %s. Response code: %s. Response data: %s'
% (self.shacache_host, result.status, sha512sum))
'Response code: %s. Response data: %s'
% (result.status, data))
finally:
f is None or f.close()
if key is not None:
kw['sha512'] = sha512sum # always update sha512sum
file_name = kw.pop('file', file_name)
if file_name is None or urlmd5 is None:
raise ValueError('file_name and urlmd5 are required'
' for non-generic upload')
......@@ -206,35 +200,18 @@ class NetworkcacheClient(object):
def index(self, key, **kw):
data = json.dumps(kw)
data = [data, self._getSignatureString(data)]
if self.shadir_scheme == 'https':
shadir_connection = httplib.HTTPSConnection(self.shadir_host,
self.shadir_port, key_file=self.shadir_key_file,
cert_file=self.shadir_cert_file, timeout=UPLOAD_TIMEOUT)
else:
shadir_connection = httplib.HTTPConnection(self.shadir_host,
self.shadir_port, timeout=UPLOAD_TIMEOUT)
try:
shadir_connection.request('PUT', '/'.join([self.shadir_path, key]),
json.dumps(data), self.shadir_header_dict)
result = shadir_connection.getresponse()
data = result.read()
finally:
shadir_connection.close()
result = self._request('dir', key, json.dumps(data), {
'Content-Type': 'application/json'})
if result.status != 201:
raise UploadError('Failed to upload data to SHADIR Server.'
'URL: %s. Response code: %s. Response data: %s'
% (self.shadir_host, result.status, data))
'Response code: %s. Response data: %s'
% (status, result.read()))
def download(self, sha512sum):
''' Download the file.
It uses http GET request method.
'''
sha_cache_url = urljoin(self.shacache_url, sha512sum)
request = urllib2.Request(url=sha_cache_url, data=None,
headers=self.shadir_header_dict)
return urllib2.urlopen(request, timeout=TIMEOUT)
return self._request('cache', sha512sum)
def select(self, key):
''' Download a file from shacache by selecting the entry in shadir
......@@ -260,10 +237,7 @@ class NetworkcacheClient(object):
def select_generic(self, key, filter=True):
''' Select trustable entries from shadir.
'''
url = urljoin(self.shadir_url, key)
request = urllib2.Request(url=url, data=None,
headers=self.shadir_header_dict)
data = urllib2.urlopen(request, timeout=TIMEOUT).read()
data = self._request('dir', key).read()
try:
data_list = json.loads(data)
except Exception:
......
......@@ -79,6 +79,7 @@ class NCHandler(BaseHTTPServer.BaseHTTPRequestHandler):
def do_POST(self):
assert 'shacache' in self.path
assert self.headers.getheader('content-type') == 'application/octet-stream'
path = os.path.abspath(os.path.join(self.tree, *self.path.split('/')))
if not os.path.exists(path):
os.makedirs(path)
......@@ -86,6 +87,9 @@ class NCHandler(BaseHTTPServer.BaseHTTPRequestHandler):
cksum = hashlib.sha512(data).hexdigest()
path = os.path.join(path, cksum)
# Although real server would accept the request,
# clients should avoid uploading same content twice.
assert not os.path.exists(path)
open(path, 'wb').write(data)
self.send_response(201)
self.send_header('Content-Length', str(len(cksum)))
......@@ -152,32 +156,6 @@ class OfflineTest(unittest.TestCase):
self.shadir_url)
self.assertRaises(IOError, nc.upload, StringIO())
  def test_init_method_normal_http_url(self):
    """
    Check if the init method is setting the attributes correctly.
    """
    # BBB: exercises the deprecated positional constructor and the
    # per-URL attributes it derives (host, path, port, header dicts).
    nc = slapos.libnetworkcache.NetworkcacheClient(shacache=self.shacache_url,
      shadir=self.shadir_url)
    self.assertEquals({'Content-Type': 'application/json'}, \
      nc.shacache_header_dict)
    self.assertEquals(self.host, nc.shacache_host)
    self.assertEquals(self.shacache_path, nc.shacache_path)
    self.assertEquals(self.port, nc.shacache_port)
    self.assertEquals(self.shacache_url, nc.shacache_url)
    self.assertEquals({'Content-Type': 'application/json'}, \
      nc.shadir_header_dict)
    self.assertEquals(self.host, nc.shadir_host)
    self.assertEquals(self.shadir_path, nc.shadir_path)
    self.assertEquals(self.port, nc.shadir_port)
  def test_init_backward_compatible(self):
    """Checks that invocation with minimal parameter works fine"""
    # Only shacache/shadir URLs are given; all other constructor
    # parameters stay at their defaults.
    nc = slapos.libnetworkcache.NetworkcacheClient(shacache=self.shacache_url,
      shadir=self.shadir_url)
    self.assertEqual(nc.shacache_url, self.shacache_url)
    self.assertTrue(nc.shadir_host in self.shadir_url)
class OnlineMixin:
handler = NCHandler
......@@ -562,38 +540,6 @@ class OnlineTest(OnlineMixin, unittest.TestCase):
selected = signed_nc.select(key).read()
self.assertEqual(selected, self.test_string)
  def test_shacache_key_cert_accepted(self):
    """Check that shacache key/cert parameters are stored on the client."""
    # NOTE(review): the NamedTemporaryFile objects themselves (not their
    # .name paths) are passed; only attribute storage is asserted since
    # no authenticating server is available.
    key_file = tempfile.NamedTemporaryFile()
    key_file.write(self.key)
    key_file.flush()
    certificate_file = tempfile.NamedTemporaryFile()
    certificate_file.write(self.certificate)
    certificate_file.flush()
    nc = slapos.libnetworkcache.NetworkcacheClient(self.shacache, self.shadir,
      shacache_cert_file=certificate_file, shacache_key_file=key_file)
    # simplified assertion, as no http authentication server is available
    self.assertEqual(nc.shacache_cert_file, certificate_file)
    self.assertEqual(nc.shacache_key_file, key_file)
  def test_shadir_key_cert_accepted(self):
    """Check that shadir key/cert parameters are stored on the client."""
    # NOTE(review): mirrors test_shacache_key_cert_accepted for the
    # shadir pair; only attribute storage is asserted.
    key_file = tempfile.NamedTemporaryFile()
    key_file.write(self.auth_key)
    key_file.flush()
    certificate_file = tempfile.NamedTemporaryFile()
    certificate_file.write(self.auth_certificate)
    certificate_file.flush()
    # simplified assertion, as no http authentication server is available
    nc = slapos.libnetworkcache.NetworkcacheClient(self.shadir, self.shadir,
      shadir_cert_file=certificate_file, shadir_key_file=key_file)
    # simplified assertion, as no http authentication server is available
    self.assertEqual(nc.shadir_cert_file, certificate_file)
    self.assertEqual(nc.shadir_key_file, key_file)
@unittest.skipUnless(os.environ.get('TEST_SHA_CACHE', '') != '',
"Requires standalone test server")
......
......@@ -12,11 +12,13 @@
#
##############################################################################
# BBB: Deprecated. This file is ugly and must disappear.
# DO NOT EXTEND IT. Add methods to NetworkcacheClient class instead.
import json
import logging
import os
import shutil
import tarfile
import urllib2
from slapos.libnetworkcache import NetworkcacheClient, UploadError, \
DirectoryNotFound
......@@ -25,19 +27,7 @@ logging.basicConfig()
logger = logging.getLogger('networkcachehelper')
logger.setLevel(logging.INFO)
def _split_last_directory(path):
"""
If basename(path) is a file (i.e /path/to/directory), do a simple split.
If basename(path) is a directory (i.e /path/to/directory/), split again to
have pair like ('/path/to', 'directory').
"""
path_dirname, path_basename = os.path.split(path)
if not path_basename:
# We were given a path like "/path/to/directory/": Split again.
path_dirname, path_basename = os.path.split(path_dirname)
return path_dirname, path_basename
def helper_upload_network_cached(dir_url, cache_url,
def __upload_network_cached(dir_url, cache_url,
file_descriptor, directory_key,
signature_private_key_file, shacache_cert_file, shacache_key_file,
shadir_cert_file, shadir_key_file, metadata_dict={}):
......@@ -83,28 +73,9 @@ def helper_upload_network_cached(dir_url, cache_url,
except (IOError, UploadError), e:
logger.info('Failed to upload file. %s' % str(e))
return False
return True
def helper_upload_network_cached_from_file(dir_url, cache_url,
path, directory_key, metadata_dict,
signature_private_key_file, shacache_cert_file, shacache_key_file,
shadir_cert_file, shadir_key_file):
"""
Upload an existing file, using a file_descriptor.
"""
file_descriptor = open(path, 'r')
return helper_upload_network_cached(
dir_url=dir_url,
cache_url=cache_url,
file_descriptor=file_descriptor,
directory_key=directory_key,
signature_private_key_file=signature_private_key_file,
shacache_cert_file=shacache_cert_file,
shacache_key_file=shacache_key_file,
shadir_cert_file=shadir_cert_file,
shadir_key_file=shadir_key_file,
metadata_dict=metadata_dict,
)
# BBB: slapos.buildout (1.6.0-dev-SlapOS-011) imports it without using it
helper_upload_network_cached_from_file = None
def helper_upload_network_cached_from_directory(dir_url, cache_url,
path, directory_key, metadata_dict,
......@@ -113,25 +84,10 @@ def helper_upload_network_cached_from_directory(dir_url, cache_url,
"""
Create a tar from a given directory (path) then upload it to networkcache.
"""
# Create tar file. Don't create it to /tmp dir as it can be too small.
path_dirname, path_basename = _split_last_directory(path)
tarpath = os.path.join(path_dirname, '%s.tar' % path_basename)
tar = tarfile.open(tarpath, "w:gz")
try:
try:
tar.add(path, arcname=path_basename)
finally:
tar.close()
# Upload it
result = helper_upload_network_cached_from_file(dir_url, cache_url,
tarpath, directory_key, metadata_dict,
return __upload_network_cached(dir_url, cache_url,
NetworkcacheClient.archive(path.rstrip(os.sep)), directory_key,
signature_private_key_file, shacache_cert_file, shacache_key_file,
shadir_cert_file, shadir_key_file)
finally:
# Always clean it
if os.path.exists(tarpath):
os.remove(tarpath)
return result
shadir_cert_file, shadir_key_file, metadata_dict)
def helper_download_network_cached(dir_url, cache_url,
......@@ -257,26 +213,13 @@ def helper_download_network_cached_to_directory(dir_url, cache_url,
"""
Download a tar file from network cache and untar it to specified path.
"""
# Download tar file. Don't download to /tmp dir as it can be too small.
path_dirname, path_basename = _split_last_directory(path)
tarpath = os.path.join(path_dirname, '%s.tar' % path_basename)
try:
metadata_dict = helper_download_network_cached_to_file(
dir_url, cache_url,
result = helper_download_network_cached(dir_url, cache_url,
signature_certificate_list,
directory_key, tarpath, wanted_metadata_dict, required_key_list,
strategy)
if metadata_dict:
# Untar it to path
tar = tarfile.open(tarpath)
directory_key, wanted_metadata_dict, required_key_list, strategy)
if result:
file_descriptor, metadata_dict = result
try:
logger.info("Extracting downloaded archive from cache...")
tar.extractall(path=os.path.dirname(path))
finally:
tar.close()
finally:
# Always clean it
if os.path.exists(tarpath):
os.remove(tarpath)
NetworkcacheClient.extract(path.rstrip('/'), file_descriptor)
return metadata_dict
finally:
file_descriptor.close()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment