Commit ac8463ad authored by Julien Muchembled

More refactoring and bugfixes

- Deprecate slapos.networkcachehelper.
- Add a common internal method to query the network cache, to reduce code
  duplication and to fix downloading through SSL.
- Do not upload data that is already in SHACACHE. This partially reverts commit
  7bb5e112, since it is not possible to hash on the fly. The 'tempfile' module
  is reimported for non-seekable streams.
- New way to instantiate NetworkcacheClient (see the sketch below), again to
  reduce code duplication, but also so that a single instance can be used for
  both upload and download. The old way is kept for compatibility until it is
  no longer used.
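As a rough usage sketch of the last point, here is how the two construction styles compare, based on the configuration keys visible in the diff below. The URLs and file names are placeholders, not real endpoints:

```python
from slapos.libnetworkcache import NetworkcacheClient

# Old, positional style (kept for backward compatibility, "BBB"):
nc = NetworkcacheClient('http://example.com/shacache',
                        'http://example.com/shadir')

# New style: a single configuration mapping, usable by one instance for
# both upload and download because each direction gets its own URL.
nc = NetworkcacheClient({
    'download-cache-url': 'http://example.com/shacache',
    'upload-cache-url': 'https://upload.example.com/shacache',
    'download-dir-url': 'http://example.com/shadir',
    'upload-dir-url': 'https://upload.example.com/shadir',
    # optional TLS client certificates and content-signing key
    'shacache-cert-file': 'shacache.crt',
    'shacache-key-file': 'shacache.key',
    'shadir-cert-file': 'shadir.crt',
    'shadir-key-file': 'shadir.key',
    'signature-private-key-file': 'signature.key',
})
```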
parent a2ee9fa6
@@ -16,7 +16,7 @@ import hashlib
 import httplib
 import json
 import os
-import socket
+import tempfile
 import traceback
 import urllib2
 import urlparse
@@ -29,40 +29,6 @@ TIMEOUT = 60
 UPLOAD_TIMEOUT = 60 * 60
 
-def urljoin(a, b):
-  if not a.endswith('/'):
-    a += '/'
-  return a + b
-
-
-class hashing_file(object):
-
-  def __init__(self, file):
-    self._f = file
-    self._h = hashlib.sha512()
-
-  def hexdigest(self):
-    assert not self._f.read(1)
-    return self._h.hexdigest()
-
-  def read(self, *args, **kw):
-    d = self._f.read(*args, **kw)
-    self._h.update(d)
-    return d
-
-  def __len__(self):
-    f = self._f
-    try:
-      fd = f.fileno()
-    except AttributeError:
-      pos = f.tell()
-      try:
-        f.seek(0, 2)
-        return f.tell()
-      finally:
-        f.seek(pos)
-    return os.fstat(fd).st_size
-
 class NetworkcacheClient(object):
   '''
@@ -73,74 +39,33 @@ class NetworkcacheClient(object):
   '''
   signature_private_key = None
 
-  def parseUrl(self, url):
-    return_dict = {}
-    parsed_url = urlparse.urlparse(url)
-    return_dict['header_dict'] = {'Content-Type': 'application/json'}
-    user = parsed_url.username
-    passwd = parsed_url.password
-    if user is not None:
-      return_dict['header_dict']['Authorization'] = 'Basic %s' %\
-        ('%s:%s' % (user, passwd)).encode('base64').strip()
-    return_dict['path'] = parsed_url.path
-    return_dict['host'] = parsed_url.hostname
-    return_dict['scheme'] = parsed_url.scheme
-    return_dict['port'] = parsed_url.port or \
-      socket.getservbyname(parsed_url.scheme)
-    return return_dict
-
-  def __init__(self, shacache, shadir, signature_private_key_file=None,
+  def __init__(self, *args, **kw):
+    """Initializes shacache object"""
+    if isinstance(args[0], basestring) if args else 'config' not in kw:
+      self.__old_init(*args, **kw) # BBB
+    else:
+      self.__new_init(*args, **kw)
+
+  def __old_init(self, shacache, shadir, signature_private_key_file=None,
       signature_certificate_list=None, shacache_key_file=None,
       shacache_cert_file=None, shadir_key_file=None, shadir_cert_file=None):
-    """Initializes shacache object.
-
-    Parameters:
-      shacache
-        URL to shacache.
-        Required.
-
-      shadir
-        URL to shadir.
-        Required.
-
-      signature_private_key_file
-        Path to private key file used for signing content.
-        Optional.
-
-      signature_certificate_list
-        List of strings of certificates to verify content.
-        Optional
-
-      shacache_key_file
-        Key file used to authenticate to shacache.
-        Optional.
-
-      shacache_cert_file
-        Certificate file used to authenticate to shacache.
-        Optional.
-
-      shadir_key_file
-        Key file used to authenticate to shadir.
-        Optional.
-
-      shadir_cert_file
-        Certificate file used to authenticate to shadir.
-        Optional.
-    """
-    # ShaCache Properties
-    for k, v in self.parseUrl(shacache).iteritems():
-      setattr(self, 'shacache_%s' % k, v)
-    self.shacache_url = shacache
-    self.shadir_url = shadir
-
-    # ShaDir Properties
-    for k, v in self.parseUrl(shadir).iteritems():
-      setattr(self, 'shadir_%s' % k, v)
-
-    if signature_private_key_file:
-      with open(signature_private_key_file) as f:
+    self.__new_init({
+      'signature-private-key-file': signature_private_key_file,
+      'download-cache-url': shacache,
+      'upload-cache-url': shacache,
+      'shacache-cert-file': shacache_cert_file,
+      'shacache-key-file': shacache_key_file,
+      'download-dir-url': shadir,
+      'upload-dir-url': shadir,
+      'shadir-cert-file': shadir_cert_file,
+      'shadir-key-file': shadir_key_file,
+      }, signature_certificate_list)
+
+  def __new_init(self, config, signature_certificate_list=None):
+    self.config = config
+    path = config.get('signature-private-key-file')
+    if path:
+      with open(path) as f:
         self.signature_private_key = crypto.load_privatekey(crypto.FILETYPE_PEM,
           f.read())
     if type(signature_certificate_list) is str:
@@ -154,10 +79,64 @@ class NetworkcacheClient(object):
       crypto.load_certificate(crypto.FILETYPE_PEM, certificate)
       for certificate in signature_certificate_list or ()]
-    self.shacache_key_file = shacache_key_file
-    self.shacache_cert_file = shacache_cert_file
-    self.shadir_key_file = shadir_key_file
-    self.shadir_cert_file = shadir_cert_file
+
+  def _request(self, where, name=None, data=None, headers=None):
+    if data is None:
+      method = 'GET'
+      url = self.config['download-%s-url' % where]
+      timeout = TIMEOUT
+    else:
+      method = 'PUT' if name else 'POST'
+      url = self.config['upload-%s-url' % where]
+      timeout = UPLOAD_TIMEOUT
+    parsed_url = urlparse.urlsplit(url.rstrip('/') + ('/' + name if name else ''))
+    if not headers:
+      headers = {}
+    if parsed_url.username:
+      headers['Authorization'] = 'Basic %s' % ('%s:%s' % (
+        parsed_url.username, parsed_url.password)).encode('base64').strip()
+    headers["Connection"] = "close"
+    if parsed_url.scheme == 'https':
+      connection = httplib.HTTPSConnection(parsed_url.hostname, parsed_url.port,
+        cert_file=self.config.get('sha%s-cert-file' % where),
+        key_file=self.config.get('sha%s-key-file' % where),
+        timeout=timeout)
+    else:
+      connection = httplib.HTTPConnection(parsed_url.hostname, parsed_url.port,
+        timeout=timeout)
+    try:
+      connection.request(method, parsed_url.path, data, headers)
+      r = connection.getresponse()
+      if 200 <= r.status < 300:
+        return r
+    except:
+      connection.close()
+      raise
+    raise urllib2.HTTPError(url, r.status, r.reason, r.msg, r.fp)
+
+  @staticmethod
+  def archive(path):
+    # Don't create it to /tmp dir as it can be too small.
+    parent, name = os.path.split(path)
+    f = tempfile.TemporaryFile(dir=parent)
+    with tarfile.open(fileobj=f, mode="w:gz") as tar:
+      tar.add(path, arcname=name)
+    return f
+
+  @staticmethod
+  def extract(path, fileobj):
+    path = os.path.dirname(path)
+    f = None
+    try:
+      if not hasattr(fileobj, 'tell'):
+        # WKRD: gzip decompressor wants a seekable stream.
+        f = tempfile.TemporaryFile(dir=path)
+        shutil.copyfileobj(fileobj, f)
+        fileobj = f
+        f.seek(0)
+      with tarfile.open(fileobj=fileobj, mode="r:gz") as tar:
+        tar.extractall(path=path)
    finally:
+      f is None or f.close()
 
   def upload(self, file_descriptor, key=None, urlmd5=None, file_name=None,
       valid_until=None, architecture=None, **kw):
@@ -165,31 +144,46 @@ class NetworkcacheClient(object):
     If key is None it must only upload to SHACACHE.
     Otherwise, it must create a new entry on SHADIR.
     '''
-    # do not trust, go to beginning of opened file
-    file_descriptor.seek(0)
-    file_descriptor = hashing_file(file_descriptor)
-    if self.shacache_scheme == 'https':
-      shacache_connection = httplib.HTTPSConnection(self.shacache_host,
-        self.shacache_port, key_file=self.shacache_key_file,
-        cert_file=self.shacache_cert_file, timeout=UPLOAD_TIMEOUT)
-    else:
-      shacache_connection = httplib.HTTPConnection(self.shacache_host,
-        self.shacache_port, timeout=UPLOAD_TIMEOUT)
+    sha512sum = hashlib.sha512()
+    f = None
     try:
-      shacache_connection.request('POST', self.shacache_path, file_descriptor,
-        self.shacache_header_dict)
-      result = shacache_connection.getresponse()
-      sha512sum = result.read()
+      try:
+        file_descriptor.seek(0)
+      except StandardError:
+        f = tempfile.TemporaryFile()
+        while 1:
+          data = file_descriptor.read(65536)
+          if not data:
+            break
+          f.write(data)
+          sha512sum.update(data)
+        file_descriptor = f
+      else:
+        while 1:
+          data = file_descriptor.read(65536)
+          if not data:
+            break
+          sha512sum.update(data)
+      sha512sum = sha512sum.hexdigest()
+      try:
+        self._request('cache', sha512sum).close()
+      except urllib2.HTTPError:
+        size = file_descriptor.tell()
+        file_descriptor.seek(0)
+        result = self._request('cache', data=file_descriptor, headers={
+          'Content-Length': str(size),
+          'Content-Type': 'application/octet-stream'})
+        data = result.read()
+        if result.status != 201 or data != sha512sum:
+          raise UploadError('Failed to upload the file to SHACACHE Server.'
+            'Response code: %s. Response data: %s'
+            % (result.status, data))
     finally:
-      shacache_connection.close()
-    if result.status != 201 or sha512sum != file_descriptor.hexdigest():
-      raise UploadError('Failed to upload the file to SHACACHE Server.'
-        'URL: %s. Response code: %s. Response data: %s'
-        % (self.shacache_host, result.status, sha512sum))
+      f is None or f.close()
 
     if key is not None:
       kw['sha512'] = sha512sum # always update sha512sum
+      file_name = kw.pop('file', file_name)
       if file_name is None or urlmd5 is None:
        raise ValueError('file_name and urlmd5 are required'
          ' for non-generic upload')
@@ -206,35 +200,18 @@ class NetworkcacheClient(object):
   def index(self, key, **kw):
     data = json.dumps(kw)
     data = [data, self._getSignatureString(data)]
-    if self.shadir_scheme == 'https':
-      shadir_connection = httplib.HTTPSConnection(self.shadir_host,
-        self.shadir_port, key_file=self.shadir_key_file,
-        cert_file=self.shadir_cert_file, timeout=UPLOAD_TIMEOUT)
-    else:
-      shadir_connection = httplib.HTTPConnection(self.shadir_host,
-        self.shadir_port, timeout=UPLOAD_TIMEOUT)
-    try:
-      shadir_connection.request('PUT', '/'.join([self.shadir_path, key]),
-        json.dumps(data), self.shadir_header_dict)
-      result = shadir_connection.getresponse()
-      data = result.read()
-    finally:
-      shadir_connection.close()
+    result = self._request('dir', key, json.dumps(data), {
+      'Content-Type': 'application/json'})
     if result.status != 201:
       raise UploadError('Failed to upload data to SHADIR Server.'
-        'URL: %s. Response code: %s. Response data: %s'
-        % (self.shadir_host, result.status, data))
+        'Response code: %s. Response data: %s'
+        % (status, result.read()))
 
   def download(self, sha512sum):
     ''' Download the file.
     It uses http GET request method.
     '''
-    sha_cache_url = urljoin(self.shacache_url, sha512sum)
-    request = urllib2.Request(url=sha_cache_url, data=None,
-      headers=self.shadir_header_dict)
-    return urllib2.urlopen(request, timeout=TIMEOUT)
+    return self._request('cache', sha512sum)
 
   def select(self, key):
     ''' Download a file from shacache by selecting the entry in shadir
@@ -260,10 +237,7 @@ class NetworkcacheClient(object):
   def select_generic(self, key, filter=True):
     ''' Select trustable entries from shadir.
     '''
-    url = urljoin(self.shadir_url, key)
-    request = urllib2.Request(url=url, data=None,
-      headers=self.shadir_header_dict)
-    data = urllib2.urlopen(request, timeout=TIMEOUT).read()
+    data = self._request('dir', key).read()
     try:
       data_list = json.loads(data)
     except Exception:
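The upload() change in the hunks above boils down to: hash the stream first (spooling it to a temporary file when it is not seekable), probe SHACACHE for that hash, and only POST the data when the probe fails. Below is a simplified, standalone sketch of that pattern, not the library code itself; `probe` and `post` are hypothetical stand-ins for the GET and POST requests that `_request()` performs:

```python
import hashlib
import tempfile


def upload_if_missing(stream, probe, post):
    """Upload `stream` only if its SHA-512 is not already cached.

    `probe(hexdigest)` should raise on a cache miss (like the HTTPError
    raised by _request() on 404); `post(stream, size)` performs the
    actual upload. Both are placeholders for this sketch.
    """
    h = hashlib.sha512()
    try:
        stream.seek(0)                       # seekable: hash in place
        spool = stream
    except Exception:
        spool = tempfile.TemporaryFile()     # non-seekable: spool while hashing
    while True:
        chunk = stream.read(65536)
        if not chunk:
            break
        h.update(chunk)
        if spool is not stream:
            spool.write(chunk)
    hexdigest = h.hexdigest()
    try:
        probe(hexdigest)                     # already cached: nothing to do
    except Exception:
        size = spool.tell()
        spool.seek(0)
        post(spool, size)                    # cache miss: upload now
    return hexdigest
```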
@@ -79,6 +79,7 @@ class NCHandler(BaseHTTPServer.BaseHTTPRequestHandler):
   def do_POST(self):
     assert 'shacache' in self.path
+    assert self.headers.getheader('content-type') == 'application/octet-stream'
     path = os.path.abspath(os.path.join(self.tree, *self.path.split('/')))
     if not os.path.exists(path):
       os.makedirs(path)
@@ -86,6 +87,9 @@ class NCHandler(BaseHTTPServer.BaseHTTPRequestHandler):
     cksum = hashlib.sha512(data).hexdigest()
     path = os.path.join(path, cksum)
+    # Although real server would accept the request,
+    # clients should avoid uploading same content twice.
+    assert not os.path.exists(path)
     open(path, 'wb').write(data)
     self.send_response(201)
     self.send_header('Content-Length', str(len(cksum)))
@@ -152,32 +156,6 @@ class OfflineTest(unittest.TestCase):
       self.shadir_url)
     self.assertRaises(IOError, nc.upload, StringIO())
 
-  def test_init_method_normal_http_url(self):
-    """
-      Check if the init method is setting the attributes correctly.
-    """
-    nc = slapos.libnetworkcache.NetworkcacheClient(shacache=self.shacache_url,
-      shadir=self.shadir_url)
-    self.assertEquals({'Content-Type': 'application/json'}, \
-                       nc.shacache_header_dict)
-    self.assertEquals(self.host, nc.shacache_host)
-    self.assertEquals(self.shacache_path, nc.shacache_path)
-    self.assertEquals(self.port, nc.shacache_port)
-    self.assertEquals(self.shacache_url, nc.shacache_url)
-    self.assertEquals({'Content-Type': 'application/json'}, \
-                       nc.shadir_header_dict)
-    self.assertEquals(self.host, nc.shadir_host)
-    self.assertEquals(self.shadir_path, nc.shadir_path)
-    self.assertEquals(self.port, nc.shadir_port)
-
-  def test_init_backward_compatible(self):
-    """Checks that invocation with minimal parameter works fine"""
-    nc = slapos.libnetworkcache.NetworkcacheClient(shacache=self.shacache_url,
-      shadir=self.shadir_url)
-    self.assertEqual(nc.shacache_url, self.shacache_url)
-    self.assertTrue(nc.shadir_host in self.shadir_url)
-
 class OnlineMixin:
 
   handler = NCHandler
@@ -562,38 +540,6 @@ class OnlineTest(OnlineMixin, unittest.TestCase):
     selected = signed_nc.select(key).read()
     self.assertEqual(selected, self.test_string)
 
-  def test_shacache_key_cert_accepted(self):
-    key_file = tempfile.NamedTemporaryFile()
-    key_file.write(self.key)
-    key_file.flush()
-
-    certificate_file = tempfile.NamedTemporaryFile()
-    certificate_file.write(self.certificate)
-    certificate_file.flush()
-
-    nc = slapos.libnetworkcache.NetworkcacheClient(self.shacache, self.shadir,
-      shacache_cert_file=certificate_file, shacache_key_file=key_file)
-    # simplified assertion, as no http authentication server is available
-    self.assertEqual(nc.shacache_cert_file, certificate_file)
-    self.assertEqual(nc.shacache_key_file, key_file)
-
-  def test_shadir_key_cert_accepted(self):
-    key_file = tempfile.NamedTemporaryFile()
-    key_file.write(self.auth_key)
-    key_file.flush()

-    certificate_file = tempfile.NamedTemporaryFile()
-    certificate_file.write(self.auth_certificate)
-    certificate_file.flush()
-
-    # simplified assertion, as no http authentication server is available
-    nc = slapos.libnetworkcache.NetworkcacheClient(self.shadir, self.shadir,
-      shadir_cert_file=certificate_file, shadir_key_file=key_file)
-    # simplified assertion, as no http authentication server is available
-    self.assertEqual(nc.shadir_cert_file, certificate_file)
-    self.assertEqual(nc.shadir_key_file, key_file)
-
 @unittest.skipUnless(os.environ.get('TEST_SHA_CACHE', '') != '',
     "Requires standalone test server")
@@ -12,11 +12,13 @@
 #
 ##############################################################################
 
+# BBB: Deprecated. This file is ugly and must disappear.
+# DO NOT EXTEND IT. Add methods to NetworkcacheClient class instead.
+
 import json
 import logging
 import os
 import shutil
-import tarfile
 import urllib2
 
 from slapos.libnetworkcache import NetworkcacheClient, UploadError, \
   DirectoryNotFound
@@ -25,19 +27,7 @@ logging.basicConfig()
 logger = logging.getLogger('networkcachehelper')
 logger.setLevel(logging.INFO)
 
-def _split_last_directory(path):
-  """
-  If basename(path) is a file (i.e /path/to/directory), do a simple split.
-  If basename(path) is a directory (i.e /path/to/directory/), split again to
-  have pair like ('/path/to', 'directory').
-  """
-  path_dirname, path_basename = os.path.split(path)
-  if not path_basename:
-    # We were given a path like "/path/to/directory/": Split again.
-    path_dirname, path_basename = os.path.split(path_dirname)
-  return path_dirname, path_basename
-
-def helper_upload_network_cached(dir_url, cache_url,
+def __upload_network_cached(dir_url, cache_url,
     file_descriptor, directory_key,
     signature_private_key_file, shacache_cert_file, shacache_key_file,
     shadir_cert_file, shadir_key_file, metadata_dict={}):
@@ -83,28 +73,9 @@ def helper_upload_network_cached(dir_url, cache_url,
   except (IOError, UploadError), e:
     logger.info('Failed to upload file. %s' % str(e))
     return False
   return True
 
-def helper_upload_network_cached_from_file(dir_url, cache_url,
-    path, directory_key, metadata_dict,
-    signature_private_key_file, shacache_cert_file, shacache_key_file,
-    shadir_cert_file, shadir_key_file):
-  """
-  Upload an existing file, using a file_descriptor.
-  """
-  file_descriptor = open(path, 'r')
-  return helper_upload_network_cached(
-    dir_url=dir_url,
-    cache_url=cache_url,
-    file_descriptor=file_descriptor,
-    directory_key=directory_key,
-    signature_private_key_file=signature_private_key_file,
-    shacache_cert_file=shacache_cert_file,
-    shacache_key_file=shacache_key_file,
-    shadir_cert_file=shadir_cert_file,
-    shadir_key_file=shadir_key_file,
-    metadata_dict=metadata_dict,
-  )
+# BBB: slapos.buildout (1.6.0-dev-SlapOS-011) imports it without using it
+helper_upload_network_cached_from_file = None
 
 def helper_upload_network_cached_from_directory(dir_url, cache_url,
     path, directory_key, metadata_dict,
@@ -113,25 +84,10 @@ def helper_upload_network_cached_from_directory(dir_url, cache_url,
   """
   Create a tar from a given directory (path) then upload it to networkcache.
   """
-  # Create tar file. Don't create it to /tmp dir as it can be too small.
-  path_dirname, path_basename = _split_last_directory(path)
-  tarpath = os.path.join(path_dirname, '%s.tar' % path_basename)
-  tar = tarfile.open(tarpath, "w:gz")
-  try:
-    try:
-      tar.add(path, arcname=path_basename)
-    finally:
-      tar.close()
-    # Upload it
-    result = helper_upload_network_cached_from_file(dir_url, cache_url,
-      tarpath, directory_key, metadata_dict,
+  return __upload_network_cached(dir_url, cache_url,
+      NetworkcacheClient.archive(path.rstrip(os.sep)), directory_key,
       signature_private_key_file, shacache_cert_file, shacache_key_file,
-      shadir_cert_file, shadir_key_file)
-  finally:
-    # Always clean it
-    if os.path.exists(tarpath):
-      os.remove(tarpath)
-  return result
+      shadir_cert_file, shadir_key_file, metadata_dict)
 
 def helper_download_network_cached(dir_url, cache_url,
@@ -257,26 +213,13 @@ def helper_download_network_cached_to_directory(dir_url, cache_url,
   """
   Download a tar file from network cache and untar it to specified path.
   """
-  # Download tar file. Don't download to /tmp dir as it can be too small.
-  path_dirname, path_basename = _split_last_directory(path)
-  tarpath = os.path.join(path_dirname, '%s.tar' % path_basename)
-  try:
-    metadata_dict = helper_download_network_cached_to_file(
-      dir_url, cache_url,
+  result = helper_download_network_cached(dir_url, cache_url,
       signature_certificate_list,
-      directory_key, tarpath, wanted_metadata_dict, required_key_list,
-      strategy)
-    if metadata_dict:
-      # Untar it to path
-      tar = tarfile.open(tarpath)
-      try:
-        logger.info("Extracting downloaded archive from cache...")
-        tar.extractall(path=os.path.dirname(path))
-      finally:
-        tar.close()
-  finally:
-    # Always clean it
-    if os.path.exists(tarpath):
-      os.remove(tarpath)
-  return metadata_dict
+      directory_key, wanted_metadata_dict, required_key_list, strategy)
+  if result:
+    file_descriptor, metadata_dict = result
+    try:
+      NetworkcacheClient.extract(path.rstrip('/'), file_descriptor)
+      return metadata_dict
+    finally:
+      file_descriptor.close()
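The deprecated directory helpers now delegate archiving and extraction to the two static methods added to NetworkcacheClient above. A minimal sketch of calling them directly, assuming the directories exist (the paths are placeholders):

```python
from slapos.libnetworkcache import NetworkcacheClient

# Pack a directory into an anonymous gzipped tar created next to it
# (not in /tmp, which can be too small). archive() returns the open
# temporary file with its position left at the end of the data, so
# rewind it before reading.
archive = NetworkcacheClient.archive('/srv/build/my-software')
archive.seek(0)

# Unpack such an archive under the parent directory of the target path.
# extract() also accepts non-seekable streams, spooling them to a
# temporary file before handing them to tarfile.
NetworkcacheClient.extract('/srv/restore/my-software', archive)
```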