Commit a0105b6d authored by Ophélie Gagnard, committed by Ophélie Gagnard

Download: Add new criteria and new syntax for selection.

Syntax:
KEY{>=,<=,>,<,==,:}VALUE
VALUE is expected to be a json dump, except when the delimiter is ":".
With ":" delimiter, accepted special words are "max" and "min".

Selection:
For the sake of readability, the shadir entries are preprocessed in order
to eliminate invalid metadata. This is slower, but the metadata is
expected to be small, as each entry may require a database access on
the shacache server side.
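
For illustration, criteria in the new syntax could look like the following
(all metadata keys below are hypothetical examples, not part of this commit):

    version>="1.2"   (json dump of a string, compared with >=)
    size<1000        (json dump of a number, compared with <)
    os=="debian11"   (equality on the json-decoded value)
    build:max        (":" delimiter with the special word "max")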
parent 7e71f9da
......@@ -16,7 +16,9 @@ import argparse
import hashlib
import json
import logging
import operator
import os
import re
import ssl
import shutil
import sys
......@@ -411,7 +413,7 @@ class NetworkcacheClient(object):
return CheckResponse(self._request('cache', sha512sum), sha512sum)
def select(self, key, wanted_metadata_dict={}, required_key_list=frozenset()):
'''Return an iterator over shadir entries that match given criteria
''' Return an iterator over shadir entries that match given criteria
'''
required_key_test = frozenset(required_key_list).issubset
data_list = self.select_generic(key, self.signature_certificate_list)
......@@ -473,35 +475,101 @@ class NetworkcacheClient(object):
pass
return False
class NetworkcacheFilter(object):
special_word_mapping = {"max":max, "min":max}
parse_criterion = re.compile("([<>]=?|==|:)").split
operator_mapping = {
">=": operator.ge,
"<=": operator.le,
">": operator.gt,
"<": operator.lt,
"==": operator.eq,
}
def __init__(self, criterion_list=()):
''' Return a list of parsed selection criteria
'''
if type(criterion_list) is tuple and len(criterion_list) == 3:
self.criterion_list = criterion_list
elif type(criterion_list) is list:
parsed_criterion_list = []
for criterion in criterion_list:
parsed_criterion = self.parse_criterion(criterion, maxsplit=1)
if len(parsed_criterion) != 3:
raise NetworkcacheException(
'Could not parse criterion: missing or invalid separator (%s)'
% criterion)
if parsed_criterion[1] != ':':
parsed_criterion[2] = json.loads(parsed_criterion[2])
elif parsed_criterion[2] not in self.special_word_mapping:
raise NetworkcacheException('Unknown special word %r'
% parsed_criterion[2])
parsed_criterion_list.append(parsed_criterion)
self.criterion_list = parsed_criterion_list
else:
raise NetworkcacheException('Invalid criteria: %s' % criterion_list)
def __call__(self, data_dict_iterator):
''' Return a list of shadir entries that match given criteria
'''
# converting the generator into a list because the min/max case would exhaust it
data_dict_list = list(data_dict_iterator)
for key, op, value in self.criterion_list:
data_dict_list = [data_dict for data_dict in data_dict_list if key in data_dict]
if not data_dict_list:
break
if op == ":":
extremum = self.special_word_mapping[value](
data_dict[key] for data_dict in data_dict_list)
data_dict_list = [data_dict for data_dict in data_dict_list
if data_dict[key] == extremum]
else:
filtered_data_dict_list = []
for data_dict in data_dict_list:
try:
if self.operator_mapping[op](data_dict[key], value):
filtered_data_dict_list.append(data_dict)
except TypeError:
logger.info('Comparison failed: %s %s %s'
' with types: %s %s',
data_dict[key], op, value,
type(data_dict[key]), type(value))
data_dict_list = filtered_data_dict_list
return data_dict_list
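
A minimal usage sketch of the class above (the entries and metadata keys are
made up for illustration; real entries come from shadir):

    entries = [
        {'sha512': 'aaa', 'version': 3, 'os': 'debian11'},
        {'sha512': 'bbb', 'version': 5, 'os': 'debian11'},
        {'sha512': 'ccc', 'version': 5},  # no 'os' key: dropped by the 'os' criterion
    ]
    # comparison values are json dumps; ':' only accepts the special words 'min'/'max'
    criteria = NetworkcacheFilter(['version>=2', 'os=="debian11"', 'version:max'])
    print(criteria(iter(entries)))
    # -> [{'sha512': 'bbb', 'version': 5, 'os': 'debian11'}]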
class NetworkcacheException(Exception):
pass
DirectoryNotFound = UploadError = NetworkcacheException # BBB
key_help = \
" The key will be concatenation of PREFIX_KEY, md5(URL) and SUFFIX_KEY."
key_help = (
"The identifier under which the data is indexed."
" Defaults to 'file-urlmd5:md5(URL)'"
)
def _newArgumentParser(url_help):
def _newArgumentParser(url_help, key_help, key_required):
parser = argparse.ArgumentParser()
parser.add_argument('--config', type=argparse.FileType('r'), required=True,
help='SlapOS configuration file.')
parser.add_argument('--prefix-key', default='',
help="Prefix used for the shadir URL, not a cryptografic key.")
parser.add_argument('--suffix-key', default='',
help="Suffix used for the shadir URL, not a cryptografic key.")
parser.add_argument('--url', help=url_help)
_ = parser.add_mutually_exclusive_group(required=key_required).add_argument
_('--key', help=key_help)
_('--url', help=url_help)
return parser
def cmd_upload(*args):
parser = _newArgumentParser(
"Upload data pointed to by this argument, unless --file is specified."
" Non-local contents is first downloaded to a temporary file."
"%s If not given, the uploaded data is not indexed." % key_help)
" Non-local contents is first downloaded to a temporary file.",
key_help + " if --url is given, else with neither --url nor --key the uploaded data is not indexed."
" This should be a unique value that refers to related entries,"
" and that starts with 'SCHEME:' where SCHEME"
" indicates how (or by what) the data is processed."
" For performance reasons, avoid having too"
" many entries by making your key more specific.",
False)
parser.add_argument('--file',
help="Upload the contents of this file, overriding --url")
parser.add_argument('--id',
help="Identifier used for the shadir URL. Overriding --prefix-key, --suffix-key and --url")
help="Upload the contents of this file, overriding --url.")
parser.add_argument('--metadata',
help="Take a file containing a json-serializable dictionary with shadir metadata.")
parser.add_argument('meta', nargs='*', metavar='KEY=VALUE',
......@@ -512,7 +580,7 @@ def cmd_upload(*args):
try:
if args.file:
f = open(args.file, 'rb')
if not args.url and not args.id:
if not args.url and not args.key: # no shadir entry
nc.upload(f)
return
elif args.url:
......@@ -522,39 +590,39 @@ def cmd_upload(*args):
if args.metadata:
with open(args.metadata) as g:
try:
metadata_dict = json.loads(g.read())
if type(metadata_dict) != dict:
raise NetworkcacheException("Not a json-serializable dictionary: %s" % args.metadata)
except json.decoder.JSONDecodeError:
raise NetworkcacheException("Invalid json in %s" % args.metadata)
metadata_dict = json.load(g)
except json.decoder.JSONDecodeError as e:
sys.exit("%s: %s" % (args.metadata, e))
if type(metadata_dict) is not dict:
sys.exit("Not a dictionary: %s" % args.metadata)
else:
metadata_dict = dict()
metadata_dict.update(dict(x.split('=', 1) for x in args.meta))
if args.id:
metadata_dict.setdefault('id', args.id)
key = args.id
metadata_dict = {}
metadata_dict.update(x.split('=', 1) for x in args.meta)
if args.key:
identifier = args.key
else:
metadata_dict.setdefault('url', args.url)
urlmd5 = hashlib.md5(args.url.encode()).hexdigest()
key = args.prefix_key + urlmd5 + args.suffix_key
nc.upload(f, key, **metadata_dict)
identifier = "file-urlmd5:" + urlmd5
nc.upload(f, identifier, **metadata_dict)
finally:
f is None or f.close()
def cmd_download(*args):
parser = _newArgumentParser("URL of data to download." + key_help)
parser.add_argument('--id',
help="Identifier of the shadir URL, overriding --prefix-key and --suffix-key.")
parser.add_argument('meta', nargs='*', metavar='KEY=VALUE',
help="Extra metadata.")
parser = _newArgumentParser("URL of data to download.", key_help, True)
parser.add_argument('meta', nargs='*', metavar='KEY{>=,<=,==,<,>,:}VALUE',
help='Filter metadata. Each argument is a filter with a comparison condition. Filters are applied one by one, with the arguments processed in their order of appearance. VALUE is expected to be a json dump of a comparable object in Python (strings included), except when the separator is ":" (in this case VALUE must be min or max).')
args = parser.parse_args(args or sys.argv[1:])
nc = NetworkcacheClient(args.config)
kw = dict(x.split('=', 1) for x in args.meta)
if args.id:
key = args.id
if args.key:
identifier = args.key
else:
urlmd5 = hashlib.md5(args.url.encode()).hexdigest()
key = args.prefix_key + urlmd5 + args.suffix_key
identifier = "file-urlmd5:" + urlmd5
f = sys.stdout
shutil.copyfileobj(nc.download(next(nc.select(key, kw))['sha512']),
getattr(f, 'buffer', f))
data_list = NetworkcacheFilter(args.meta)(nc.select(identifier))
if not data_list:
sys.exit("No result found with given criteria.")
shutil.copyfileobj(nc.download(data_list[0]['sha512']),
getattr(f, 'buffer', f))
f.close()
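
As a usage sketch, assuming the package's console scripts are named
networkcache-upload and networkcache-download (the script names are not shown
in this diff), uploading with metadata and then downloading the best match
could look like:

    networkcache-upload --config slapos.cfg --key 'myscheme:foo' \
        --file ./foo.bin os=debian11 version=1.2
    networkcache-download --config slapos.cfg --key 'myscheme:foo' \
        'os=="debian11"' 'version:max' > foo.bin

Note that metadata passed as KEY=VALUE on upload is stored as strings, so
string comparisons (json-quoted values) are the safe choice on download.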