Commit b6d53181 authored by Ophélie Gagnard's avatar Ophélie Gagnard

Download: Add new criteria and new syntax for selection.

Syntax:
KEY[>=,<=,>,<,==,:]VALUE
VALUE is expected to be json dump except when the delimiter is ":"
With ":" delimiter, accepted special words are "max" and "min".

Selection:
For the sake of readability, the shadir entries are preprocessed in order
to eliminate invalid metadata. This is slower but the metadata is
expected to be small as each entry may require a database access on
the server shacache side.
parent 7e71f9da
...@@ -16,7 +16,9 @@ import argparse ...@@ -16,7 +16,9 @@ import argparse
import hashlib import hashlib
import json import json
import logging import logging
import operator
import os import os
import re
import ssl import ssl
import shutil import shutil
import sys import sys
...@@ -50,6 +52,14 @@ UPLOAD_TIMEOUT = 60 * 60 ...@@ -50,6 +52,14 @@ UPLOAD_TIMEOUT = 60 * 60
logger = logging.getLogger('networkcache') logger = logging.getLogger('networkcache')
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
# Splitter for selection criteria: called as parse_criterion(text, maxsplit=1)
# it returns [KEY, SEPARATOR, VALUE], the separator being kept because the
# pattern is a capturing group. A text without separator comes back as a
# single-element list.
parse_criterion = re.compile("([<>]=?|==|:).").split if False else re.compile("([<>]=?|==|:)").split

# Comparison function applied for each comparison separator. The ":"
# separator is deliberately absent: it introduces a special word instead of
# a value to compare with.
operator_mapping = dict((
  (">=", operator.ge),
  ("<=", operator.le),
  (">", operator.gt),
  ("<", operator.lt),
  ("==", operator.eq),
))
class short_exc_info(tuple): class short_exc_info(tuple):
...@@ -411,7 +421,7 @@ class NetworkcacheClient(object): ...@@ -411,7 +421,7 @@ class NetworkcacheClient(object):
return CheckResponse(self._request('cache', sha512sum), sha512sum) return CheckResponse(self._request('cache', sha512sum), sha512sum)
def select(self, key, wanted_metadata_dict={}, required_key_list=frozenset()): def select(self, key, wanted_metadata_dict={}, required_key_list=frozenset()):
'''Return an iterator over shadir entries that match given criteria ''' Return an iterator over shadir entries that match given criteria
''' '''
required_key_test = frozenset(required_key_list).issubset required_key_test = frozenset(required_key_list).issubset
data_list = self.select_generic(key, self.signature_certificate_list) data_list = self.select_generic(key, self.signature_certificate_list)
...@@ -435,6 +445,53 @@ class NetworkcacheClient(object): ...@@ -435,6 +445,53 @@ class NetworkcacheClient(object):
else: else:
yield information_dict yield information_dict
def select_special_syntax(self, key, selection_criterion_list=()):
  ''' Return a list of shadir entries that match given criteria

  key is passed as is to self.select() to fetch the candidate entries.
  selection_criterion_list is an iterable of criteria strings, parsed by
  self.parse_special_syntax(). Criteria are applied one after the other,
  each one filtering the entries kept by the previous ones:
  - KEY<operator>VALUE keeps the entries whose KEY compares successfully
    with VALUE;
  - KEY:max / KEY:min keeps the entries holding the greatest / smallest
    value of KEY.
  In both cases, entries that do not have KEY are discarded.

  The result is an empty list when nothing matches. Exceptions raised by
  self.parse_special_syntax() for invalid criteria are propagated, and so
  is a TypeError raised by max()/min() on values that cannot be ordered.
  '''
  # Criteria are validated first, so that a malformed criterion is reported
  # before the entries are fetched.
  parsed_criterion_list = self.parse_special_syntax(selection_criterion_list)
  data_dict_list = list(self.select(key))
  extremum_function_dict = {"max": max, "min": min}
  for field, separator, value in parsed_criterion_list:
    if not data_dict_list:
      # Nothing left to filter; this also keeps max()/min() away from an
      # empty sequence, on which they raise ValueError.
      break
    if separator == ":":
      find_extremum = extremum_function_dict[value]
      # Entries without the field cannot take part in the comparison.
      candidate_list = [data_dict for data_dict in data_dict_list
                        if field in data_dict]
      if candidate_list:
        extremum = find_extremum(data_dict[field]
                                 for data_dict in candidate_list)
        candidate_list = [data_dict for data_dict in candidate_list
                          if data_dict[field] == extremum]
      data_dict_list = candidate_list
    else:
      compare = operator_mapping[separator]
      filtered_data_dict_list = []
      for data_dict in data_dict_list:
        if field not in data_dict:
          continue
        try:
          if compare(data_dict[field], value):
            filtered_data_dict_list.append(data_dict)
        except TypeError:
          # Values of incompatible types: the entry is dropped, not fatal.
          logger.info('Comparison failed: %s %s %s\n'
                      ' with types: %s %s %s',
            data_dict[field], separator, value,
            type(data_dict[field]), type(separator), type(value))
      data_dict_list = filtered_data_dict_list
  return data_dict_list
def parse_special_syntax(self, selection_criterion_list):
  ''' Return a list of parsed selection criteria

  Each criterion is a string of the form KEY<separator>VALUE, split on its
  first separator (one of >=, <=, >, <, == or ":"):
  - with a comparison operator, VALUE is decoded with json.loads;
  - with ":", VALUE must be one of the special words "max" or "min" and is
    kept as a string.

  The result holds one [KEY, separator, VALUE] list per criterion, in the
  order given.

  Raise NetworkcacheException if a criterion contains no known separator,
  or if the word following ":" is not a special word. Errors raised by
  json.loads on an invalid VALUE are not caught here.
  '''
  special_word_list = ("max", "min")
  parsed_criterion_list = []
  for criterion in selection_criterion_list:
    parsed_criterion = parse_criterion(criterion, maxsplit=1)
    if len(parsed_criterion) != 3:
      # The message is formatted here: unlike logger calls, an exception
      # does not interpolate extra constructor arguments.
      raise NetworkcacheException(
        'Could not parse criterion: either no separator or invalid separator (%r)'
        % (criterion,))
    separator, value = parsed_criterion[1:]
    if separator in operator_mapping:
      parsed_criterion[2] = json.loads(value)
    elif value not in special_word_list: # separator is ":"
      raise NetworkcacheException('Unknown special word "%s"' % value)
    parsed_criterion_list.append(parsed_criterion)
  return parsed_criterion_list
def select_generic(self, key, filter=True): def select_generic(self, key, filter=True):
''' Select trustable entries from shadir. ''' Select trustable entries from shadir.
''' '''
...@@ -522,14 +579,14 @@ def cmd_upload(*args): ...@@ -522,14 +579,14 @@ def cmd_upload(*args):
if args.metadata: if args.metadata:
with open(args.metadata) as g: with open(args.metadata) as g:
try: try:
metadata_dict = json.loads(g.read()) metadata_dict = json.load(g)
if type(metadata_dict) != dict: except json.decoder.JSONDecodeError as e:
raise NetworkcacheException("Not a json-serializable dictionary: %s" % args.metadata) sys.exit("%s: %s" % (args.metadata, e))
except json.decoder.JSONDecodeError: if type(metadata_dict) is not dict:
raise NetworkcacheException("Invalid json in %s" % args.metadata) sys.exit("Not a dictionary: %s" % args.metadata)
else: else:
metadata_dict = dict() metadata_dict = {}
metadata_dict.update(dict(x.split('=', 1) for x in args.meta)) metadata_dict.update(x.split('=', 1) for x in args.meta)
if args.id: if args.id:
metadata_dict.setdefault('id', args.id) metadata_dict.setdefault('id', args.id)
key = args.id key = args.id
...@@ -545,16 +602,19 @@ def cmd_download(*args): ...@@ -545,16 +602,19 @@ def cmd_download(*args):
parser = _newArgumentParser("URL of data to download." + key_help) parser = _newArgumentParser("URL of data to download." + key_help)
parser.add_argument('--id', parser.add_argument('--id',
help="Identifier of the shadir URL, overriding --prefix-key and --suffix-key.") help="Identifier of the shadir URL, overriding --prefix-key and --suffix-key.")
parser.add_argument('meta', nargs='*', metavar='KEY=VALUE', parser.add_argument('meta', nargs='*', metavar='KEY[>=,<=,==,<,>,:]VALUE',
help="Extra metadata.") help='Extra metadata. VALUE is expected to be a json dump except when the separator is ":".')
args = parser.parse_args(args or sys.argv[1:]) args = parser.parse_args(args or sys.argv[1:])
nc = NetworkcacheClient(args.config) nc = NetworkcacheClient(args.config)
kw = dict(x.split('=', 1) for x in args.meta)
if args.id: if args.id:
key = args.id key = args.id
else: else:
urlmd5 = hashlib.md5(args.url.encode()).hexdigest() urlmd5 = hashlib.md5(args.url.encode()).hexdigest()
key = args.prefix_key + urlmd5 + args.suffix_key key = args.prefix_key + urlmd5 + args.suffix_key
f = sys.stdout f = sys.stdout
shutil.copyfileobj(nc.download(next(nc.select(key, kw))['sha512']), data_list = nc.select_special_syntax(key, args.meta)
getattr(f, 'buffer', f)) if not data_list:
sys.exit("No result found with given criteria.")
shutil.copyfileobj(nc.download(data_list[0]['sha512']),
getattr(f, 'buffer', f))
f.close()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment