Commit 9db7477c authored by Alain Takoudjou's avatar Alain Takoudjou Committed by Xavier Thompson

[feat] allow to rewrite url before download using netrc and macdef

This change rewrites the URL of a file downloaded through buildout.download.Download using a macdef definition in the netrc file.
The rewrite is based on regular expressions; captures are substituted after optionally URL-encoding them.

This is how macdef definition is used to rewrite URL:

.netrc:

machine HOSTNAME
login foo
password bar

macdef buildout:HOSTNAME
  REGEX_STRING
    TEMPLATE_1 HEADER1=VALUE1 HEADER2=VALUE2 ...
  ANOTHER_REGEX
    TEMPLATE_2 HEADER1=VALUE1 HEADER2=VALUE2 ...
  ...

macdef ...

HEADER1=VALUE1 strings are optional; they are used to set request headers, similar to
the command `curl --header "HEADER1: VALUE1" ...`. Headers can
be repeated as needed.

REGEX_STRING is used to match the path and query (if present) of the url we are trying to download.
for example: the regex `/(.*)/-/raw/([\w\-]+)/(.*)` for url
'https://lab.nexedi.com/namespace/project/-/raw/master/README.md'

                /(.*)/-/raw/([\w\-]+)/(.*)
      {0}         {1}          {2}     {3}

TEMPLATE is the new full url with scheme and authority (netloc). All captures are used to format
the template and the headers. It's possible to encode a string while formatting; for now only the quote
method is supported. Captured groups from REGEX_STRING start at {1}; {0} is the base url.

Example of template and headers:

{0}/api/v4/projects/{1.quote}/repository/files/{3}/raw?ref={2} Authentication={2}

With the regex and template above, the url 'https://lab.nexedi.com/namespace/project/-/raw/master/README.md'
is rewritten to: https://lab.nexedi.com/api/v4/projects/namespace%2Fproject/repository/files/README.md/raw?ref=master

{0} is the base URL (https://lab.nexedi.com),
{1} matches 'namespace/project',
{2} matches 'master' and
{3} matches 'README.md'.

`namespace/project` is changed to `namespace%2Fproject` since it is encoded with `quote`.

Some use cases:

machine lab.nexedi.com
login ignored
password <ACCESS_TOKEN>

macdef buildout:lab.nexedi.com
  /(.*)/-/raw/([\w\-]+)/(.*)
    {0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2} PRIVATE-TOKEN=<ACCESS_TOKEN>

or

macdef buildout:lab.nexedi.com
  /(.*)/raw/([\w\-]+)/(.*)
    {0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2} Authorization="Bearer <OAUTH-TOKEN>"
  /(.*)/-/raw/([\w\-]+)/(.*)
    {0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2} PRIVATE-TOKEN=<ACCESS_TOKEN>
parent 92fc476b
......@@ -22,12 +22,11 @@ try:
# Python 3
from urllib.error import HTTPError
from urllib.request import Request, urlopen
from urllib.parse import urlparse, urlunparse
from urllib.parse import urlparse, urlsplit, quote
except ImportError:
# Python 2
from urlparse import urlparse
from urlparse import urlunparse
from urllib2 import HTTPError, Request, urlopen
from urlparse import urlparse, urlsplit
from urllib2 import HTTPError, Request, urlopen, quote
from zc.buildout.easy_install import realpath
from base64 import b64encode
......@@ -38,6 +37,7 @@ import netrc
import os
import os.path
import re
import shlex
import shutil
import tempfile
import zc.buildout
......@@ -60,6 +60,12 @@ class netrc(netrc.netrc):
self.__init__(os.devnull)
return self.authenticators(host)
class Group(str):
    """String holding one value substituted into a rewrite template.

    Being a ``str`` subclass, it formats exactly like the text it wraps,
    while the extra ``quote`` attribute lets a template write
    ``{1.quote}`` to obtain the percent-encoded form of the value.
    """

    @property
    def quote(self):
        """Return this string percent-encoded, with no character kept safe.

        An empty ``safe`` set means that ``/`` is encoded too, so
        ``namespace/project`` becomes ``namespace%2Fproject``.
        """
        return quote(self, safe='')
netrc = netrc()
class ChecksumError(zc.buildout.UserError):
......@@ -273,25 +279,50 @@ class Download(object):
url_host, url_port = parsed[-2:]
return '%s:%s' % (url_host, url_port)
def _rewrite_url(self, base_url, path_query, line_list):
    """Rewrite a URL using the rules of one netrc ``macdef``.

    ``line_list`` is the list of lines of the selected macdef. Lines are
    consumed two by two as (pattern, template) pairs; a trailing line
    without a partner is ignored.

    ``pattern`` is a regular expression that must match ``path_query``
    (the path and query of the URL) entirely. ``template`` is split with
    shell-like rules and each resulting token is formatted with
    ``base_url`` as ``{0}`` and the captured groups as ``{1}``, ``{2}``...
    Each group is a ``Group``, so ``{N.quote}`` gives its encoded form.

    Returns the list of formatted tokens of the first matching rule
    (callers use the first token as the new URL and the remaining ones
    as ``HEADER=VALUE`` strings), or None when no rule matches.
    """
    fullmatch = getattr(re, 'fullmatch', None)
    if fullmatch is None:
        # Python 2 has no re.fullmatch. Wrapping the pattern in a group
        # keeps a top-level alternation ('a|b') anchored on both sides,
        # and \Z (unlike $) does not match before a trailing newline.
        def fullmatch(pattern, string):
            return re.match('(?:%s)\\Z' % pattern, string)

    for pattern, template in zip(*[iter(line_list)] * 2):
        match = fullmatch(pattern.strip(), path_query)
        if match is not None:
            group_list = [base_url]
            # groups('') substitutes an empty string for optional groups
            # that did not take part in the match, rather than formatting
            # the literal text 'None' into the URL or a header.
            group_list.extend(Group(g) for g in match.groups(''))
            return [s.format(*group_list) for s in shlex.split(template)]
    return None
def _auth(self, url):
    """Work out credentials, final URL and extra headers for ``url``.

    Returns None when nothing applies: the scheme is neither ``http``
    nor ``https``, or the URL embeds no credentials and netrc has no
    entry for the host.

    Otherwise returns a 4-tuple ``(login, password, url, headers)``:

    - credentials embedded in the URL (``user:pass@host``): ``login`` is
      the whole userinfo part, ``password`` is None, ``url`` is the URL
      with the userinfo removed and ``headers`` is empty;
    - netrc entry: ``login`` and ``password`` come from netrc, and
      ``url`` / ``headers`` come from the ``buildout:<hostname>`` macdef
      when one of its rules matches, else ``url`` is unchanged and
      ``headers`` is empty. Each header is a ``NAME=VALUE`` string.
    """
    parsed_url = urlsplit(url)
    if parsed_url.scheme not in ('http', 'https'):
        return None

    auth_host = parsed_url.netloc.rsplit('@', 1)
    if len(auth_host) > 1:
        # Credentials given in the URL take precedence over netrc and
        # disable any macdef rewrite.
        return (
            auth_host[0],
            None,
            parsed_url._replace(netloc=auth_host[1]).geturl(),
            (),
        )

    auth = netrc.authenticators(parsed_url.hostname)
    if auth is None:
        return None

    new = self._rewrite_url(
        # {0} of the template: scheme and authority only.
        parsed_url._replace(path='', query='', fragment='').geturl(),
        # What the macdef patterns are matched against: path and query.
        parsed_url._replace(scheme='', netloc='', fragment='').geturl(),
        netrc.macros.get('buildout:' + parsed_url.hostname, ()),
    ) or [url]
    # auth is (login, account, password); the account is not used.
    return auth[0], auth[2], new[0], new[1:]
def urlretrieve(self, url, tmp_path):
auth = self._auth(url)
if auth:
req = Request(auth[1])
req = Request(auth[2])
if url != auth[2]:
self.logger.info('Downloading from url: %s', auth[2])
for header in auth[3]:
req.add_header(*header.split('=', 1))
cred = auth[0] if auth[1] is None else '{0}:{1}'.format(*auth)
req.add_header("Authorization",
"Basic " + bytes2str(b64encode(str2bytes(auth[0]))))
"Basic " + bytes2str(b64encode(str2bytes(cred))))
else:
req = url
with closing(urlopen(req)) as src:
with open(tmp_path, 'wb') as dst:
shutil.copyfileobj(src, dst)
......
......@@ -18,10 +18,12 @@ try:
# Python 3
from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.request import urlopen
from urllib.parse import urlparse, unquote, parse_qsl
except ImportError:
# Python 2
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from urllib2 import urlopen
from urllib2 import urlopen, unquote
from urlparse import urlparse, parse_qsl
import base64
import errno
......@@ -395,6 +397,7 @@ class Handler(BaseHTTPRequestHandler):
def __init__(self, request, address, server):
    """Record server-derived state, then hand over to the base handler.

    The attributes are assigned before ``BaseHTTPRequestHandler.__init__``
    is called so that they already exist while the base class runs.
    """
    host, port = server.server_name, server.server_port
    self.tree = server.tree
    self.__server = server
    # Root URL of this test server, with a trailing slash.
    self.url = "http://%s:%s/" % (host, port)
    BaseHTTPRequestHandler.__init__(self, request, address, server)
def do_GET(self):
......@@ -410,6 +413,14 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers()
self.wfile.write(out)
def forbidden():
self.send_response(403, 'Forbidden')
out = b'<html><body>Forbidden</body></html>'
self.send_header('Content-Length', str(len(out)))
self.send_header('Content-Type', 'text/html')
self.end_headers()
self.wfile.write(out)
if self.path == '/enable_server_logging':
self.__server.__log = True
return k()
......@@ -419,21 +430,32 @@ class Handler(BaseHTTPRequestHandler):
return k()
if self.path.startswith('/private/'):
parsed = urlparse(self.path)
auth = self.headers.get('Authorization')
if auth and auth.startswith('Basic ') and \
self.path[9:].encode() == base64.b64decode(
parsed.path[9:].encode() == base64.b64decode(
self.headers.get('Authorization')[6:]):
if parsed.query:
for h, v in parse_qsl(parsed.query):
if self.headers.get(h) != v:
return forbidden()
return k()
# But not returning 401+WWW-Authenticate, we check that the client
# skips auth challenge, which is not free (in terms of performance)
# and useless for what we support.
self.send_response(403, 'Forbidden')
out = '<html><body>Forbidden</body></html>'.encode()
self.send_header('Content-Length', str(len(out)))
self.send_header('Content-Type', 'text/html')
self.end_headers()
self.wfile.write(out)
return
return forbidden()
if self.path.startswith('/namespace/project/-/raw/master/'):
# This path is private and need /api
return forbidden()
if self.path.startswith('/api/v4/projects/namespace%2Fproject/repository/files/'):
# path is : /api/v4/projects/namespace%2Fproject/repository/files/FILE_PATH_XXXX/raw/?ref=master
u = unquote(self.path.split('/')[7])
username, password = u.split(':')
token = self.headers.get('PRIVATE-TOKEN')
if password == token and username == "private_token":
return k()
return forbidden()
path = os.path.abspath(os.path.join(self.tree, *self.path.split('/')))
if not (
......
......@@ -183,6 +183,50 @@ UserError: Error downloading ...: HTTP Error 403: Forbidden
>>> is_temp; remove(path)
True
>>> os.environ['HOME'] = old_home
>>> remove(netrc)
Gitlab private token file with netrc:
>>> laburl = server_url + 'namespace/project/-/raw/master/private_token:TOKENXXX'
>>> download(laburl)
Traceback (most recent call last):
UserError: Error downloading ...: HTTP Error 403: Forbidden
>>> import os, zc.buildout.download
>>> old_home = os.environ['HOME']
>>> home = os.environ['HOME'] = tmpdir('test-laburl')
>>> netrc = join(home, '.netrc')
>>> c = "machine localhost login foo password bar\n\nmacdef buildout:localhost\n\t/(.*)/foo/([\w\-]+)/bar \n\t\t{0}/bar/{1}/file/foo?ref={2.quote} \n\t/(.*)/-/raw/([\w\-]+)/(.*) \n\t\t{0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2.quote} PRIVATE-TOKEN=TOKENXXX "
>>> write(netrc, c)
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> path, is_temp = download(laburl)
>>> is_temp; remove(path)
True
Gitlab private raw with netrc and no macdef:
>>> remove(netrc)
>>> laburl = server_url + 'namespace/project/-/raw/master/private_token:TOKENPROJECT'
>>> write(netrc, 'machine localhost/namespace/project login private_token password TOKENPROJECT')
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> download(laburl)
Traceback (most recent call last):
UserError: Error downloading ...: HTTP Error 403: Forbidden
netrc with macdef can rewrite any url:
>>> remove(netrc)
>>> c = 'machine localhost login foo password bar\n\nmacdef buildout:localhost\n\t/(\w+)/redirect/(\w+):(\w+)/(\w+\s\d).* \n\t\t{0}/private/{2}:{3} h1={4.quote} h2={4} token=key token2="value 2" '
>>> write(netrc, c)
>>> url = server_url + 'notfound/redirect/foo:bar/test 2?h1=test%202&h2=test 2&token=key&token2=value 2'
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> path, is_temp = download(url)
>>> is_temp; remove(path)
True
>>> os.environ['HOME'] = old_home
>>> remove(netrc)
Downloading using the download cache
------------------------------------
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment