Commit 6f1b5792 authored by Alain Takoudjou's avatar Alain Takoudjou Committed by Xavier Thompson

[feat] allow to rewrite url before download using netrc and macdef

This adds a generic mechanism in the Download API to rewrite the URL
to be downloaded, possibly with extra headers. Substituted groups from
the matching regular expression can be optionally quoted.

.netrc:

macdef buildout:HOSTNAME
  RULE_1_REGEX
    RULE_1_NEW_URL HEADER1=VALUE1 HEADER2=VALUE2 ...
  RULE_2_REGEX
    RULE_2_NEW_URL ...
  ...

macdef buildout:OTHER_HOSTNAME
  ...

A rewriting rule is defined by a pair of lines with optional indentation
(only there for readability).

The first line of a rule is a regex that matches fully against
the path?query part of the URL.

If the second line is empty, the request isn't changed.
Else parts are parsed using shell-like syntax:
- the first one must produce the full URL to download
- the next ones are optional headers to send
- each part is subject to regex substitution using the
  Python Format Specification Mini-Language:
  captured groups from the first line are numbered starting from 1,
  {0} is the base URL (scheme://location) and
  the optional `quote` attribute returns the urlencoded value

A use case is to work around
  https://gitlab.com/gitlab-org/gitlab/-/issues/19189
for example with the following .netrc:

    macdef buildout:lab.nexedi.com
      /(.+)/-/raw/([^/]+)/(.+)
        {0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2} PRIVATE-TOKEN=<ACCESS_TOKEN>
parent 3bdbff23
......@@ -22,12 +22,11 @@ try:
# Python 3
from urllib.error import HTTPError
from urllib.request import Request, urlopen
from urllib.parse import urlparse, urlunparse
from urllib.parse import urlparse, urlsplit, quote
except ImportError:
# Python 2
from urlparse import urlparse
from urlparse import urlunparse
from urllib2 import HTTPError, Request, urlopen
from urlparse import urlparse, urlsplit
from urllib2 import HTTPError, Request, urlopen, quote
from zc.buildout.easy_install import realpath
from base64 import b64encode
......@@ -38,6 +37,7 @@ import netrc
import os
import os.path
import re
import shlex
import shutil
import tempfile
import zc.buildout
......@@ -60,6 +60,12 @@ class netrc(netrc.netrc):
self.__init__(os.devnull)
return self.authenticators(host)
class Group(str):
    """A captured regex group usable as a plain string in URL templates.

    The extra ``quote`` attribute lets a format field such as
    ``{1.quote}`` insert the percent-encoded form of the group.
    """

    @property
    def quote(self):
        """Return this string percent-encoded, with no character kept safe."""
        # An empty ``safe`` set means that '/' gets escaped as well.
        return quote(self, safe='')
netrc = netrc()
class ChecksumError(zc.buildout.UserError):
......@@ -273,25 +279,50 @@ class Download(object):
url_host, url_port = parsed[-2:]
return '%s:%s' % (url_host, url_port)
def _rewrite_url(self, base_url, path_query, line_list):
# line_list = list of line for selected macdef
for pattern, template in zip(*[iter(line_list)]*2):
match = re.match(pattern.strip() + '$', # PY3: re.fullmatch
path_query)
if match is not None:
group_list = [base_url]
group_list += map(Group, match.groups())
return [s.format(*group_list) for s in shlex.split(template)]
def _auth(self, url):
# Work out the credentials, the URL to request and any extra headers for `url`.
# NOTE(review): this listing appears to be a flattened diff, so a superseded
# line can sit directly above its replacement (e.g. the two `parsed_url = ...`
# assignments below) — confirm against the real file.
parsed_url = urlparse(url)
parsed_url = urlsplit(url)
if parsed_url.scheme in ('http', 'https'):
# Credentials written in the URL itself are whatever precedes the last '@'
# of the netloc.
auth_host = parsed_url.netloc.rsplit('@', 1)
if len(auth_host) > 1:
# The 4-item form is: credentials taken from the URL, None, the URL with
# those credentials removed from its netloc, and an empty tuple.
return (auth_host[0],
parsed_url._replace(netloc=auth_host[1]).geturl())
None,
parsed_url._replace(netloc=auth_host[1]).geturl(),
(),
)
# Look the host name up with netrc.authenticators.
auth = netrc.authenticators(parsed_url.hostname)
if auth:
return '{0}:{2}'.format(*auth), url
if auth is None:
return
# Offer the URL to the rules stored under the netrc macro named
# 'buildout:<hostname>'.  It is passed in two pieces: the URL with path,
# query and fragment blanked, then the URL with scheme, netloc and fragment
# blanked.  `or [url]` keeps the original URL when _rewrite_url returns a
# falsy value.
new = self._rewrite_url(
parsed_url._replace(path='', query='', fragment='').geturl(),
parsed_url._replace(scheme='', netloc='', fragment='').geturl(),
netrc.macros.get('buildout:' + parsed_url.hostname, ()),
) or [url]
# Items 0 and 2 of the netrc entry, the URL to fetch (first element of
# `new`), then the remaining elements of `new`.
return auth[0], auth[2], new.pop(0), new
def urlretrieve(self, url, tmp_path):
auth = self._auth(url)
if auth:
req = Request(auth[1])
req = Request(auth[2])
if url != auth[2]:
self.logger.info('Downloading from url: %s', auth[2])
for header in auth[3]:
req.add_header(*header.split('=', 1))
cred = auth[0] if auth[1] is None else ':'.join(auth[:2])
req.add_header("Authorization",
"Basic " + bytes2str(b64encode(str2bytes(auth[0]))))
"Basic " + bytes2str(b64encode(str2bytes(cred))))
else:
req = url
with closing(urlopen(req)) as src:
with open(tmp_path, 'wb') as dst:
shutil.copyfileobj(src, dst)
......
......@@ -18,10 +18,12 @@ try:
# Python 3
from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.request import urlopen
from urllib.parse import urlparse, unquote, parse_qsl
except ImportError:
# Python 2
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from urllib2 import urlopen
from urllib2 import urlopen, unquote
from urlparse import urlparse, parse_qsl
import base64
import errno
......@@ -410,6 +412,14 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers()
self.wfile.write(out)
def forbidden():
    """Send a complete 403 Forbidden reply carrying a small HTML body."""
    body = b'<html><body>Forbidden</body></html>'
    self.send_response(403, 'Forbidden')
    # Headers are emitted in the same order as before: length, then type.
    for name, value in (
        ('Content-Length', str(len(body))),
        ('Content-Type', 'text/html'),
    ):
        self.send_header(name, value)
    self.end_headers()
    self.wfile.write(body)
if self.path == '/enable_server_logging':
self.__server.__log = True
return k()
......@@ -419,21 +429,32 @@ class Handler(BaseHTTPRequestHandler):
return k()
if self.path.startswith('/private/'):
parsed = urlparse(self.path)
auth = self.headers.get('Authorization')
if auth and auth.startswith('Basic ') and \
self.path[9:].encode() == base64.b64decode(
parsed.path[9:].encode() == base64.b64decode(
self.headers.get('Authorization')[6:]):
if parsed.query:
for h, v in parse_qsl(parsed.query):
if self.headers.get(h) != v:
return forbidden()
return k()
# By not returning 401+WWW-Authenticate, we check that the client
# skips the auth challenge, which is not free (in terms of performance)
# and is useless for what we support.
self.send_response(403, 'Forbidden')
out = '<html><body>Forbidden</body></html>'.encode()
self.send_header('Content-Length', str(len(out)))
self.send_header('Content-Type', 'text/html')
self.end_headers()
self.wfile.write(out)
return
return forbidden()
if self.path.startswith('/namespace/project/-/raw/master/'):
# This path is private and needs /api
return forbidden()
if self.path.startswith('/api/v4/projects/namespace%2Fproject/repository/files/'):
# path is: .../files/FILE_PATH_XXXX/raw/?ref=master
u = unquote(self.path.split('/')[7])
username, password = u.split(':')
token = self.headers.get('PRIVATE-TOKEN')
if password == token and username == "private_token":
return k()
return forbidden()
path = os.path.abspath(os.path.join(self.tree, *self.path.split('/')))
if not (
......
......@@ -183,6 +183,50 @@ UserError: Error downloading ...: HTTP Error 403: Forbidden
>>> is_temp; remove(path)
True
>>> os.environ['HOME'] = old_home
>>> remove(netrc)
Gitlab private token file with netrc:
>>> laburl = server_url + 'namespace/project/-/raw/master/private_token:TOKENXXX'
>>> download(laburl)
Traceback (most recent call last):
UserError: Error downloading ...: HTTP Error 403: Forbidden
>>> import os, zc.buildout.download
>>> old_home = os.environ['HOME']
>>> home = os.environ['HOME'] = tmpdir('test-laburl')
>>> netrc = join(home, '.netrc')
>>> c = "machine localhost\nlogin foo\npassword bar\n\nmacdef buildout:localhost\n\t/(.+)/foo/([^/]+)/bar \n\t\t{0}/bar/{1}/file/foo?ref={2} \n\t/(.+)/-/raw/([^/]+)/(.+) \n\t\t{0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2} PRIVATE-TOKEN=TOKENXXX\n\n"
>>> write(netrc, c)
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> path, is_temp = download(laburl)
>>> is_temp; remove(path)
True
Gitlab private raw with netrc and no macdef:
>>> remove(netrc)
>>> laburl = server_url + 'namespace/project/-/raw/master/private_token:TOKENPROJECT'
>>> write(netrc, 'machine localhost/namespace/project login private_token password TOKENPROJECT')
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> download(laburl)
Traceback (most recent call last):
UserError: Error downloading ...: HTTP Error 403: Forbidden
A netrc macdef can rewrite any URL:
>>> remove(netrc)
>>> c = 'machine localhost\nlogin foo\npassword bar\n\nmacdef buildout:localhost\n\t/(\w+)/redirect/(\w+):(\w+)/(\w+\s\d).* \n\t\t{0}/private/{2}:{3} h1={4.quote} h2={4} token=key token2="value 2"\n\n'
>>> write(netrc, c)
>>> url = server_url + 'notfound/redirect/foo:bar/test 2?h1=test%202&h2=test 2&token=key&token2=value 2'
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> path, is_temp = download(url)
>>> is_temp; remove(path)
True
>>> os.environ['HOME'] = old_home
>>> remove(netrc)
Downloading using the download cache
------------------------------------
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment