Commit 9db7477c authored by Alain Takoudjou's avatar Alain Takoudjou Committed by Xavier Thompson

[feat] allow to rewrite url before download using netrc and macdef

This change allows rewriting the URL of a file downloaded through buildout.download.Download, using a macdef definition in the netrc file.
The rewrite is based on regular expressions; captures are substituted into the template after optionally URL-encoding them.

This is how macdef definition is used to rewrite URL:

.netrc:

machine HOSTNAME
login foo
password bar

macdef buildout:HOSTNAME
  REGEX_STRING
    TEMPLATE_1 HEADER1=VALUE1 HEADER2=VALUE2 ...
  ANOTHER_REGEX
    TEMPLATE_2 HEADER1=VALUE1 HEADER2=VALUE2 ...
  ...

macdef ...

The HEADER1=VALUE1 strings are optional; they are used to set request headers, similar to
the command `curl --header "HEADER1: VALUE1" ...`. Headers can
be repeated as needed.

REGEX_STRING is used to match the path and query (if present) of the url we are trying to download.
for example: the regex `/(.*)/-/raw/([\w\-]+)/(.*)` for url
'https://lab.nexedi.com/namespace/project/-/raw/master/README.md'

                /(.*)/-/raw/([\w\-]+)/(.*)
      {0}         {1}          {2}     {3}

TEMPLATE is the new full url with scheme and authority (netloc). All captures are used to format
the template and the headers. Captures can be encoded while formatting; for now only the `quote`
method is supported. Captured groups from REGEX_STRING start at {1}; {0} is the base url.

Example of template and headers:

{0}/api/v4/projects/{1.quote}/repository/files/{3}/raw?ref={2} Authentication={2}

With the regex and template above, the url 'https://lab.nexedi.com/namespace/project/-/raw/master/README.md'
is rewritten to: https://lab.nexedi.com/api/v4/projects/namespace%2Fproject/repository/files/README.md/raw?ref=master

{0} is the base URL (https://lab.nexedi.com)
{1} matches 'namespace/project'
{2} matches 'master' and
{3} matches 'README.md'

`namespace/project` is changed to `namespace%2Fproject` since it's encoded with `quote`.

Some use cases:

machine lab.nexedi.com
login ignored
password <ACCESS_TOKEN>

macdef buildout:lab.nexedi.com
  /(.*)/-/raw/([\w\-]+)/(.*)
    {0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2} PRIVATE-TOKEN=<ACCESS_TOKEN>

or

macdef buildout:lab.nexedi.com
  /(.*)/raw/([\w\-]+)/(.*)
    {0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2} Authorization="Bearer <OAUTH-TOKEN>"
  /(.*)/-/raw/([\w\-]+)/(.*)
    {0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2} PRIVATE-TOKEN=<ACCESS_TOKEN>
parent 92fc476b
...@@ -22,12 +22,11 @@ try: ...@@ -22,12 +22,11 @@ try:
# Python 3 # Python 3
from urllib.error import HTTPError from urllib.error import HTTPError
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
from urllib.parse import urlparse, urlunparse from urllib.parse import urlparse, urlsplit, quote
except ImportError: except ImportError:
# Python 2 # Python 2
from urlparse import urlparse from urlparse import urlparse, urlsplit
from urlparse import urlunparse from urllib2 import HTTPError, Request, urlopen, quote
from urllib2 import HTTPError, Request, urlopen
from zc.buildout.easy_install import realpath from zc.buildout.easy_install import realpath
from base64 import b64encode from base64 import b64encode
...@@ -38,6 +37,7 @@ import netrc ...@@ -38,6 +37,7 @@ import netrc
import os import os
import os.path import os.path
import re import re
import shlex
import shutil import shutil
import tempfile import tempfile
import zc.buildout import zc.buildout
...@@ -60,6 +60,12 @@ class netrc(netrc.netrc): ...@@ -60,6 +60,12 @@ class netrc(netrc.netrc):
self.__init__(os.devnull) self.__init__(os.devnull)
return self.authenticators(host) return self.authenticators(host)
class Group(str):
    """A string that also exposes a percent-encoded form of itself.

    Instances behave as plain strings; the ``quote`` attribute makes it
    possible to write a ``str.format`` field such as ``{1.quote}`` to
    insert the encoded value instead of the raw one.
    """

    @property
    def quote(self):
        """Return this string percent-encoded, '/' included (empty safe set)."""
        encoded = quote(self, safe='')
        return encoded
netrc = netrc() netrc = netrc()
class ChecksumError(zc.buildout.UserError): class ChecksumError(zc.buildout.UserError):
...@@ -273,25 +279,50 @@ class Download(object): ...@@ -273,25 +279,50 @@ class Download(object):
url_host, url_port = parsed[-2:] url_host, url_port = parsed[-2:]
return '%s:%s' % (url_host, url_port) return '%s:%s' % (url_host, url_port)
def _rewrite_url(self, base_url, path_query, line_list):
    """Rewrite a URL according to the lines of a selected netrc macdef.

    ``line_list`` holds alternating lines: a regular expression followed
    by a template line. A trailing unpaired line is ignored. The first
    expression that matches the *whole* of ``path_query`` selects its
    template.

    The template line is split with ``shlex.split``; every resulting
    word is formatted with ``base_url`` as ``{0}`` and the captured
    groups as ``{1}``, ``{2}``, ... Groups are wrapped in ``Group`` so
    that fields such as ``{1.quote}`` work.

    Returns the list of formatted words, or None when no expression
    matches.
    """
    try:
        fullmatch = re.fullmatch
    except AttributeError:
        # Python 2 has no re.fullmatch. Wrapping the pattern in a
        # non-capturing group keeps group numbering intact and makes the
        # end anchor apply to every branch of a top-level alternation
        # (a bare "a|b" + "$" would let "a" match as a mere prefix).
        def fullmatch(pattern, string):
            return re.match('(?:%s)\\Z' % pattern, string)

    # zip over the same iterator twice to walk the lines two by two.
    for pattern, template in zip(*[iter(line_list)] * 2):
        match = fullmatch(pattern.strip(), path_query)
        if match is None:
            continue
        group_list = [base_url]
        group_list.extend(Group(captured) for captured in match.groups())
        return [word.format(*group_list) for word in shlex.split(template)]
    return None
def _auth(self, url): def _auth(self, url):
parsed_url = urlparse(url) parsed_url = urlsplit(url)
if parsed_url.scheme in ('http', 'https'): if parsed_url.scheme in ('http', 'https'):
auth_host = parsed_url.netloc.rsplit('@', 1) auth_host = parsed_url.netloc.rsplit('@', 1)
if len(auth_host) > 1: if len(auth_host) > 1:
return (auth_host[0], return (auth_host[0],
parsed_url._replace(netloc=auth_host[1]).geturl()) None,
parsed_url._replace(netloc=auth_host[1]).geturl(),
(),
)
auth = netrc.authenticators(parsed_url.hostname) auth = netrc.authenticators(parsed_url.hostname)
if auth: if auth is None:
return '{0}:{2}'.format(*auth), url return
new = self._rewrite_url(
parsed_url._replace(path='', query='', fragment='').geturl(),
parsed_url._replace(scheme='', netloc='', fragment='').geturl(),
netrc.macros.get('buildout:' + parsed_url.hostname, ()),
) or [url]
return auth[0], auth[2], new.pop(0), new
def urlretrieve(self, url, tmp_path): def urlretrieve(self, url, tmp_path):
auth = self._auth(url) auth = self._auth(url)
if auth: if auth:
req = Request(auth[1]) req = Request(auth[2])
if url != auth[2]:
self.logger.info('Downloading from url: %s', auth[2])
for header in auth[3]:
req.add_header(*header.split('=', 1))
cred = auth[0] if auth[1] is None else '{0}:{1}'.format(*auth)
req.add_header("Authorization", req.add_header("Authorization",
"Basic " + bytes2str(b64encode(str2bytes(auth[0])))) "Basic " + bytes2str(b64encode(str2bytes(cred))))
else: else:
req = url req = url
with closing(urlopen(req)) as src: with closing(urlopen(req)) as src:
with open(tmp_path, 'wb') as dst: with open(tmp_path, 'wb') as dst:
shutil.copyfileobj(src, dst) shutil.copyfileobj(src, dst)
......
...@@ -18,10 +18,12 @@ try: ...@@ -18,10 +18,12 @@ try:
# Python 3 # Python 3
from http.server import HTTPServer, BaseHTTPRequestHandler from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.request import urlopen from urllib.request import urlopen
from urllib.parse import urlparse, unquote, parse_qsl
except ImportError: except ImportError:
# Python 2 # Python 2
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from urllib2 import urlopen from urllib2 import urlopen, unquote
from urlparse import urlparse, parse_qsl
import base64 import base64
import errno import errno
...@@ -395,6 +397,7 @@ class Handler(BaseHTTPRequestHandler): ...@@ -395,6 +397,7 @@ class Handler(BaseHTTPRequestHandler):
def __init__(self, request, address, server): def __init__(self, request, address, server):
self.__server = server self.__server = server
self.tree = server.tree self.tree = server.tree
self.url = "http://%s:%s/" % (server.server_name, server.server_port)
BaseHTTPRequestHandler.__init__(self, request, address, server) BaseHTTPRequestHandler.__init__(self, request, address, server)
def do_GET(self): def do_GET(self):
...@@ -410,6 +413,14 @@ class Handler(BaseHTTPRequestHandler): ...@@ -410,6 +413,14 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers() self.end_headers()
self.wfile.write(out) self.wfile.write(out)
def forbidden():
self.send_response(403, 'Forbidden')
out = b'<html><body>Forbidden</body></html>'
self.send_header('Content-Length', str(len(out)))
self.send_header('Content-Type', 'text/html')
self.end_headers()
self.wfile.write(out)
if self.path == '/enable_server_logging': if self.path == '/enable_server_logging':
self.__server.__log = True self.__server.__log = True
return k() return k()
...@@ -419,21 +430,32 @@ class Handler(BaseHTTPRequestHandler): ...@@ -419,21 +430,32 @@ class Handler(BaseHTTPRequestHandler):
return k() return k()
if self.path.startswith('/private/'): if self.path.startswith('/private/'):
parsed = urlparse(self.path)
auth = self.headers.get('Authorization') auth = self.headers.get('Authorization')
if auth and auth.startswith('Basic ') and \ if auth and auth.startswith('Basic ') and \
self.path[9:].encode() == base64.b64decode( parsed.path[9:].encode() == base64.b64decode(
self.headers.get('Authorization')[6:]): self.headers.get('Authorization')[6:]):
if parsed.query:
for h, v in parse_qsl(parsed.query):
if self.headers.get(h) != v:
return forbidden()
return k() return k()
# But not returning 401+WWW-Authenticate, we check that the client # But not returning 401+WWW-Authenticate, we check that the client
# skips auth challenge, which is not free (in terms of performance) # skips auth challenge, which is not free (in terms of performance)
# and useless for what we support. # and useless for what we support.
self.send_response(403, 'Forbidden') return forbidden()
out = '<html><body>Forbidden</body></html>'.encode() if self.path.startswith('/namespace/project/-/raw/master/'):
self.send_header('Content-Length', str(len(out))) # This path is private and need /api
self.send_header('Content-Type', 'text/html') return forbidden()
self.end_headers() if self.path.startswith('/api/v4/projects/namespace%2Fproject/repository/files/'):
self.wfile.write(out) # path is : /api/v4/projects/namespace%2Fproject/repository/files/FILE_PATH_XXXX/raw/?ref=master
return u = unquote(self.path.split('/')[7])
username, password = u.split(':')
token = self.headers.get('PRIVATE-TOKEN')
if password == token and username == "private_token":
return k()
return forbidden()
path = os.path.abspath(os.path.join(self.tree, *self.path.split('/'))) path = os.path.abspath(os.path.join(self.tree, *self.path.split('/')))
if not ( if not (
......
...@@ -183,6 +183,50 @@ UserError: Error downloading ...: HTTP Error 403: Forbidden ...@@ -183,6 +183,50 @@ UserError: Error downloading ...: HTTP Error 403: Forbidden
>>> is_temp; remove(path) >>> is_temp; remove(path)
True True
>>> os.environ['HOME'] = old_home >>> os.environ['HOME'] = old_home
>>> remove(netrc)
Gitlab private token file with netrc:
>>> laburl = server_url + 'namespace/project/-/raw/master/private_token:TOKENXXX'
>>> download(laburl)
Traceback (most recent call last):
UserError: Error downloading ...: HTTP Error 403: Forbidden
>>> import os, zc.buildout.download
>>> old_home = os.environ['HOME']
>>> home = os.environ['HOME'] = tmpdir('test-laburl')
>>> netrc = join(home, '.netrc')
>>> c = "machine localhost login foo password bar\n\nmacdef buildout:localhost\n\t/(.*)/foo/([\w\-]+)/bar \n\t\t{0}/bar/{1}/file/foo?ref={2.quote} \n\t/(.*)/-/raw/([\w\-]+)/(.*) \n\t\t{0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2.quote} PRIVATE-TOKEN=TOKENXXX "
>>> write(netrc, c)
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> path, is_temp = download(laburl)
>>> is_temp; remove(path)
True
Gitlab private raw with netrc and no macdef:
>>> remove(netrc)
>>> laburl = server_url + 'namespace/project/-/raw/master/private_token:TOKENPROJECT'
>>> write(netrc, 'machine localhost/namespace/project login private_token password TOKENPROJECT')
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> download(laburl)
Traceback (most recent call last):
UserError: Error downloading ...: HTTP Error 403: Forbidden
netrc with macdef can rewrite any url:
>>> remove(netrc)
>>> c = 'machine localhost login foo password bar\n\nmacdef buildout:localhost\n\t/(\w+)/redirect/(\w+):(\w+)/(\w+\s\d).* \n\t\t{0}/private/{2}:{3} h1={4.quote} h2={4} token=key token2="value 2" '
>>> write(netrc, c)
>>> url = server_url + 'notfound/redirect/foo:bar/test 2?h1=test%202&h2=test 2&token=key&token2=value 2'
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> path, is_temp = download(url)
>>> is_temp; remove(path)
True
>>> os.environ['HOME'] = old_home
>>> remove(netrc)
Downloading using the download cache Downloading using the download cache
------------------------------------ ------------------------------------
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment