Commit 389ea203 authored by Julien Muchembled's avatar Julien Muchembled

fixup! [feat] allow to rewrite url before download using netrc and macdef

(new commit message)

This adds a generic mechanism in the Download API to rewrite the URL
to be downloaded, possibly with extra headers. Substituted groups from
the matching regular expression can be optionally quoted.

.netrc:

macdef buildout:HOSTNAME
  RULE_1_REGEX
    RULE_1_NEW_URL HEADER1=VALUE1 HEADER2=VALUE2 ...
  RULE_2_REGEX
    RULE_2_NEW_URL ...
  ...

macdef buildout:OTHER_HOSTNAME
  ...

A rewriting rule is defined by a pair of lines with optional indentation
(only there for readability).

The first line of a rule is a regex that matches fully against
the path?query part of the URL.

If the second line is empty, the request isn't changed.
Otherwise, its parts are parsed using shell-like syntax:
- the first one must produce the full URL to download
- the next ones are optional headers to send
- each part is subject to regex substitution using the
  Python Format Specification Mini-Language:
  captured groups from the first line are numbered starting from 1,
  {0} is the base URL (scheme://location) and
  the optional `quote` attribute returns the urlencoded value

A use case is to work around
  https://gitlab.com/gitlab-org/gitlab/-/issues/19189
for example with the following .netrc:

    macdef buildout:lab.nexedi.com
      /(.+)/-/raw/([^/]+)/(.+)
        {0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2} PRIVATE-TOKEN=<ACCESS_TOKEN>
parent 5c3bcc82
...@@ -317,7 +317,7 @@ class Download(object): ...@@ -317,7 +317,7 @@ class Download(object):
self.logger.info('Downloading from url: %s', auth[2]) self.logger.info('Downloading from url: %s', auth[2])
for header in auth[3]: for header in auth[3]:
req.add_header(*header.split('=', 1)) req.add_header(*header.split('=', 1))
cred = auth[0] if auth[1] is None else '{0}:{1}'.format(*auth) cred = auth[0] if auth[1] is None else ':'.join(auth)
req.add_header("Authorization", req.add_header("Authorization",
"Basic " + bytes2str(b64encode(str2bytes(cred)))) "Basic " + bytes2str(b64encode(str2bytes(cred))))
else: else:
......
...@@ -397,7 +397,6 @@ class Handler(BaseHTTPRequestHandler): ...@@ -397,7 +397,6 @@ class Handler(BaseHTTPRequestHandler):
def __init__(self, request, address, server): def __init__(self, request, address, server):
self.__server = server self.__server = server
self.tree = server.tree self.tree = server.tree
self.url = "http://%s:%s/" % (server.server_name, server.server_port)
  • @xavier_thompson I couldn't find the purpose of this added line. But I also haven't tested my change. I'll let you decide.

Please register or sign in to reply
BaseHTTPRequestHandler.__init__(self, request, address, server) BaseHTTPRequestHandler.__init__(self, request, address, server)
def do_GET(self): def do_GET(self):
...@@ -449,7 +448,7 @@ class Handler(BaseHTTPRequestHandler): ...@@ -449,7 +448,7 @@ class Handler(BaseHTTPRequestHandler):
# This path is private and need /api # This path is private and need /api
return forbidden() return forbidden()
if self.path.startswith('/api/v4/projects/namespace%2Fproject/repository/files/'): if self.path.startswith('/api/v4/projects/namespace%2Fproject/repository/files/'):
# path is : /api/v4/projects/namespace%2Fproject/repository/files/FILE_PATH_XXXX/raw/?ref=master # path is: .../files/FILE_PATH_XXXX/raw/?ref=master
u = unquote(self.path.split('/')[7]) u = unquote(self.path.split('/')[7])
username, password = u.split(':') username, password = u.split(':')
token = self.headers.get('PRIVATE-TOKEN') token = self.headers.get('PRIVATE-TOKEN')
......
...@@ -195,7 +195,7 @@ UserError: Error downloading ...: HTTP Error 403: Forbidden ...@@ -195,7 +195,7 @@ UserError: Error downloading ...: HTTP Error 403: Forbidden
>>> old_home = os.environ['HOME'] >>> old_home = os.environ['HOME']
>>> home = os.environ['HOME'] = tmpdir('test-laburl') >>> home = os.environ['HOME'] = tmpdir('test-laburl')
>>> netrc = join(home, '.netrc') >>> netrc = join(home, '.netrc')
>>> c = "machine localhost login foo password bar\n\nmacdef buildout:localhost\n\t/(.*)/foo/([\w\-]+)/bar \n\t\t{0}/bar/{1}/file/foo?ref={2.quote} \n\t/(.*)/-/raw/([\w\-]+)/(.*) \n\t\t{0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2.quote} PRIVATE-TOKEN=TOKENXXX " >>> c = "machine localhost\nlogin foo\npassword bar\n\nmacdef buildout:localhost\n\t/(.+)/foo/([^/]+)/bar \n\t\t{0}/bar/{1}/file/foo?ref={2} \n\t/(.+)/-/raw/([^/]+)/(.+) \n\t\t{0}/api/v4/projects/{1.quote}/repository/files/{3.quote}/raw?ref={2} PRIVATE-TOKEN=TOKENXXX"
>>> write(netrc, c) >>> write(netrc, c)
>>> os.chmod(netrc, 0o600) >>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__() >>> zc.buildout.download.netrc.__init__()
...@@ -217,7 +217,7 @@ UserError: Error downloading ...: HTTP Error 403: Forbidden ...@@ -217,7 +217,7 @@ UserError: Error downloading ...: HTTP Error 403: Forbidden
netrc with macdef rewrite any url netrc with macdef rewrite any url
>>> remove(netrc) >>> remove(netrc)
>>> c = 'machine localhost login foo password bar\n\nmacdef buildout:localhost\n\t/(\w+)/redirect/(\w+):(\w+)/(\w+\s\d).* \n\t\t{0}/private/{2}:{3} h1={4.quote} h2={4} token=key token2="value 2" ' >>> c = 'machine localhost\nlogin foo\npassword bar\n\nmacdef buildout:localhost\n\t/(\w+)/redirect/(\w+):(\w+)/(\w+\s\d).* \n\t\t{0}/private/{2}:{3} h1={4.quote} h2={4} token=key token2="value 2"'
>>> write(netrc, c) >>> write(netrc, c)
>>> url = server_url + 'notfound/redirect/foo:bar/test 2?h1=test%202&h2=test 2&token=key&token2=value 2' >>> url = server_url + 'notfound/redirect/foo:bar/test 2?h1=test%202&h2=test 2&token=key&token2=value 2'
>>> os.chmod(netrc, 0o600) >>> os.chmod(netrc, 0o600)
......
  • I wanted to say more about:

    • we should mention (at least with a link) what we are replacing (an extension to gitlab);
    • that is too much work to maintain
    • probably also a word about performance, because if I understand correctly our gitlab patch was also about that
    • (and the gitlab issue if confirmed)

    but I realized this is mostly specific to Nexedi and we should rather focus on upstreaming our changes to buildout.

    I also wondered if there could be other services that don't support basic auth. The gitlab limitation is so old and easy to fix that I thought it was on purpose (some security reason?) but I couldn't find any justification on the gitlab issue.

Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment