Commit 81c99ff4 authored by Alain Takoudjou's avatar Alain Takoudjou

[feat] Try to download gitlab private raw files from gitlab API

If downloading the raw file fails, check whether it can be downloaded from
the API instead. This is possible if a username and password are provided in
the URL; the username should be PRIVATE-TOKEN (the username for a private token).
parent fb45b3e5
......@@ -22,12 +22,13 @@ try:
# Python 3
from urllib.error import HTTPError
from urllib.request import Request, urlopen
from urllib.parse import urlparse, urlunparse
from urllib.parse import urlparse, urlunparse, quote, urlencode
except ImportError:
# Python 2
from urlparse import urlparse
from urlparse import urlunparse
from urllib2 import HTTPError, Request, urlopen
from urllib2 import HTTPError, Request, urlopen, quote
from urllib import urlencode
from zc.buildout.easy_install import realpath
from base64 import b64encode
......@@ -44,7 +45,6 @@ import zc.buildout
from . import bytes2str, str2bytes
from .rmtree import rmtree
class netrc(netrc.netrc):
def __init__(*args):
......@@ -65,6 +65,9 @@ netrc = netrc()
class ChecksumError(zc.buildout.UserError):
    """Raised when a downloaded file fails its MD5 checksum verification."""
class GitlabAccessDeniedError(zc.buildout.UserError):
    """Raised when a download ends up redirected to a "users/sign_in" page."""
class Download(object):
"""Configurable download utility.
......@@ -239,6 +242,13 @@ class Download(object):
self.logger.info('using alternate URL: %s', alternate_url)
download_url = alternate_url
self.urlretrieve(alternate_url, path)
except GitlabAccessDeniedError:
laburl, header_dict = self._labraw_authproxy(url)
if len(header_dict.keys()) > 0:
# gitlab url, try from API
self.urlretrieve(laburl, path, headers=header_dict)
else:
raise
if not check_md5sum(path, md5sum):
raise ChecksumError('MD5 checksum mismatch downloading %r'
% download_url)
......@@ -284,15 +294,67 @@ class Download(object):
if auth:
return '{0}:{2}'.format(*auth), url
def urlretrieve(self, url, tmp_path):
def _labraw_authproxy(self, url):
    """Map a GitLab raw-file URL to the matching API v4 "files/.../raw" URL.

    The URL path is expected to look like
    ``/namespace/project/[-/]raw/<ref>/<filepath>``.

    Credentials are looked up in netrc, first for ``host/namespace/project``
    and then for ``host``. A ``private_token`` login is sent through the
    ``PRIVATE-TOKEN`` header; any other login is added to the query string.
    When netrc has no entry, a ``PRIVATE-TOKEN:<token>`` userinfo embedded
    in the URL is used instead.

    Returns a ``(url, header_dict)`` tuple. If the URL does not look like a
    GitLab raw URL, the original URL is returned with an empty header dict.
    ``header_dict`` may also be empty for a rewritten URL when no token
    header applies; callers use the emptiness of ``header_dict`` to decide
    whether retrying through the API is worthwhile.
    """
    header_dict = {}
    p = urlparse(url)
    pathv = p.path.split('/')
    # url path should be /namespace/project/[-/]raw/....
    if pathv[3:5] == ['-', 'raw']:
        # the url is like .../-/raw/<ref>/...
        ref_index = 5
    elif pathv[3:4] == ['raw']:
        # the url is like .../raw/<ref>/...
        ref_index = 4
    else:
        return url, header_dict
    if len(pathv) <= ref_index:
        # Nothing follows "raw": there is no ref to extract, so treat the
        # URL as a non-raw one instead of failing with an IndexError.
        return url, header_dict

    repo = '/'.join(pathv[1:3])
    # FIXME this does not support refs like y/bstr.
    # To support this we will need to do what
    # https://lab.nexedi.com/nexedi/gitlab-workhorse/commit/5b8cf10e
    # was doing - try to extract all variants for ref from longest to
    # shortest and stop on the first variant that yields good result.
    ref = pathv[ref_index]
    filepath = '/'.join(pathv[ref_index + 1:])
    query = {'ref': ref}

    # auth for lab.nexedi.com/namespace/project takes precedence over
    # auth for lab.nexedi.com
    project_auth = netrc.authenticators('%s/%s' % (p.hostname, repo))
    host_auth = netrc.authenticators(p.hostname)
    auth = host_auth if project_auth is None else project_auth
    if auth is not None:
        if auth[0] == "private_token":
            header_dict["PRIVATE-TOKEN"] = auth[2]
        else:
            query[auth[0]] = auth[2]  # only private_token is supported ?
    elif p.username == "PRIVATE-TOKEN" and p.password:
        header_dict["PRIVATE-TOKEN"] = p.password

    # Project and file path are single path components in the API URL, so
    # '/' must be percent-encoded too (hence the empty "safe" argument).
    qrepo = quote(repo, '')
    qfilepath = quote(filepath, '')
    path = '/api/v4/projects/%s/repository/files/%s/raw' % (qrepo, qfilepath)
    # Rebuild netloc from hostname/port so that userinfo is dropped.
    netloc = '%s:%s' % (p.hostname, p.port) if p.port else p.hostname
    return urlunparse((p.scheme, netloc, path, p.params,
                       urlencode(query), p.fragment)), header_dict
def urlretrieve(self, url, tmp_path, headers=None):
    """Download ``url`` into the file ``tmp_path``.

    If ``self._auth(url)`` yields credentials, the request is made against
    the URL it returns, with an HTTP Basic ``Authorization`` header.
    Every item of ``headers`` (a mapping of header name to value, optional)
    is added to the request as well.

    Returns a ``(tmp_path, response_info)`` tuple.

    Raises GitlabAccessDeniedError when the final URL of the response ends
    with "users/sign_in", i.e. the request was redirected to a sign-in page.
    """
    auth = self._auth(url)
    if auth:
        req = Request(auth[1])
        req.add_header("Authorization",
                       "Basic " + bytes2str(b64encode(str2bytes(auth[0]))))
    else:
        req = Request(url)
    # ``headers`` defaults to None rather than a shared mutable dict.
    if headers:
        for name, value in headers.items():
            req.add_header(name, value)
    with closing(urlopen(req)) as src:
        # If access to gitlab url was denied,
        # we have been redirected to BASE_URL/users/sign_in
        if src.url.endswith("users/sign_in"):
            raise GitlabAccessDeniedError("Redirected to Sign in page")
        with open(tmp_path, 'wb') as dst:
            shutil.copyfileobj(src, dst)
        return tmp_path, src.info()
......
......@@ -18,10 +18,11 @@ try:
# Python 3
from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.request import urlopen
from urllib.parse import unquote
except ImportError:
# Python 2
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from urllib2 import urlopen
from urllib2 import urlopen, unquote
import base64
import errno
......@@ -395,6 +396,7 @@ class Handler(BaseHTTPRequestHandler):
def __init__(self, request, address, server):
    """Remember the owning server, its tree and its base URL, then
    delegate to BaseHTTPRequestHandler.

    The attributes are assigned before calling the base initializer
    because socketserver's request handlers process the request from
    within their constructor.
    """
    self.__server = server
    self.tree = server.tree
    # Base URL of this test server, e.g. used to build redirect targets.
    host, port = server.server_name, server.server_port
    self.url = "http://%s:%s/" % (host, port)
    BaseHTTPRequestHandler.__init__(self, request, address, server)
def do_GET(self):
......@@ -434,6 +436,27 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers()
self.wfile.write(out)
return
if self.path.startswith('/namespace/project/-/raw/master/'):
self.send_response(301)
self.send_header('Location', '%susers/sign_in' % self.url)
self.end_headers()
return
if self.path.startswith('/users/sign_in'):
return k()
if self.path.startswith('/api/v4/projects/namespace%2Fproject/repository/files/'):
# path is : /api/v4/projects/namespace%2Fproject/repository/files/private_token:TOKENXXX/raw/?ref=master
u = unquote(self.path.split('/')[7])
username, password = u.split(':')
token = self.headers.get('PRIVATE-TOKEN')
if username == "private_token" and password == token:
return k()
self.send_response(403, 'Forbidden')
out = '<html><body>Forbidden</body></html>'.encode()
self.send_header('Content-Length', str(len(out)))
self.send_header('Content-Type', 'text/html')
self.end_headers()
self.wfile.write(out)
return
path = os.path.abspath(os.path.join(self.tree, *self.path.split('/')))
if not (
......
......@@ -184,6 +184,53 @@ UserError: Error downloading ...: HTTP Error 403: Forbidden
True
>>> os.environ['HOME'] = old_home
Gitlab private raw file with netrc:
>>> laburl = server_url + 'namespace/project/-/raw/master/private_token:TOKENXXX'
>>> download(laburl)
Traceback (most recent call last):
GitlabAccessDeniedError: Redirected to Sign in page
>>> import os, zc.buildout.download
>>> old_home = os.environ['HOME']
>>> home = os.environ['HOME'] = tmpdir('test-laburl')
>>> netrc = join(home, '.netrc')
>>> write(netrc, 'machine localhost login private_token password TOKENXXX')
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> path, is_temp = download(laburl)
>>> is_temp; remove(path)
True
Gitlab private raw with project token:
>>> remove(netrc)
>>> laburl = server_url + 'namespace/project/-/raw/master/private_token:TOKENPROJECT'
>>> write(netrc, 'machine localhost/namespace/project login private_token password TOKENPROJECT')
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> path, is_temp = download(laburl)
>>> is_temp; remove(path)
True
Gitlab private raw file HTTP basic authentication:
>>> remove(netrc)
>>> template_url = server_url.replace('/localhost:', '/%s@localhost:')
>>> base_url = template_url % "PRIVATE-TOKEN:TOKENXXX"
>>> laburl = base_url + 'namespace/project/-/raw/master/private_token:TOKENXXX'
>>> write(netrc, 'machine localdomain login foo password bar')
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> path, is_temp = download(laburl)
>>> is_temp; remove(path)
True
>>> base_url = template_url % "PRIVATE-TOKEN:BADTOKENXXX"
>>> laburl = base_url + 'namespace/project/-/raw/master/private_token:TOKENXXX'
>>> download(laburl)
Traceback (most recent call last):
UserError: Error downloading ...: HTTP Error 403: Forbidden
>>> os.environ['HOME'] = old_home
Downloading using the download cache
------------------------------------
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment