Commit 81c99ff4 authored by Alain Takoudjou

[feat] Try to download gitlab private raw files from gitlab API

If downloading the raw file fails, check whether it can be downloaded
through the GitLab API instead. This is possible if a username and password
are provided in the URL; the username should be PRIVATE-TOKEN (this is the
username used for a private token).
parent fb45b3e5
...@@ -22,12 +22,13 @@ try: ...@@ -22,12 +22,13 @@ try:
# Python 3 # Python 3
from urllib.error import HTTPError from urllib.error import HTTPError
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
from urllib.parse import urlparse, urlunparse from urllib.parse import urlparse, urlunparse, quote, urlencode
except ImportError: except ImportError:
# Python 2 # Python 2
from urlparse import urlparse from urlparse import urlparse
from urlparse import urlunparse from urlparse import urlunparse
from urllib2 import HTTPError, Request, urlopen from urllib2 import HTTPError, Request, urlopen, quote
from urllib import urlencode
from zc.buildout.easy_install import realpath from zc.buildout.easy_install import realpath
from base64 import b64encode from base64 import b64encode
...@@ -44,7 +45,6 @@ import zc.buildout ...@@ -44,7 +45,6 @@ import zc.buildout
from . import bytes2str, str2bytes from . import bytes2str, str2bytes
from .rmtree import rmtree from .rmtree import rmtree
class netrc(netrc.netrc): class netrc(netrc.netrc):
def __init__(*args): def __init__(*args):
...@@ -65,6 +65,9 @@ netrc = netrc() ...@@ -65,6 +65,9 @@ netrc = netrc()
class ChecksumError(zc.buildout.UserError):
    """Raised when a downloaded file does not match its expected MD5 checksum."""
class GitlabAccessDeniedError(zc.buildout.UserError):
    """Raised when a download request ends up on the GitLab sign-in page."""
class Download(object): class Download(object):
"""Configurable download utility. """Configurable download utility.
...@@ -239,6 +242,13 @@ class Download(object): ...@@ -239,6 +242,13 @@ class Download(object):
self.logger.info('using alternate URL: %s', alternate_url) self.logger.info('using alternate URL: %s', alternate_url)
download_url = alternate_url download_url = alternate_url
self.urlretrieve(alternate_url, path) self.urlretrieve(alternate_url, path)
except GitlabAccessDeniedError:
laburl, header_dict = self._labraw_authproxy(url)
if len(header_dict.keys()) > 0:
# gitlab url, try from API
self.urlretrieve(laburl, path, headers=header_dict)
else:
raise
if not check_md5sum(path, md5sum): if not check_md5sum(path, md5sum):
raise ChecksumError('MD5 checksum mismatch downloading %r' raise ChecksumError('MD5 checksum mismatch downloading %r'
% download_url) % download_url)
...@@ -284,15 +294,67 @@ class Download(object): ...@@ -284,15 +294,67 @@ class Download(object):
if auth: if auth:
return '{0}:{2}'.format(*auth), url return '{0}:{2}'.format(*auth), url
def _labraw_authproxy(self, url): # -> (url', header_dict)
    """Translate a GitLab raw-file URL into a repository-files API URL.

    Only paths shaped like ``/namespace/project/raw/<ref>/<file>`` or
    ``/namespace/project/-/raw/<ref>/<file>`` are translated.

    Credentials are looked up in netrc, first for
    ``host/namespace/project`` and then for ``host``.  If netrc has no
    entry, a ``PRIVATE-TOKEN:<token>`` userinfo part of the URL is used.

    Returns a ``(api_url, header_dict)`` tuple.  ``header_dict`` carries
    the ``PRIVATE-TOKEN`` header when a token was found and is empty
    otherwise.  When ``url`` is not a recognised raw-file URL (wrong
    shape, no ref segment, or no hostname) it is returned unchanged
    together with an empty ``header_dict``.
    """
    header_dict = {}
    p = urlparse(url)
    pathv = p.path.split('/')
    # url path should be /namespace/project/[-/]raw/<ref>/<filepath>
    if pathv[3:5] == ['-', 'raw']:      # the url is like .../-/raw/...
        ref_index = 5
    elif pathv[3:4] == ['raw']:         # the url is like .../raw/...
        ref_index = 4
    else:
        return url, header_dict
    # A raw URL without a ref segment (e.g. /namespace/project/-/raw) or
    # without a host cannot be mapped to an API URL; report "not
    # convertible" instead of failing with IndexError / a None netloc.
    if not p.hostname or len(pathv) <= ref_index:
        return url, header_dict
    repo = '/'.join(pathv[1:3])
    # FIXME this does not support refs like y/bstr.
    # To support this we will need to do what
    # https://lab.nexedi.com/nexedi/gitlab-workhorse/commit/5b8cf10e
    # was doing - try to extract all variants for ref from longest to
    # shortest and stop on the first variant that yields good result.
    ref = pathv[ref_index]
    filepath = '/'.join(pathv[ref_index + 1:])
    query = {'ref': ref}
    # Project-specific credentials (lab.nexedi.com/namespace/project)
    # take precedence over host-wide ones (lab.nexedi.com).
    auth = netrc.authenticators('%s/%s' % (p.hostname, repo))
    if auth is None:
        auth = netrc.authenticators(p.hostname)
    if auth is not None:
        if auth[0] == "private_token":
            header_dict["PRIVATE-TOKEN"] = auth[2]
        else:
            # Any other netrc login is sent as a query parameter and
            # header_dict stays empty.
            query[auth[0]] = auth[2] # only private_token is supported ?
    elif p.username == "PRIVATE-TOKEN" and p.password:
        header_dict["PRIVATE-TOKEN"] = p.password
    qrepo = quote(repo, '')
    qfilepath = quote(filepath, '')
    path = '/api/v4/projects/%s/repository/files/%s/raw' % (qrepo, qfilepath)
    # The netloc is rebuilt without the userinfo part: the token travels
    # in the header.  urlparse() strips the brackets of an IPv6 literal
    # from .hostname, so they are put back here.
    host = p.hostname
    if ':' in host:
        host = '[%s]' % host
    netloc = '%s:%s' % (host, p.port) if p.port else host
    return urlunparse((p.scheme, netloc, path, p.params,
                       urlencode(query), p.fragment)), header_dict
def urlretrieve(self, url, tmp_path, headers=None):
    """Download ``url`` into the file ``tmp_path``.

    ``headers`` is an optional mapping of extra HTTP request headers
    (name -> value) added to the request.

    When ``self._auth(url)`` yields credentials, they are sent as an
    HTTP Basic ``Authorization`` header and the URL it returns is
    requested instead of ``url``.

    Returns ``(tmp_path, info)`` where ``info`` is the result of the
    response's ``info()`` call.

    Raises GitlabAccessDeniedError when the final response URL ends
    with ``users/sign_in``; nothing is written to ``tmp_path`` then.
    """
    auth = self._auth(url)
    if auth:
        req = Request(auth[1])
        req.add_header("Authorization",
                       "Basic " + bytes2str(b64encode(str2bytes(auth[0]))))
    else:
        req = Request(url)
    # ``headers`` defaults to None rather than a shared mutable dict.
    for k, v in (headers or {}).items():
        req.add_header(k, v)
    with closing(urlopen(req)) as src:
        # If access to gitlab url was denied,
        # we have been redirected to BASE_URL/users/sign_in
        if src.url.endswith("users/sign_in"):
            raise GitlabAccessDeniedError("Redirected to Sign in page")
        with open(tmp_path, 'wb') as dst:
            shutil.copyfileobj(src, dst)
        return tmp_path, src.info()
......
...@@ -18,10 +18,11 @@ try: ...@@ -18,10 +18,11 @@ try:
# Python 3 # Python 3
from http.server import HTTPServer, BaseHTTPRequestHandler from http.server import HTTPServer, BaseHTTPRequestHandler
from urllib.request import urlopen from urllib.request import urlopen
from urllib.parse import unquote
except ImportError: except ImportError:
# Python 2 # Python 2
from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
from urllib2 import urlopen from urllib2 import urlopen, unquote
import base64 import base64
import errno import errno
...@@ -395,6 +396,7 @@ class Handler(BaseHTTPRequestHandler): ...@@ -395,6 +396,7 @@ class Handler(BaseHTTPRequestHandler):
def __init__(self, request, address, server):
    """Remember the owning server's settings, then run the base handler setup."""
    # Base URL of this server; used to build redirect targets.
    self.url = "http://%s:%s/" % (server.server_name, server.server_port)
    self.tree = server.tree
    self.__server = server
    # Attributes are assigned before delegating to the base class
    # constructor, as in the original ordering.
    BaseHTTPRequestHandler.__init__(self, request, address, server)
def do_GET(self): def do_GET(self):
...@@ -434,6 +436,27 @@ class Handler(BaseHTTPRequestHandler): ...@@ -434,6 +436,27 @@ class Handler(BaseHTTPRequestHandler):
self.end_headers() self.end_headers()
self.wfile.write(out) self.wfile.write(out)
return return
if self.path.startswith('/namespace/project/-/raw/master/'):
self.send_response(301)
self.send_header('Location', '%susers/sign_in' % self.url)
self.end_headers()
return
if self.path.startswith('/users/sign_in'):
return k()
if self.path.startswith('/api/v4/projects/namespace%2Fproject/repository/files/'):
# path is : /api/v4/projects/namespace%2Fproject/repository/files/private_token:TOKENXXX/raw/?ref=master
u = unquote(self.path.split('/')[7])
username, password = u.split(':')
token = self.headers.get('PRIVATE-TOKEN')
if username == "private_token" and password == token:
return k()
self.send_response(403, 'Forbidden')
out = '<html><body>Forbidden</body></html>'.encode()
self.send_header('Content-Length', str(len(out)))
self.send_header('Content-Type', 'text/html')
self.end_headers()
self.wfile.write(out)
return
path = os.path.abspath(os.path.join(self.tree, *self.path.split('/'))) path = os.path.abspath(os.path.join(self.tree, *self.path.split('/')))
if not ( if not (
......
...@@ -184,6 +184,53 @@ UserError: Error downloading ...: HTTP Error 403: Forbidden ...@@ -184,6 +184,53 @@ UserError: Error downloading ...: HTTP Error 403: Forbidden
True True
>>> os.environ['HOME'] = old_home >>> os.environ['HOME'] = old_home
Gitlab private raw file with netrc:
>>> laburl = server_url + 'namespace/project/-/raw/master/private_token:TOKENXXX'
>>> download(laburl)
Traceback (most recent call last):
GitlabAccessDeniedError: Redirected to Sign in page
>>> import os, zc.buildout.download
>>> old_home = os.environ['HOME']
>>> home = os.environ['HOME'] = tmpdir('test-laburl')
>>> netrc = join(home, '.netrc')
>>> write(netrc, 'machine localhost login private_token password TOKENXXX')
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> path, is_temp = download(laburl)
>>> is_temp; remove(path)
True
Gitlab private raw with project token:
>>> remove(netrc)
>>> laburl = server_url + 'namespace/project/-/raw/master/private_token:TOKENPROJECT'
>>> write(netrc, 'machine localhost/namespace/project login private_token password TOKENPROJECT')
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> path, is_temp = download(laburl)
>>> is_temp; remove(path)
True
Gitlab private raw file HTTP basic authentication:
>>> remove(netrc)
>>> template_url = server_url.replace('/localhost:', '/%s@localhost:')
>>> base_url = template_url % "PRIVATE-TOKEN:TOKENXXX"
>>> laburl = base_url + 'namespace/project/-/raw/master/private_token:TOKENXXX'
>>> write(netrc, 'machine localdomain login foo password bar')
>>> os.chmod(netrc, 0o600)
>>> zc.buildout.download.netrc.__init__()
>>> path, is_temp = download(laburl)
>>> is_temp; remove(path)
True
>>> base_url = template_url % "PRIVATE-TOKEN:BADTOKENXXX"
>>> laburl = base_url + 'namespace/project/-/raw/master/private_token:TOKENXXX'
>>> download(laburl)
Traceback (most recent call last):
UserError: Error downloading ...: HTTP Error 403: Forbidden
>>> os.environ['HOME'] = old_home
Downloading using the download cache Downloading using the download cache
------------------------------------ ------------------------------------
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment