Commit c390f92e authored by Kirill Smelkov's avatar Kirill Smelkov

X longest-match ref works + cache cleanup

parent dabcc517
Pipeline #114 failed with stage
......@@ -2,93 +2,56 @@
Handler for raw blob downloads
*/
/*
Cache-Control: private
ETag: "4c10677531b44f555ebbdaff24a9b2d6"
X-Content-Type-Options: nosniff
Content-Disposition: inline
Content-Transfer-Encoding: binary
Content-Type: text/plain; charset=utf-8
*/
package main
import (
"io"
// "os"
"log"
"fmt"
"bufio"
"time"
"strings"
"bytes"
"regexp"
"net/http"
"net/http/httptest"
)
// auth backend reply
type backendAuthReply struct {
// Reply from auth backend for "download from repo" authorization request
type AuthReply struct {
w *httptest.ResponseRecorder // output of backend/preAuthorizeHandler
authorizationResponse
authorizationResponse // parsed auth response from preAuthorizeHandler
}
// ask auth backend whether download is ok for project
func askAuthBackend(u *upstream, project string) backendAuthReply {
authReply := backendAuthReply{
w: httptest.NewRecorder(),
}
// request to verify whether download is possible via asking as git fetch would do
// XXX privateToken not propagated, etc ...
reqDownloadAccess, err := http.NewRequest("GET", project + ".git/info/refs?service=git-upload-pack", nil)
if err != nil {
fail500(authReply.w, "GET git-upload-pack", err)
//return false // XXX not cache as it is just we cannot create request
return authReply
}
// swap original request to 'verify-download' one XXX "swap" not correct
r := &gitRequest{
Request: reqDownloadAccess,
u: u,
}
// Entry in authorization reply cache
type AuthCacheEntry struct {
AuthReply
// downloadOk := false
preAuthorizeHandler(
func(w http.ResponseWriter, r *gitRequest) {
// if we ever get to this point - auth handler approved
// access and thus it is ok to download
// downloadOk = true
}, "") (authReply.w, r)
Tauth int64 // in seconds XXX do we strictly need this?
return authReply
// how many times this entry was hit when querying auth cache during
// the last refresh period.
Nhit int64
}
// Authorization reply cache
// {} project -> AuthCacheEntry
//
// XXX should be not only project (privateToken etc...)
var authCache = make(map[string]*AuthCacheEntry)
// authorization info, as replied by authBackend for a request
type authInfo struct {
authReply backendAuthReply
// XXX no need?
Tauth int64 // in seconds
// Time period for refreshing / removing unused entries in authCache
const authCacheRefresh = 5 * time.Second // XXX -> 30 or 60
Naccess int64
}
// {} project -> authInfo
// FIXME should be not only project (privateToken etc...)
var authCache = make(map[string]*authInfo)
// Goroutine to refresh auth cache entry periodically while it is used.
// if the entry is detected to be not used - remove it from cache and stop refreshing.
func authRefreshEntry(u *upstream, project string) {
// XXX auth := authCache[project]
// and then use auth without authCache lookup ?
const authCacheRefresh = 10 * time.Second // XXX -> 30
// refresh auth cache entry periodically while it is used
// if the entry is detected to be not used - remove it from cache and stop refreshing
func authRefresh(u *upstream, project string) {
for ;; {
log.Printf("AUTH refresh sleep ...")
//log.Printf("AUTH refresh sleep ...")
time.Sleep(authCacheRefresh)
// XXX lock?
......@@ -98,8 +61,9 @@ func authRefresh(u *upstream, project string) {
break // need to further refresh XXX ok?
}
log.Printf("AUTH refresh - %v Naccess: %v", project, auth.Naccess)
if auth.Naccess == 0 { // not used - we can remove and stop refreshing
log.Printf("AUTH refresh - %v #hit: %v", project, auth.Nhit)
if auth.Nhit == 0 { // not used - we can remove and stop refreshing
log.Printf("AUTH - removing %v", project)
// XXX lock?
delete(authCache, project)
break
......@@ -109,44 +73,81 @@ func authRefresh(u *upstream, project string) {
authReply := askAuthBackend(u, project)
// XXX lock ?
auth.authReply = authReply
auth.AuthReply = authReply
auth.Tauth = time.Now().Unix()
auth.Naccess = 0
auth.Nhit = 0
}
}
// verify that download access is authorized by auth backend
func verifyDownloadAccess(w http.ResponseWriter, r *gitRequest, project string) bool {
// askAuthBackend queries the auth backend about whether downloading from
// project is allowed and returns the backend's reply.
//
// The query is issued the same way a `git fetch` would start: a GET for
// <project>.git/info/refs?service=git-upload-pack, routed through
// preAuthorizeHandler. Whatever the handler writes is captured in the
// returned AuthReply's recorder (w); the authorization response that ends up
// on the request is copied into the returned AuthReply as well.
//
// If the request object itself cannot be built, a 500 is recorded into the
// reply's recorder and the reply is returned without contacting the backend.
func askAuthBackend(u *upstream, project string) AuthReply {
	reply := AuthReply{w: httptest.NewRecorder()}

	// Build the "may I download?" request.
	// XXX privateToken not propagated, etc ...
	refsURL := project + ".git/info/refs?service=git-upload-pack"
	authReq, err := http.NewRequest("GET", refsURL, nil)
	if err != nil {
		fail500(reply.w, "GET git-upload-pack", err)
		return reply
	}

	// Wrap it the way preAuthorizeHandler expects; per the original notes the
	// handler sends the request to the auth backend and parses the reply into
	// gitReq.authorizationResponse.
	gitReq := &gitRequest{
		Request: authReq,
		u:       u,
	}

	// Reaching this callback means the auth handler approved access, so there
	// is nothing left to do here.
	// NOTE authorizationResponse.RepoPath != "" can be used as the test for
	// "download is ok".
	onApproved := func(w http.ResponseWriter, r *gitRequest) {}

	preAuthorizeHandler(onApproved, "")(reply.w, gitReq)

	// Hand the parsed authorization response back to the caller.
	reply.authorizationResponse = gitReq.authorizationResponse
	return reply
}
// Verify that download access is authorized by auth backend
func verifyDownloadAccess(w http.ResponseWriter, u *upstream, project string) AuthReply {
// XXX do we need mutex to lock authCache ?
auth, ok := authCache[project]
if ok {
auth.Naccess++
log.Printf("authReply cached %v ago: %v (hits: %v)",
auth.Nhit++
log.Printf("authReply for %v cached ago: %v (hits: %v)",
project,
time.Since(time.Unix(auth.Tauth, 0)),
auth.authReply.authorizationResponse,
auth.Naccess)
r.authorizationResponse = auth.authReply.authorizationResponse
return (auth.authReply.RepoPath != "") // XXX ok?
auth.Nhit)
return auth.AuthReply // XXX make pointer?
}
authReply := askAuthBackend(r.u, project)
authReply := askAuthBackend(u, project)
// XXX do we need to lock authCache ?
authCache[project] = &authInfo{authReply, time.Now().Unix(), 0}
go authRefresh(r.u, project)
return (authReply.RepoPath != "")
// store in cache and start cache entry refresher
authCache[project] = &AuthCacheEntry{authReply, time.Now().Unix(), 0}
go authRefreshEntry(u, project)
return authReply
}
// HTTP handler for .../raw/<ref>/path
var projectRe = regexp.MustCompile(`^/[\w\.-]+/[\w\.-]+/`)
func handleGetBlobRaw(w http.ResponseWriter, r *gitRequest) {
Tstart := time.Now()
// extract project & refpath
// /namespace/project/raw/branch/file -> /namespace/project, branch/file
// Extract project & refpath
// <project>/raw/branch/file -> <project>, branch/file
project := projectRe.FindString(r.Request.URL.Path)
refpath := r.Request.URL.Path[len(project):]
if project == "" {
......@@ -156,68 +157,147 @@ func handleGetBlobRaw(w http.ResponseWriter, r *gitRequest) {
// assert project[-1] == "/"
project = project[:len(project)-1]
// assert refpath[:4] == "raw/"
if refpath[:4] != "raw/" {
fail500(w, "refpath != raw/...", nil)
return
}
refpath = refpath[4:]
if !verifyDownloadAccess(w, r, project) {
// XXX verifyDownloadAccess already emitted 403 headers etc ...
// Query download access auth for this project
authReply := verifyDownloadAccess(w, r.u, project)
if authReply.RepoPath == "" {
// access denied - copy auth reply to client in full -
// it carries the HTTP code and other headers / body explaining
// why access was denied.
for k, v := range authReply.w.HeaderMap {
w.Header()[k] = v
}
w.WriteHeader(authReply.w.Code)
io.Copy(w, authReply.w.Body)
return
}
handleGetBlobRaw2(w, r, refpath)
// Access granted - we can emit the blob
emitBlob(w, authReply.RepoPath, refpath)
}
Tend := time.Now()
/*
Cache-Control: private
ETag: "4c10677531b44f555ebbdaff24a9b2d6"
log.Printf("Tall: %s", Tend.Sub(Tstart))
}
X-Content-Type-Options: nosniff
Content-Disposition: inline
Content-Transfer-Encoding: binary
Content-Type: text/plain; charset=utf-8
*/
func handleGetBlobRaw2(w http.ResponseWriter, r *gitRequest, refpath string) {
Tstart := time.Now()
// XXX we assume <ref>/<path> format and ref not containing "/"
// XXX but gitlab allows ref with / and tries to do longest-match to existing refs
// TODO use reqDownloadAccess response body - it contains all refs
s := strings.SplitN(refpath, "/", 2)
if len(s) != 2 {
fail500(w, "refpath split", nil)
func emitBlob(w http.ResponseWriter, repopath string, refpath string) {
// Communicate with `git cat-file --batch` trying refs from longest
// to shortest prefix in refpath. This way we find longest-match for
// ref and get object content in the end.
queryCmd := gitCommand("", "git", "--git-dir="+repopath, "cat-file", "--batch")
queryStdin, err := queryCmd.StdinPipe()
if err != nil {
fail500(w, "git cat-file --batch; stdin", err)
return
}
defer queryStdin.Close()
queryStdout, err := queryCmd.StdoutPipe()
if err != nil {
fail500(w, "git cat-file --batch; stdout", err)
return
}
defer queryStdout.Close()
queryReader := bufio.NewReader(queryStdout)
ref, path := s[0], s[1]
//log.Printf("BLOB2 %v %v", ref, path)
err = queryCmd.Start()
if err != nil {
fail500(w, "git cat-file --batch; start", err)
return
}
defer cleanUpProcessGroup(queryCmd) // XXX do we really need this?
// refpath components as vector
refpathv := strings.Split(refpath, "/")
// scan from right to left and try to change '/' -> ':' and see if it
// creates a correct object name. If it does - we read object content
// which follows.
// TODO handle communication timeout
var sha1 string
var type_ string
var size int64
for i := len(refpathv); i > 0; i-- {
ref := strings.Join(refpathv[:i], "/")
path := strings.Join(refpathv[i:], "/")
log.Printf("Trying %v %v", ref, path)
_, err := fmt.Fprintf(queryStdin, "%s:%s\n", ref, path)
if err != nil {
fail500(w, "git cat-file --batch; write", err)
return
}
blobCmd := gitCommand(""/*XXX GL_ID*/, "git", "--git-dir="+r.RepoPath, "cat-file", "blob", "--", ref + ":" + path)
blobStdout, err := blobCmd.StdoutPipe()
reply, err := queryReader.ReadBytes('\n')
if err != nil {
fail500(w, "handleGetBlobRaw", err)
fail500(w, "git cat-file --batch; read", err)
return
}
defer blobStdout.Close()
if err:= blobCmd.Start(); err != nil {
fail500(w, "handleGetBlobRaw", err)
log.Printf("<- %s", reply)
// <object> SP missing LF
if bytes.HasSuffix(reply, []byte(" missing\n")) { // XXX byte literal?
continue
}
// <sha1> SP <type> SP <size> LF
_, err = fmt.Sscanf(string(reply), "%s %s %d\n", &sha1, &type_, &size)
if err != nil {
fail500(w, "git cat-file --batch; reply parse", err)
return;
}
if type_ != "blob" {
// XXX -> 404
fail500(w, fmt.Sprintf("git cat-file --batch-check; %v is not blob (is %v)", sha1, type_), nil)
return
}
defer cleanUpProcessGroup(blobCmd) // XXX do we need to cleanup whole group
// so we found this blob object
break
}
// was the blob found?
if sha1 == "" {
// XXX -> 404
fail400(w, "Blob not found", nil)
return
}
log.Printf("blob found, size: %d", size)
//setRawHeaders(...)
w.WriteHeader(200) // XXX too early
//_, err = io.Copy(os.Stdout, blobStdout)
//if err != nil {
// panic(err)
//}
if _, err := io.Copy(w, blobStdout); err != nil {
logContext("io.Copy", err)
log.Printf("111")
// XXX better use queryStdout instead of queryReader, but we could be
// holding some tail bytes in queryReader after chat phase
_, err = io.CopyN(w, queryReader, size)
if err != nil {
logContext("io.CopyN", err)
return
}
if err := blobCmd.Wait(); err != nil {
logContext("wait", err)
log.Printf("222")
err = queryStdin.Close()
if err != nil {
fail500(w, "queryStdin.Close", nil)
return
}
Tend := time.Now()
log.Printf("Tblob2: %s", Tend.Sub(Tstart))
log.Printf("333")
err = queryCmd.Wait()
if err != nil {
logContext("wait", err)
return
}
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment