Commit 5f330b47 authored by Stan Hu's avatar Stan Hu

Only generate CI artifact metadata for ZIP files

In the beginning, CI artifacts were resticted to ZIP files, but they
have been expanded to be JSON files, tar.gz files, and more. Prior to
this change, these gitlab-zip-metadata would be run for each of these
non-ZIP artifacts, generating lots of noise in the logs.

We now inspect the `artifact_format` query string to determine whether
to generate metadata. If the format is ZIP, which is the case for CI
archives and LSIF files, we generate metadata.

Relates to https://gitlab.com/gitlab-org/gitlab-workhorse/-/issues/272
parent da82ee96
---
title: Only generate CI artifact metadata for ZIP files
merge_request: 627
author:
type: fixed
...@@ -98,6 +98,8 @@ func TestUploadHandlerSendingToExternalStorage(t *testing.T) { ...@@ -98,6 +98,8 @@ func TestUploadHandlerSendingToExternalStorage(t *testing.T) {
storeServer := httptest.NewServer(storeServerMux) storeServer := httptest.NewServer(storeServerMux)
defer storeServer.Close() defer storeServer.Close()
qs := fmt.Sprintf("?%s=%s", ArtifactFormatKey, ArtifactFormatZip)
tests := []struct { tests := []struct {
name string name string
preauth api.Response preauth api.Response
...@@ -106,7 +108,7 @@ func TestUploadHandlerSendingToExternalStorage(t *testing.T) { ...@@ -106,7 +108,7 @@ func TestUploadHandlerSendingToExternalStorage(t *testing.T) {
name: "ObjectStore Upload", name: "ObjectStore Upload",
preauth: api.Response{ preauth: api.Response{
RemoteObject: api.RemoteObject{ RemoteObject: api.RemoteObject{
StoreURL: storeServer.URL + "/url/put", StoreURL: storeServer.URL + "/url/put" + qs,
ID: "store-id", ID: "store-id",
GetURL: storeServer.URL + "/store-id", GetURL: storeServer.URL + "/store-id",
}, },
...@@ -123,7 +125,7 @@ func TestUploadHandlerSendingToExternalStorage(t *testing.T) { ...@@ -123,7 +125,7 @@ func TestUploadHandlerSendingToExternalStorage(t *testing.T) {
defer ts.Close() defer ts.Close()
contentBuffer, contentType := createTestMultipartForm(t, archiveData) contentBuffer, contentType := createTestMultipartForm(t, archiveData)
response := testUploadArtifacts(t, contentType, ts.URL+Path, &contentBuffer) response := testUploadArtifacts(t, contentType, ts.URL+Path+qs, &contentBuffer)
require.Equal(t, http.StatusOK, response.Code) require.Equal(t, http.StatusOK, response.Code)
testhelper.RequireResponseHeader(t, response, MetadataHeaderKey, MetadataHeaderPresent) testhelper.RequireResponseHeader(t, response, MetadataHeaderKey, MetadataHeaderPresent)
require.Equal(t, 1, storeServerCalled, "store should be called only once") require.Equal(t, 1, storeServerCalled, "store should be called only once")
......
...@@ -8,6 +8,7 @@ import ( ...@@ -8,6 +8,7 @@ import (
"net/http" "net/http"
"os" "os"
"os/exec" "os/exec"
"strings"
"syscall" "syscall"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
...@@ -20,6 +21,13 @@ import ( ...@@ -20,6 +21,13 @@ import (
"gitlab.com/gitlab-org/gitlab-workhorse/internal/zipartifacts" "gitlab.com/gitlab-org/gitlab-workhorse/internal/zipartifacts"
) )
// Sent by the runner: https://gitlab.com/gitlab-org/gitlab-runner/blob/c24da19ecce8808d9d2950896f70c94f5ea1cc2e/network/gitlab.go#L580
const (
ArtifactFormatKey = "artifact_format"
ArtifactFormatZip = "zip"
ArtifactFormatDefault = ""
)
var zipSubcommandsErrorsCounter = prometheus.NewCounterVec( var zipSubcommandsErrorsCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{ prometheus.CounterOpts{
Name: "gitlab_workhorse_zip_subcommand_errors_total", Name: "gitlab_workhorse_zip_subcommand_errors_total",
...@@ -31,7 +39,8 @@ func init() { ...@@ -31,7 +39,8 @@ func init() {
} }
type artifactsUploadProcessor struct { type artifactsUploadProcessor struct {
opts *filestore.SaveFileOpts opts *filestore.SaveFileOpts
format string
upload.SavedFileTracker upload.SavedFileTracker
} }
...@@ -112,26 +121,30 @@ func (a *artifactsUploadProcessor) ProcessFile(ctx context.Context, formName str ...@@ -112,26 +121,30 @@ func (a *artifactsUploadProcessor) ProcessFile(ctx context.Context, formName str
select { select {
case <-ctx.Done(): case <-ctx.Done():
return fmt.Errorf("ProcessFile: context done") return fmt.Errorf("ProcessFile: context done")
default: default:
// TODO: can we rely on disk for shipping metadata? Not if we split workhorse and rails in 2 different PODs }
metadata, err := a.generateMetadataFromZip(ctx, file)
if err != nil {
return err
}
if metadata != nil { if !strings.EqualFold(a.format, ArtifactFormatZip) && a.format != ArtifactFormatDefault {
fields, err := metadata.GitLabFinalizeFields("metadata") return nil
if err != nil { }
return fmt.Errorf("finalize metadata field error: %v", err)
}
for k, v := range fields { // TODO: can we rely on disk for shipping metadata? Not if we split workhorse and rails in 2 different PODs
writer.WriteField(k, v) metadata, err := a.generateMetadataFromZip(ctx, file)
} if err != nil {
return err
}
if metadata != nil {
fields, err := metadata.GitLabFinalizeFields("metadata")
if err != nil {
return fmt.Errorf("finalize metadata field error: %v", err)
}
a.Track("metadata", metadata.LocalPath) for k, v := range fields {
writer.WriteField(k, v)
} }
a.Track("metadata", metadata.LocalPath)
} }
return nil return nil
...@@ -149,7 +162,9 @@ func UploadArtifacts(myAPI *api.API, h http.Handler, p upload.Preparer) http.Han ...@@ -149,7 +162,9 @@ func UploadArtifacts(myAPI *api.API, h http.Handler, p upload.Preparer) http.Han
return return
} }
mg := &artifactsUploadProcessor{opts: opts, SavedFileTracker: upload.SavedFileTracker{Request: r}} format := r.URL.Query().Get(ArtifactFormatKey)
mg := &artifactsUploadProcessor{opts: opts, format: format, SavedFileTracker: upload.SavedFileTracker{Request: r}}
upload.HandleFileUploads(w, r, h, a, mg, opts) upload.HandleFileUploads(w, r, h, a, mg, opts)
}, "/authorize") }, "/authorize")
} }
...@@ -5,6 +5,7 @@ import ( ...@@ -5,6 +5,7 @@ import (
"bytes" "bytes"
"compress/gzip" "compress/gzip"
"encoding/json" "encoding/json"
"fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"mime/multipart" "mime/multipart"
...@@ -119,7 +120,7 @@ type testServer struct { ...@@ -119,7 +120,7 @@ type testServer struct {
cleanup func() cleanup func()
} }
func setupWithTmpPath(t *testing.T, filename string, authResponse *api.Response, bodyProcessor func(w http.ResponseWriter, r *http.Request)) *testServer { func setupWithTmpPath(t *testing.T, filename string, includeFormat bool, format string, authResponse *api.Response, bodyProcessor func(w http.ResponseWriter, r *http.Request)) *testServer {
tempPath, err := ioutil.TempDir("", "uploads") tempPath, err := ioutil.TempDir("", "uploads")
require.NoError(t, err) require.NoError(t, err)
...@@ -141,7 +142,13 @@ func setupWithTmpPath(t *testing.T, filename string, authResponse *api.Response, ...@@ -141,7 +142,13 @@ func setupWithTmpPath(t *testing.T, filename string, authResponse *api.Response,
require.NoError(t, writer.Close()) require.NoError(t, writer.Close())
} }
return &testServer{url: ts.URL + Path, writer: writer, buffer: &buffer, fileWriter: fileWriter, cleanup: cleanup} qs := ""
if includeFormat {
qs = fmt.Sprintf("?%s=%s", ArtifactFormatKey, format)
}
return &testServer{url: ts.URL + Path + qs, writer: writer, buffer: &buffer, fileWriter: fileWriter, cleanup: cleanup}
} }
func testUploadArtifacts(t *testing.T, contentType, url string, body io.Reader) *httptest.ResponseRecorder { func testUploadArtifacts(t *testing.T, contentType, url string, body io.Reader) *httptest.ResponseRecorder {
...@@ -160,37 +167,91 @@ func testUploadArtifacts(t *testing.T, contentType, url string, body io.Reader) ...@@ -160,37 +167,91 @@ func testUploadArtifacts(t *testing.T, contentType, url string, body io.Reader)
} }
func TestUploadHandlerAddingMetadata(t *testing.T) { func TestUploadHandlerAddingMetadata(t *testing.T) {
s := setupWithTmpPath(t, "file", nil, testCases := []struct {
desc string
format string
includeFormat bool
}{
{
desc: "ZIP format",
format: ArtifactFormatZip,
includeFormat: true,
},
{
desc: "default format",
format: ArtifactFormatDefault,
includeFormat: true,
},
{
desc: "default format without artifact_format",
format: ArtifactFormatDefault,
includeFormat: false,
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
s := setupWithTmpPath(t, "file", tc.includeFormat, tc.format, nil,
func(w http.ResponseWriter, r *http.Request) {
token, err := jwt.ParseWithClaims(r.Header.Get(upload.RewrittenFieldsHeader), &upload.MultipartClaims{}, testhelper.ParseJWT)
require.NoError(t, err)
rewrittenFields := token.Claims.(*upload.MultipartClaims).RewrittenFields
require.Equal(t, 2, len(rewrittenFields))
require.Contains(t, rewrittenFields, "file")
require.Contains(t, rewrittenFields, "metadata")
require.Contains(t, r.PostForm, "file.gitlab-workhorse-upload")
require.Contains(t, r.PostForm, "metadata.gitlab-workhorse-upload")
},
)
defer s.cleanup()
archive := zip.NewWriter(s.fileWriter)
file, err := archive.Create("test.file")
require.NotNil(t, file)
require.NoError(t, err)
require.NoError(t, archive.Close())
require.NoError(t, s.writer.Close())
response := testUploadArtifacts(t, s.writer.FormDataContentType(), s.url, s.buffer)
require.Equal(t, http.StatusOK, response.Code)
testhelper.RequireResponseHeader(t, response, MetadataHeaderKey, MetadataHeaderPresent)
})
}
}
func TestUploadHandlerTarArtifact(t *testing.T) {
s := setupWithTmpPath(t, "file", true, "tar", nil,
func(w http.ResponseWriter, r *http.Request) { func(w http.ResponseWriter, r *http.Request) {
token, err := jwt.ParseWithClaims(r.Header.Get(upload.RewrittenFieldsHeader), &upload.MultipartClaims{}, testhelper.ParseJWT) token, err := jwt.ParseWithClaims(r.Header.Get(upload.RewrittenFieldsHeader), &upload.MultipartClaims{}, testhelper.ParseJWT)
require.NoError(t, err) require.NoError(t, err)
rewrittenFields := token.Claims.(*upload.MultipartClaims).RewrittenFields rewrittenFields := token.Claims.(*upload.MultipartClaims).RewrittenFields
require.Equal(t, 2, len(rewrittenFields)) require.Equal(t, 1, len(rewrittenFields))
require.Contains(t, rewrittenFields, "file") require.Contains(t, rewrittenFields, "file")
require.Contains(t, rewrittenFields, "metadata")
require.Contains(t, r.PostForm, "file.gitlab-workhorse-upload") require.Contains(t, r.PostForm, "file.gitlab-workhorse-upload")
require.Contains(t, r.PostForm, "metadata.gitlab-workhorse-upload")
}, },
) )
defer s.cleanup() defer s.cleanup()
archive := zip.NewWriter(s.fileWriter) file, err := os.Open("../../testdata/tarfile.tar")
file, err := archive.Create("test.file")
require.NotNil(t, file)
require.NoError(t, err) require.NoError(t, err)
require.NoError(t, archive.Close()) _, err = io.Copy(s.fileWriter, file)
require.NoError(t, err)
require.NoError(t, file.Close())
require.NoError(t, s.writer.Close()) require.NoError(t, s.writer.Close())
response := testUploadArtifacts(t, s.writer.FormDataContentType(), s.url, s.buffer) response := testUploadArtifacts(t, s.writer.FormDataContentType(), s.url, s.buffer)
require.Equal(t, http.StatusOK, response.Code) require.Equal(t, http.StatusOK, response.Code)
testhelper.RequireResponseHeader(t, response, MetadataHeaderKey, MetadataHeaderPresent) testhelper.RequireResponseHeader(t, response, MetadataHeaderKey, MetadataHeaderMissing)
} }
func TestUploadHandlerForUnsupportedArchive(t *testing.T) { func TestUploadHandlerForUnsupportedArchive(t *testing.T) {
s := setupWithTmpPath(t, "file", nil, nil) s := setupWithTmpPath(t, "file", true, "other", nil, nil)
defer s.cleanup() defer s.cleanup()
require.NoError(t, s.writer.Close()) require.NoError(t, s.writer.Close())
...@@ -200,7 +261,7 @@ func TestUploadHandlerForUnsupportedArchive(t *testing.T) { ...@@ -200,7 +261,7 @@ func TestUploadHandlerForUnsupportedArchive(t *testing.T) {
} }
func TestUploadHandlerForMultipleFiles(t *testing.T) { func TestUploadHandlerForMultipleFiles(t *testing.T) {
s := setupWithTmpPath(t, "file", nil, nil) s := setupWithTmpPath(t, "file", true, "", nil, nil)
defer s.cleanup() defer s.cleanup()
file, err := s.writer.CreateFormFile("file", "my.file") file, err := s.writer.CreateFormFile("file", "my.file")
...@@ -213,7 +274,7 @@ func TestUploadHandlerForMultipleFiles(t *testing.T) { ...@@ -213,7 +274,7 @@ func TestUploadHandlerForMultipleFiles(t *testing.T) {
} }
func TestUploadFormProcessing(t *testing.T) { func TestUploadFormProcessing(t *testing.T) {
s := setupWithTmpPath(t, "metadata", nil, nil) s := setupWithTmpPath(t, "metadata", true, "", nil, nil)
defer s.cleanup() defer s.cleanup()
require.NoError(t, s.writer.Close()) require.NoError(t, s.writer.Close())
...@@ -225,7 +286,7 @@ func TestLsifFileProcessing(t *testing.T) { ...@@ -225,7 +286,7 @@ func TestLsifFileProcessing(t *testing.T) {
tempPath, err := ioutil.TempDir("", "uploads") tempPath, err := ioutil.TempDir("", "uploads")
require.NoError(t, err) require.NoError(t, err)
s := setupWithTmpPath(t, "file", &api.Response{TempPath: tempPath, ProcessLsif: true}, nil) s := setupWithTmpPath(t, "file", true, "zip", &api.Response{TempPath: tempPath, ProcessLsif: true}, nil)
defer s.cleanup() defer s.cleanup()
file, err := os.Open("../../testdata/lsif/valid.lsif.zip") file, err := os.Open("../../testdata/lsif/valid.lsif.zip")
...@@ -245,7 +306,7 @@ func TestInvalidLsifFileProcessing(t *testing.T) { ...@@ -245,7 +306,7 @@ func TestInvalidLsifFileProcessing(t *testing.T) {
tempPath, err := ioutil.TempDir("", "uploads") tempPath, err := ioutil.TempDir("", "uploads")
require.NoError(t, err) require.NoError(t, err)
s := setupWithTmpPath(t, "file", &api.Response{TempPath: tempPath, ProcessLsif: true}, nil) s := setupWithTmpPath(t, "file", true, "zip", &api.Response{TempPath: tempPath, ProcessLsif: true}, nil)
defer s.cleanup() defer s.cleanup()
file, err := os.Open("../../testdata/lsif/invalid.lsif.zip") file, err := os.Open("../../testdata/lsif/invalid.lsif.zip")
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment