Commit 5f330b47 authored by Stan Hu

Only generate CI artifact metadata for ZIP files

In the beginning, CI artifacts were restricted to ZIP files, but they
have since been expanded to include JSON files, tar.gz files, and more.
Prior to this change, the gitlab-zip-metadata subcommand would be run
for each of these non-ZIP artifacts, generating lots of noise in the logs.

We now inspect the `artifact_format` query string to determine whether
to generate metadata. If the format is ZIP, which is the case for CI
archives and LSIF files, we generate metadata.

Relates to https://gitlab.com/gitlab-org/gitlab-workhorse/-/issues/272
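For illustration, here is a minimal standalone sketch of that decision, assuming the `artifact_format` values introduced in the diff below (`zip` and the empty default). `shouldGenerateMetadata` and the sample URLs are hypothetical and are not part of the actual Workhorse code, which performs the equivalent check inside ProcessFile.

package main

import (
	"fmt"
	"net/url"
	"strings"
)

const (
	artifactFormatKey     = "artifact_format" // query parameter sent by the runner
	artifactFormatZip     = "zip"
	artifactFormatDefault = "" // older runners omit the parameter entirely
)

// shouldGenerateMetadata reports whether the gitlab-zip-metadata subcommand
// should run for a given upload URL: only for ZIP artifacts, or when no
// format is supplied (preserving the old behaviour).
func shouldGenerateMetadata(rawURL string) (bool, error) {
	u, err := url.Parse(rawURL)
	if err != nil {
		return false, err
	}
	format := u.Query().Get(artifactFormatKey)
	return strings.EqualFold(format, artifactFormatZip) || format == artifactFormatDefault, nil
}

func main() {
	// Illustrative URLs only; the real upload paths are sent by the runner.
	for _, raw := range []string{
		"/artifacts/upload?artifact_format=zip",  // metadata generated
		"/artifacts/upload?artifact_format=gzip", // skipped: not a ZIP
		"/artifacts/upload",                      // no format: metadata generated
	} {
		ok, err := shouldGenerateMetadata(raw)
		fmt.Println(raw, ok, err)
	}
}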
parent da82ee96
---
title: Only generate CI artifact metadata for ZIP files
merge_request: 627
author:
type: fixed
......@@ -98,6 +98,8 @@ func TestUploadHandlerSendingToExternalStorage(t *testing.T) {
storeServer := httptest.NewServer(storeServerMux)
defer storeServer.Close()
qs := fmt.Sprintf("?%s=%s", ArtifactFormatKey, ArtifactFormatZip)
tests := []struct {
name string
preauth api.Response
......@@ -106,7 +108,7 @@ func TestUploadHandlerSendingToExternalStorage(t *testing.T) {
name: "ObjectStore Upload",
preauth: api.Response{
RemoteObject: api.RemoteObject{
StoreURL: storeServer.URL + "/url/put",
StoreURL: storeServer.URL + "/url/put" + qs,
ID: "store-id",
GetURL: storeServer.URL + "/store-id",
},
......@@ -123,7 +125,7 @@ func TestUploadHandlerSendingToExternalStorage(t *testing.T) {
defer ts.Close()
contentBuffer, contentType := createTestMultipartForm(t, archiveData)
response := testUploadArtifacts(t, contentType, ts.URL+Path, &contentBuffer)
response := testUploadArtifacts(t, contentType, ts.URL+Path+qs, &contentBuffer)
require.Equal(t, http.StatusOK, response.Code)
testhelper.RequireResponseHeader(t, response, MetadataHeaderKey, MetadataHeaderPresent)
require.Equal(t, 1, storeServerCalled, "store should be called only once")
......
......@@ -8,6 +8,7 @@ import (
"net/http"
"os"
"os/exec"
"strings"
"syscall"
"github.com/prometheus/client_golang/prometheus"
......@@ -20,6 +21,13 @@ import (
"gitlab.com/gitlab-org/gitlab-workhorse/internal/zipartifacts"
)
// Sent by the runner: https://gitlab.com/gitlab-org/gitlab-runner/blob/c24da19ecce8808d9d2950896f70c94f5ea1cc2e/network/gitlab.go#L580
const (
ArtifactFormatKey = "artifact_format"
ArtifactFormatZip = "zip"
ArtifactFormatDefault = ""
)
var zipSubcommandsErrorsCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "gitlab_workhorse_zip_subcommand_errors_total",
......@@ -31,7 +39,8 @@ func init() {
}
type artifactsUploadProcessor struct {
opts *filestore.SaveFileOpts
opts *filestore.SaveFileOpts
format string
upload.SavedFileTracker
}
......@@ -112,26 +121,30 @@ func (a *artifactsUploadProcessor) ProcessFile(ctx context.Context, formName str
select {
case <-ctx.Done():
return fmt.Errorf("ProcessFile: context done")
default:
// TODO: can we rely on disk for shipping metadata? Not if we split workhorse and rails in 2 different PODs
metadata, err := a.generateMetadataFromZip(ctx, file)
if err != nil {
return err
}
}
if metadata != nil {
fields, err := metadata.GitLabFinalizeFields("metadata")
if err != nil {
return fmt.Errorf("finalize metadata field error: %v", err)
}
if !strings.EqualFold(a.format, ArtifactFormatZip) && a.format != ArtifactFormatDefault {
return nil
}
for k, v := range fields {
writer.WriteField(k, v)
}
// TODO: can we rely on disk for shipping metadata? Not if we split workhorse and rails in 2 different PODs
metadata, err := a.generateMetadataFromZip(ctx, file)
if err != nil {
return err
}
if metadata != nil {
fields, err := metadata.GitLabFinalizeFields("metadata")
if err != nil {
return fmt.Errorf("finalize metadata field error: %v", err)
}
a.Track("metadata", metadata.LocalPath)
for k, v := range fields {
writer.WriteField(k, v)
}
a.Track("metadata", metadata.LocalPath)
}
return nil
......@@ -149,7 +162,9 @@ func UploadArtifacts(myAPI *api.API, h http.Handler, p upload.Preparer) http.Han
return
}
mg := &artifactsUploadProcessor{opts: opts, SavedFileTracker: upload.SavedFileTracker{Request: r}}
format := r.URL.Query().Get(ArtifactFormatKey)
mg := &artifactsUploadProcessor{opts: opts, format: format, SavedFileTracker: upload.SavedFileTracker{Request: r}}
upload.HandleFileUploads(w, r, h, a, mg, opts)
}, "/authorize")
}
......@@ -5,6 +5,7 @@ import (
"bytes"
"compress/gzip"
"encoding/json"
"fmt"
"io"
"io/ioutil"
"mime/multipart"
......@@ -119,7 +120,7 @@ type testServer struct {
cleanup func()
}
func setupWithTmpPath(t *testing.T, filename string, authResponse *api.Response, bodyProcessor func(w http.ResponseWriter, r *http.Request)) *testServer {
func setupWithTmpPath(t *testing.T, filename string, includeFormat bool, format string, authResponse *api.Response, bodyProcessor func(w http.ResponseWriter, r *http.Request)) *testServer {
tempPath, err := ioutil.TempDir("", "uploads")
require.NoError(t, err)
......@@ -141,7 +142,13 @@ func setupWithTmpPath(t *testing.T, filename string, authResponse *api.Response,
require.NoError(t, writer.Close())
}
return &testServer{url: ts.URL + Path, writer: writer, buffer: &buffer, fileWriter: fileWriter, cleanup: cleanup}
qs := ""
if includeFormat {
qs = fmt.Sprintf("?%s=%s", ArtifactFormatKey, format)
}
return &testServer{url: ts.URL + Path + qs, writer: writer, buffer: &buffer, fileWriter: fileWriter, cleanup: cleanup}
}
func testUploadArtifacts(t *testing.T, contentType, url string, body io.Reader) *httptest.ResponseRecorder {
......@@ -160,37 +167,91 @@ func testUploadArtifacts(t *testing.T, contentType, url string, body io.Reader)
}
func TestUploadHandlerAddingMetadata(t *testing.T) {
s := setupWithTmpPath(t, "file", nil,
testCases := []struct {
desc string
format string
includeFormat bool
}{
{
desc: "ZIP format",
format: ArtifactFormatZip,
includeFormat: true,
},
{
desc: "default format",
format: ArtifactFormatDefault,
includeFormat: true,
},
{
desc: "default format without artifact_format",
format: ArtifactFormatDefault,
includeFormat: false,
},
}
for _, tc := range testCases {
t.Run(tc.desc, func(t *testing.T) {
s := setupWithTmpPath(t, "file", tc.includeFormat, tc.format, nil,
func(w http.ResponseWriter, r *http.Request) {
token, err := jwt.ParseWithClaims(r.Header.Get(upload.RewrittenFieldsHeader), &upload.MultipartClaims{}, testhelper.ParseJWT)
require.NoError(t, err)
rewrittenFields := token.Claims.(*upload.MultipartClaims).RewrittenFields
require.Equal(t, 2, len(rewrittenFields))
require.Contains(t, rewrittenFields, "file")
require.Contains(t, rewrittenFields, "metadata")
require.Contains(t, r.PostForm, "file.gitlab-workhorse-upload")
require.Contains(t, r.PostForm, "metadata.gitlab-workhorse-upload")
},
)
defer s.cleanup()
archive := zip.NewWriter(s.fileWriter)
file, err := archive.Create("test.file")
require.NotNil(t, file)
require.NoError(t, err)
require.NoError(t, archive.Close())
require.NoError(t, s.writer.Close())
response := testUploadArtifacts(t, s.writer.FormDataContentType(), s.url, s.buffer)
require.Equal(t, http.StatusOK, response.Code)
testhelper.RequireResponseHeader(t, response, MetadataHeaderKey, MetadataHeaderPresent)
})
}
}
func TestUploadHandlerTarArtifact(t *testing.T) {
s := setupWithTmpPath(t, "file", true, "tar", nil,
func(w http.ResponseWriter, r *http.Request) {
token, err := jwt.ParseWithClaims(r.Header.Get(upload.RewrittenFieldsHeader), &upload.MultipartClaims{}, testhelper.ParseJWT)
require.NoError(t, err)
rewrittenFields := token.Claims.(*upload.MultipartClaims).RewrittenFields
require.Equal(t, 2, len(rewrittenFields))
require.Equal(t, 1, len(rewrittenFields))
require.Contains(t, rewrittenFields, "file")
require.Contains(t, rewrittenFields, "metadata")
require.Contains(t, r.PostForm, "file.gitlab-workhorse-upload")
require.Contains(t, r.PostForm, "metadata.gitlab-workhorse-upload")
},
)
defer s.cleanup()
archive := zip.NewWriter(s.fileWriter)
file, err := archive.Create("test.file")
require.NotNil(t, file)
file, err := os.Open("../../testdata/tarfile.tar")
require.NoError(t, err)
require.NoError(t, archive.Close())
_, err = io.Copy(s.fileWriter, file)
require.NoError(t, err)
require.NoError(t, file.Close())
require.NoError(t, s.writer.Close())
response := testUploadArtifacts(t, s.writer.FormDataContentType(), s.url, s.buffer)
require.Equal(t, http.StatusOK, response.Code)
testhelper.RequireResponseHeader(t, response, MetadataHeaderKey, MetadataHeaderPresent)
testhelper.RequireResponseHeader(t, response, MetadataHeaderKey, MetadataHeaderMissing)
}
func TestUploadHandlerForUnsupportedArchive(t *testing.T) {
s := setupWithTmpPath(t, "file", nil, nil)
s := setupWithTmpPath(t, "file", true, "other", nil, nil)
defer s.cleanup()
require.NoError(t, s.writer.Close())
......@@ -200,7 +261,7 @@ func TestUploadHandlerForUnsupportedArchive(t *testing.T) {
}
func TestUploadHandlerForMultipleFiles(t *testing.T) {
s := setupWithTmpPath(t, "file", nil, nil)
s := setupWithTmpPath(t, "file", true, "", nil, nil)
defer s.cleanup()
file, err := s.writer.CreateFormFile("file", "my.file")
......@@ -213,7 +274,7 @@ func TestUploadHandlerForMultipleFiles(t *testing.T) {
}
func TestUploadFormProcessing(t *testing.T) {
s := setupWithTmpPath(t, "metadata", nil, nil)
s := setupWithTmpPath(t, "metadata", true, "", nil, nil)
defer s.cleanup()
require.NoError(t, s.writer.Close())
......@@ -225,7 +286,7 @@ func TestLsifFileProcessing(t *testing.T) {
tempPath, err := ioutil.TempDir("", "uploads")
require.NoError(t, err)
s := setupWithTmpPath(t, "file", &api.Response{TempPath: tempPath, ProcessLsif: true}, nil)
s := setupWithTmpPath(t, "file", true, "zip", &api.Response{TempPath: tempPath, ProcessLsif: true}, nil)
defer s.cleanup()
file, err := os.Open("../../testdata/lsif/valid.lsif.zip")
......@@ -245,7 +306,7 @@ func TestInvalidLsifFileProcessing(t *testing.T) {
tempPath, err := ioutil.TempDir("", "uploads")
require.NoError(t, err)
s := setupWithTmpPath(t, "file", &api.Response{TempPath: tempPath, ProcessLsif: true}, nil)
s := setupWithTmpPath(t, "file", true, "zip", &api.Response{TempPath: tempPath, ProcessLsif: true}, nil)
defer s.cleanup()
file, err := os.Open("../../testdata/lsif/invalid.lsif.zip")
......