Commit 8fd7b24b authored by Alessio Caiazza's avatar Alessio Caiazza

Merge branch '284-remove-zip-buffer' into 'master'

Remove an in-memory buffer for LSIF transformation

Closes #284

See merge request gitlab-org/gitlab-workhorse!586
parents d983e10e da0ba499
---
title: Remove an in-memory buffer for LSIF transformation
merge_request: 586
author:
type: performance
...@@ -2,7 +2,9 @@ package parser ...@@ -2,7 +2,9 @@ package parser
import ( import (
"archive/zip" "archive/zip"
"bufio"
"encoding/json" "encoding/json"
"io"
"strings" "strings"
) )
...@@ -45,7 +47,19 @@ func NewDocs(config Config) (*Docs, error) { ...@@ -45,7 +47,19 @@ func NewDocs(config Config) (*Docs, error) {
}, nil }, nil
} }
func (d *Docs) Read(line []byte) error { func (d *Docs) Parse(r io.Reader) error {
scanner := bufio.NewScanner(r)
for scanner.Scan() {
if err := d.process(scanner.Bytes()); err != nil {
return err
}
}
return scanner.Err()
}
func (d *Docs) process(line []byte) error {
l := Line{} l := Line{}
if err := json.Unmarshal(line, &l); err != nil { if err := json.Unmarshal(line, &l); err != nil {
return err return err
......
package parser package parser
import ( import (
"bytes"
"fmt" "fmt"
"testing" "testing"
...@@ -8,34 +9,34 @@ import ( ...@@ -8,34 +9,34 @@ import (
) )
func createLine(id, label, uri string) []byte { func createLine(id, label, uri string) []byte {
return []byte(fmt.Sprintf(`{"id":"%s","label":"%s","uri":"%s"}`, id, label, uri)) return []byte(fmt.Sprintf(`{"id":"%s","label":"%s","uri":"%s"}`+"\n", id, label, uri))
} }
func TestRead(t *testing.T) { func TestParse(t *testing.T) {
d, err := NewDocs(Config{}) d, err := NewDocs(Config{})
require.NoError(t, err) require.NoError(t, err)
defer d.Close() defer d.Close()
metadataLine := []byte(`{"id":"1","label":"metaData","projectRoot":"file:///Users/nested"}`) data := []byte(`{"id":"1","label":"metaData","projectRoot":"file:///Users/nested"}` + "\n")
data = append(data, createLine("2", "document", "file:///Users/nested/file.rb")...)
data = append(data, createLine("3", "document", "file:///Users/nested/folder/file.rb")...)
data = append(data, createLine("4", "document", "file:///Users/wrong/file.rb")...)
require.NoError(t, d.Read(metadataLine)) require.NoError(t, d.Parse(bytes.NewReader(data)))
require.NoError(t, d.Read(createLine("2", "document", "file:///Users/nested/file.rb")))
require.NoError(t, d.Read(createLine("3", "document", "file:///Users/nested/folder/file.rb")))
require.NoError(t, d.Read(createLine("4", "document", "file:///Users/wrong/file.rb")))
require.Equal(t, d.Entries[2], "file.rb") require.Equal(t, d.Entries[2], "file.rb")
require.Equal(t, d.Entries[3], "folder/file.rb") require.Equal(t, d.Entries[3], "folder/file.rb")
require.Equal(t, d.Entries[4], "file:///Users/wrong/file.rb") require.Equal(t, d.Entries[4], "file:///Users/wrong/file.rb")
} }
func TestReadContainsLine(t *testing.T) { func TestParseContainsLine(t *testing.T) {
d, err := NewDocs(Config{}) d, err := NewDocs(Config{})
require.NoError(t, err) require.NoError(t, err)
defer d.Close() defer d.Close()
line := []byte(`{"id":"5","label":"contains","outV":"1", "inVs": ["2", "3"]}`) line := []byte(`{"id":"5","label":"contains","outV":"1", "inVs": ["2", "3"]}` + "\n")
require.NoError(t, d.Read(line)) require.NoError(t, d.Parse(bytes.NewReader(line)))
require.Equal(t, []Id{2, 3}, d.DocRanges[1]) require.Equal(t, []Id{2, 3}, d.DocRanges[1])
} }
...@@ -2,9 +2,8 @@ package parser ...@@ -2,9 +2,8 @@ package parser
import ( import (
"archive/zip" "archive/zip"
"bufio"
"bytes"
"errors" "errors"
"fmt"
"io" "io"
"io/ioutil" "io/ioutil"
"os" "os"
...@@ -16,84 +15,91 @@ var ( ...@@ -16,84 +15,91 @@ var (
type Parser struct { type Parser struct {
Docs *Docs Docs *Docs
pr *io.PipeReader
} }
type Config struct { type Config struct {
TempPath string TempPath string
} }
func NewParser(r io.Reader, config Config) (*Parser, error) { func NewParser(r io.Reader, config Config) (io.ReadCloser, error) {
docs, err := NewDocs(config) docs, err := NewDocs(config)
if err != nil { if err != nil {
return nil, err return nil, err
} }
zr, err := openZipReader(r, config.TempPath) // ZIP files need to be seekable. Don't hold it all in RAM, use a tempfile
tempFile, err := ioutil.TempFile(config.TempPath, Lsif)
if err != nil { if err != nil {
return nil, err return nil, err
} }
reader := bufio.NewReader(zr)
for {
line, err := reader.ReadBytes('\n')
if err != nil {
break
}
if err := docs.Read(line); err != nil {
return nil, err
}
}
return &Parser{Docs: docs}, nil
}
func (p *Parser) ZipReader() (io.Reader, error) { defer tempFile.Close()
buf := new(bytes.Buffer)
w := zip.NewWriter(buf)
if err := p.Docs.SerializeEntries(w); err != nil { if err := os.Remove(tempFile.Name()); err != nil {
return nil, err return nil, err
} }
if err := w.Close(); err != nil { size, err := io.Copy(tempFile, r)
if err != nil {
return nil, err return nil, err
} }
return buf, nil zr, err := zip.NewReader(tempFile, size)
}
func (p *Parser) Close() error {
return p.Docs.Close()
}
func openZipReader(reader io.Reader, tempDir string) (io.Reader, error) {
tempFile, err := ioutil.TempFile(tempDir, Lsif)
if err != nil { if err != nil {
return nil, err return nil, err
} }
if err := os.Remove(tempFile.Name()); err != nil { if len(zr.File) == 0 {
return nil, err return nil, errors.New("empty zip file")
} }
size, err := io.Copy(tempFile, reader) file, err := zr.File[0].Open()
if err != nil { if err != nil {
return nil, err return nil, err
} }
if _, err := tempFile.Seek(0, io.SeekStart); err != nil { defer file.Close()
if err := docs.Parse(file); err != nil {
return nil, err return nil, err
} }
zr, err := zip.NewReader(tempFile, size) pr, pw := io.Pipe()
if err != nil { parser := &Parser{
return nil, err Docs: docs,
pr: pr,
} }
if len(zr.File) == 0 { go parser.transform(pw)
return nil, errors.New("empty zip file")
return parser, nil
}
func (p *Parser) Read(b []byte) (int, error) {
return p.pr.Read(b)
}
func (p *Parser) Close() error {
p.pr.Close()
return p.Docs.Close()
}
func (p *Parser) transform(pw *io.PipeWriter) {
zw := zip.NewWriter(pw)
if err := p.Docs.SerializeEntries(zw); err != nil {
zw.Close() // Free underlying resources only
pw.CloseWithError(fmt.Errorf("lsif parser: Docs.SerializeEntries: %v", err))
return
}
if err := zw.Close(); err != nil {
pw.CloseWithError(fmt.Errorf("lsif parser: ZipWriter.Close: %v", err))
return
} }
return zr.File[0].Open() pw.Close()
} }
...@@ -38,24 +38,21 @@ func verifyCorrectnessOf(t *testing.T, tmpDir, fileName string) { ...@@ -38,24 +38,21 @@ func verifyCorrectnessOf(t *testing.T, tmpDir, fileName string) {
} }
func createFiles(t *testing.T, filePath, tmpDir string) { func createFiles(t *testing.T, filePath, tmpDir string) {
t.Helper()
file, err := os.Open(filePath) file, err := os.Open(filePath)
require.NoError(t, err) require.NoError(t, err)
p, err := NewParser(file, Config{}) parser, err := NewParser(file, Config{})
require.NoError(t, err) require.NoError(t, err)
r, err := p.ZipReader()
require.NoError(t, err)
require.NoError(t, p.Close())
zipFileName := tmpDir + ".zip" zipFileName := tmpDir + ".zip"
w, err := os.Create(zipFileName) w, err := os.Create(zipFileName)
require.NoError(t, err) require.NoError(t, err)
defer os.RemoveAll(zipFileName) defer os.RemoveAll(zipFileName)
_, err = io.Copy(w, r) _, err = io.Copy(w, parser)
require.NoError(t, err) require.NoError(t, err)
require.NoError(t, parser.Close())
extractZipFiles(t, tmpDir, zipFileName) extractZipFiles(t, tmpDir, zipFileName)
} }
......
package parser package parser
import ( import (
"io"
"io/ioutil"
"os" "os"
"runtime" "runtime"
"testing" "testing"
...@@ -19,12 +21,12 @@ func BenchmarkGenerate(b *testing.B) { ...@@ -19,12 +21,12 @@ func BenchmarkGenerate(b *testing.B) {
file, err := os.Open(filePath) file, err := os.Open(filePath)
require.NoError(b, err) require.NoError(b, err)
p, err := NewParser(file, Config{}) parser, err := NewParser(file, Config{})
require.NoError(b, err) require.NoError(b, err)
_, err = p.ZipReader() _, err = io.Copy(ioutil.Discard, parser)
require.NoError(b, err) require.NoError(b, err)
require.NoError(b, p.Close()) require.NoError(b, parser.Close())
}) })
} }
......
...@@ -5,6 +5,7 @@ import ( ...@@ -5,6 +5,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"io/ioutil"
"mime/multipart" "mime/multipart"
"net/http" "net/http"
"strings" "strings"
...@@ -124,7 +125,7 @@ func (rew *rewriter) handleFilePart(ctx context.Context, name string, p *multipa ...@@ -124,7 +125,7 @@ func (rew *rewriter) handleFilePart(ctx context.Context, name string, p *multipa
opts.TempFilePrefix = filename opts.TempFilePrefix = filename
var inputReader io.Reader var inputReader io.ReadCloser
var err error var err error
switch { switch {
case exif.IsExifFile(filename): case exif.IsExifFile(filename):
...@@ -138,9 +139,11 @@ func (rew *rewriter) handleFilePart(ctx context.Context, name string, p *multipa ...@@ -138,9 +139,11 @@ func (rew *rewriter) handleFilePart(ctx context.Context, name string, p *multipa
return err return err
} }
default: default:
inputReader = p inputReader = ioutil.NopCloser(p)
} }
defer inputReader.Close()
fh, err := filestore.SaveFileFromReader(ctx, inputReader, -1, opts) fh, err := filestore.SaveFileFromReader(ctx, inputReader, -1, opts)
if err != nil { if err != nil {
switch err { switch err {
...@@ -166,36 +169,25 @@ func (rew *rewriter) handleFilePart(ctx context.Context, name string, p *multipa ...@@ -166,36 +169,25 @@ func (rew *rewriter) handleFilePart(ctx context.Context, name string, p *multipa
return rew.filter.ProcessFile(ctx, name, fh, rew.writer) return rew.filter.ProcessFile(ctx, name, fh, rew.writer)
} }
func handleExifUpload(ctx context.Context, r io.Reader, filename string) (io.Reader, error) { func handleExifUpload(ctx context.Context, r io.Reader, filename string) (io.ReadCloser, error) {
log.WithContextFields(ctx, log.Fields{ log.WithContextFields(ctx, log.Fields{
"filename": filename, "filename": filename,
}).Print("running exiftool to remove any metadata") }).Print("running exiftool to remove any metadata")
return exif.NewCleaner(ctx, r) r, err := exif.NewCleaner(ctx, r)
}
func handleLsifUpload(ctx context.Context, reader io.Reader, tempPath, filename string, preauth *api.Response) (io.Reader, error) {
parserConfig := parser.Config{
TempPath: tempPath,
}
p, err := parser.NewParser(reader, parserConfig)
if err != nil { if err != nil {
return nil, err return nil, err
} }
z, err := p.ZipReader() return ioutil.NopCloser(r), nil
if err != nil { }
return nil, err
}
if err := p.Close(); err != nil { func handleLsifUpload(ctx context.Context, reader io.Reader, tempPath, filename string, preauth *api.Response) (io.ReadCloser, error) {
log.WithContextFields(ctx, log.Fields{ parserConfig := parser.Config{
"filename": filename, TempPath: tempPath,
}).Print("failed to close lsif parser: " + err.Error())
} }
return z, nil return parser.NewParser(reader, parserConfig)
} }
func (rew *rewriter) copyPart(ctx context.Context, name string, p *multipart.Part) error { func (rew *rewriter) copyPart(ctx context.Context, name string, p *multipart.Part) error {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment