Commit 8fd7b24b authored by Alessio Caiazza's avatar Alessio Caiazza

Merge branch '284-remove-zip-buffer' into 'master'

Remove an in-memory buffer for LSIF transformation

Closes #284

See merge request gitlab-org/gitlab-workhorse!586
parents d983e10e da0ba499
---
title: Remove an in-memory buffer for LSIF transformation
merge_request: 586
author:
type: performance
......@@ -2,7 +2,9 @@ package parser
import (
"archive/zip"
"bufio"
"encoding/json"
"io"
"strings"
)
......@@ -45,7 +47,19 @@ func NewDocs(config Config) (*Docs, error) {
}, nil
}
func (d *Docs) Read(line []byte) error {
func (d *Docs) Parse(r io.Reader) error {
scanner := bufio.NewScanner(r)
for scanner.Scan() {
if err := d.process(scanner.Bytes()); err != nil {
return err
}
}
return scanner.Err()
}
func (d *Docs) process(line []byte) error {
l := Line{}
if err := json.Unmarshal(line, &l); err != nil {
return err
......
package parser
import (
"bytes"
"fmt"
"testing"
......@@ -8,34 +9,34 @@ import (
)
func createLine(id, label, uri string) []byte {
return []byte(fmt.Sprintf(`{"id":"%s","label":"%s","uri":"%s"}`, id, label, uri))
return []byte(fmt.Sprintf(`{"id":"%s","label":"%s","uri":"%s"}`+"\n", id, label, uri))
}
func TestRead(t *testing.T) {
func TestParse(t *testing.T) {
d, err := NewDocs(Config{})
require.NoError(t, err)
defer d.Close()
metadataLine := []byte(`{"id":"1","label":"metaData","projectRoot":"file:///Users/nested"}`)
data := []byte(`{"id":"1","label":"metaData","projectRoot":"file:///Users/nested"}` + "\n")
data = append(data, createLine("2", "document", "file:///Users/nested/file.rb")...)
data = append(data, createLine("3", "document", "file:///Users/nested/folder/file.rb")...)
data = append(data, createLine("4", "document", "file:///Users/wrong/file.rb")...)
require.NoError(t, d.Read(metadataLine))
require.NoError(t, d.Read(createLine("2", "document", "file:///Users/nested/file.rb")))
require.NoError(t, d.Read(createLine("3", "document", "file:///Users/nested/folder/file.rb")))
require.NoError(t, d.Read(createLine("4", "document", "file:///Users/wrong/file.rb")))
require.NoError(t, d.Parse(bytes.NewReader(data)))
require.Equal(t, d.Entries[2], "file.rb")
require.Equal(t, d.Entries[3], "folder/file.rb")
require.Equal(t, d.Entries[4], "file:///Users/wrong/file.rb")
}
func TestReadContainsLine(t *testing.T) {
func TestParseContainsLine(t *testing.T) {
d, err := NewDocs(Config{})
require.NoError(t, err)
defer d.Close()
line := []byte(`{"id":"5","label":"contains","outV":"1", "inVs": ["2", "3"]}`)
line := []byte(`{"id":"5","label":"contains","outV":"1", "inVs": ["2", "3"]}` + "\n")
require.NoError(t, d.Read(line))
require.NoError(t, d.Parse(bytes.NewReader(line)))
require.Equal(t, []Id{2, 3}, d.DocRanges[1])
}
......@@ -2,9 +2,8 @@ package parser
import (
"archive/zip"
"bufio"
"bytes"
"errors"
"fmt"
"io"
"io/ioutil"
"os"
......@@ -16,84 +15,91 @@ var (
type Parser struct {
Docs *Docs
pr *io.PipeReader
}
type Config struct {
TempPath string
}
func NewParser(r io.Reader, config Config) (*Parser, error) {
func NewParser(r io.Reader, config Config) (io.ReadCloser, error) {
docs, err := NewDocs(config)
if err != nil {
return nil, err
}
zr, err := openZipReader(r, config.TempPath)
// ZIP files need to be seekable. Don't hold it all in RAM, use a tempfile
tempFile, err := ioutil.TempFile(config.TempPath, Lsif)
if err != nil {
return nil, err
}
reader := bufio.NewReader(zr)
for {
line, err := reader.ReadBytes('\n')
if err != nil {
break
}
defer tempFile.Close()
if err := docs.Read(line); err != nil {
if err := os.Remove(tempFile.Name()); err != nil {
return nil, err
}
}
return &Parser{Docs: docs}, nil
}
func (p *Parser) ZipReader() (io.Reader, error) {
buf := new(bytes.Buffer)
w := zip.NewWriter(buf)
if err := p.Docs.SerializeEntries(w); err != nil {
size, err := io.Copy(tempFile, r)
if err != nil {
return nil, err
}
if err := w.Close(); err != nil {
zr, err := zip.NewReader(tempFile, size)
if err != nil {
return nil, err
}
return buf, nil
}
func (p *Parser) Close() error {
return p.Docs.Close()
}
if len(zr.File) == 0 {
return nil, errors.New("empty zip file")
}
func openZipReader(reader io.Reader, tempDir string) (io.Reader, error) {
tempFile, err := ioutil.TempFile(tempDir, Lsif)
file, err := zr.File[0].Open()
if err != nil {
return nil, err
}
if err := os.Remove(tempFile.Name()); err != nil {
return nil, err
}
defer file.Close()
size, err := io.Copy(tempFile, reader)
if err != nil {
if err := docs.Parse(file); err != nil {
return nil, err
}
if _, err := tempFile.Seek(0, io.SeekStart); err != nil {
return nil, err
pr, pw := io.Pipe()
parser := &Parser{
Docs: docs,
pr: pr,
}
zr, err := zip.NewReader(tempFile, size)
if err != nil {
return nil, err
go parser.transform(pw)
return parser, nil
}
func (p *Parser) Read(b []byte) (int, error) {
return p.pr.Read(b)
}
func (p *Parser) Close() error {
p.pr.Close()
return p.Docs.Close()
}
func (p *Parser) transform(pw *io.PipeWriter) {
zw := zip.NewWriter(pw)
if err := p.Docs.SerializeEntries(zw); err != nil {
zw.Close() // Free underlying resources only
pw.CloseWithError(fmt.Errorf("lsif parser: Docs.SerializeEntries: %v", err))
return
}
if len(zr.File) == 0 {
return nil, errors.New("empty zip file")
if err := zw.Close(); err != nil {
pw.CloseWithError(fmt.Errorf("lsif parser: ZipWriter.Close: %v", err))
return
}
return zr.File[0].Open()
pw.Close()
}
......@@ -38,24 +38,21 @@ func verifyCorrectnessOf(t *testing.T, tmpDir, fileName string) {
}
func createFiles(t *testing.T, filePath, tmpDir string) {
t.Helper()
file, err := os.Open(filePath)
require.NoError(t, err)
p, err := NewParser(file, Config{})
parser, err := NewParser(file, Config{})
require.NoError(t, err)
r, err := p.ZipReader()
require.NoError(t, err)
require.NoError(t, p.Close())
zipFileName := tmpDir + ".zip"
w, err := os.Create(zipFileName)
require.NoError(t, err)
defer os.RemoveAll(zipFileName)
_, err = io.Copy(w, r)
_, err = io.Copy(w, parser)
require.NoError(t, err)
require.NoError(t, parser.Close())
extractZipFiles(t, tmpDir, zipFileName)
}
......
package parser
import (
"io"
"io/ioutil"
"os"
"runtime"
"testing"
......@@ -19,12 +21,12 @@ func BenchmarkGenerate(b *testing.B) {
file, err := os.Open(filePath)
require.NoError(b, err)
p, err := NewParser(file, Config{})
parser, err := NewParser(file, Config{})
require.NoError(b, err)
_, err = p.ZipReader()
_, err = io.Copy(ioutil.Discard, parser)
require.NoError(b, err)
require.NoError(b, p.Close())
require.NoError(b, parser.Close())
})
}
......
......@@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"io"
"io/ioutil"
"mime/multipart"
"net/http"
"strings"
......@@ -124,7 +125,7 @@ func (rew *rewriter) handleFilePart(ctx context.Context, name string, p *multipa
opts.TempFilePrefix = filename
var inputReader io.Reader
var inputReader io.ReadCloser
var err error
switch {
case exif.IsExifFile(filename):
......@@ -138,9 +139,11 @@ func (rew *rewriter) handleFilePart(ctx context.Context, name string, p *multipa
return err
}
default:
inputReader = p
inputReader = ioutil.NopCloser(p)
}
defer inputReader.Close()
fh, err := filestore.SaveFileFromReader(ctx, inputReader, -1, opts)
if err != nil {
switch err {
......@@ -166,36 +169,25 @@ func (rew *rewriter) handleFilePart(ctx context.Context, name string, p *multipa
return rew.filter.ProcessFile(ctx, name, fh, rew.writer)
}
func handleExifUpload(ctx context.Context, r io.Reader, filename string) (io.Reader, error) {
func handleExifUpload(ctx context.Context, r io.Reader, filename string) (io.ReadCloser, error) {
log.WithContextFields(ctx, log.Fields{
"filename": filename,
}).Print("running exiftool to remove any metadata")
return exif.NewCleaner(ctx, r)
}
func handleLsifUpload(ctx context.Context, reader io.Reader, tempPath, filename string, preauth *api.Response) (io.Reader, error) {
parserConfig := parser.Config{
TempPath: tempPath,
}
p, err := parser.NewParser(reader, parserConfig)
r, err := exif.NewCleaner(ctx, r)
if err != nil {
return nil, err
}
z, err := p.ZipReader()
if err != nil {
return nil, err
}
return ioutil.NopCloser(r), nil
}
if err := p.Close(); err != nil {
log.WithContextFields(ctx, log.Fields{
"filename": filename,
}).Print("failed to close lsif parser: " + err.Error())
func handleLsifUpload(ctx context.Context, reader io.Reader, tempPath, filename string, preauth *api.Response) (io.ReadCloser, error) {
parserConfig := parser.Config{
TempPath: tempPath,
}
return z, nil
return parser.NewParser(reader, parserConfig)
}
func (rew *rewriter) copyPart(ctx context.Context, name string, p *multipart.Part) error {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment