Commit 7d06e7d7 authored by Matthias Käppler

Merge branch 'georgekoltsov/update-import-archive-file-validator' into 'master'

Update Import archive size validator

See merge request gitlab-org/gitlab!52893
parents b488e2f5 9e81a3a0
# frozen_string_literal: true
require 'shellwords'
require 'zlib'
module Gitlab
module ImportExport
class DecompressedArchiveSizeValidator
include Gitlab::Utils::StrongMemoize
DEFAULT_MAX_BYTES = 10.gigabytes.freeze
CHUNK_SIZE = 4096.freeze
attr_reader :error
TIMEOUT_LIMIT = 60.seconds
# Sets up a validator for a single archive.
#
# @param archive_path [String] path of the archive to validate
# @param max_bytes [Integer] upper bound for the decompressed size;
#   falls back to the class-level `max_bytes` when omitted
def initialize(archive_path:, max_bytes: self.class.max_bytes)
  @archive_path, @max_bytes = archive_path, max_bytes

  # Both counters start from zero.
  @bytes_read = @total_reads = 0
  @denominator = 5

  # No error has been recorded yet.
  @error = nil
end
def valid?
......@@ -31,59 +23,62 @@ module Gitlab
DEFAULT_MAX_BYTES
end
# Returns the file handle for the archive at @archive_path, opening it on
# first use and handing back the same handle on every later call.
#
# @return [File]
def archive_file
  return @archive_file if @archive_file

  @archive_file = File.open(@archive_path)
end
private
def validate
until archive_file.eof?
compressed_chunk = archive_file.read(CHUNK_SIZE)
pgrp = nil
valid_archive = true
inflate_stream.inflate(compressed_chunk) do |chunk|
@bytes_read += chunk.size
@total_reads += 1
end
Timeout.timeout(TIMEOUT_LIMIT) do
stdin, stdout, stderr, wait_thr = Open3.popen3(command, pgroup: true)
stdin.close
pgrp = Process.getpgid(wait_thr[:pid])
status = wait_thr.value
# Start garbage collection every 5 reads in order
# to prevent memory bloat during archive decompression
GC.start if gc_start?
if status.success?
result = stdout.readline
if @bytes_read > @max_bytes
@error = error_message
if result.to_i > @max_bytes
valid_archive = false
return false
log_error('Decompressed archive size limit reached')
end
else
valid_archive = false
log_error(stderr.readline)
end
ensure
stdout.close
stderr.close
end
true
rescue => e
@error = error_message
valid_archive
rescue Timeout::Error
log_error('Timeout reached during archive decompression')
Gitlab::ErrorTracking.track_exception(e)
Gitlab::Import::Logger.info(
message: @error,
error: e.message
)
Process.kill(-1, pgrp) if pgrp
false
ensure
inflate_stream.close
archive_file.close
end
rescue => e
log_error(e.message)
def inflate_stream
@inflate_stream ||= Zlib::Inflate.new(Zlib::MAX_WBITS + 32)
Process.kill(-1, pgrp) if pgrp
false
end
def gc_start?
@total_reads % @denominator == 0
# Builds the shell pipeline used to measure the decompressed archive size:
# `gzip -dc` writes the decompressed data to stdout and `wc -c` counts the
# bytes.
#
# The archive path is shell-escaped because it is interpolated into a
# command line that contains a pipe; without escaping, a path holding
# whitespace or shell metacharacters would be split or interpreted instead
# of being treated as a single file name. Paths made only of ordinary
# characters produce exactly the same string as before.
#
# @return [String] the command line
def command
  "gzip -dc #{Shellwords.escape(@archive_path)} | wc -c"
end
def error_message
_('Decompressed archive size validation failed.')
# Sends an info-level entry to the import logger, attaching the path and
# the on-disk size of the archive being validated.
#
# @param error [String] message to record
def log_error(error)
  details = {
    message: error,
    import_upload_archive_path: @archive_path,
    import_upload_archive_size: File.size(@archive_path)
  }

  Gitlab::Import::Logger.info(**details)
end
end
end
......
......@@ -87,7 +87,7 @@ module Gitlab
end
# Aborts the import with an ImporterError unless size_validator.valid?
# returns a truthy value.
#
# NOTE(review): the two consecutive guards below look like the removed and
# the added line of a diff shown together. As written, the second guard can
# only raise if size_validator.valid? answers differently on its second
# call — confirm which of the two messages is intended and drop the other.
def validate_decompressed_archive_size
raise ImporterError.new(size_validator.error) unless size_validator.valid?
raise ImporterError.new(_('Decompressed archive size validation failed.')) unless size_validator.valid?
end
def size_validator
......
......@@ -27,25 +27,55 @@ RSpec.describe Gitlab::ImportExport::DecompressedArchiveSizeValidator do
end
context 'when file exceeds allowed decompressed size' do
it 'returns false' do
it 'logs error message returns false' do
expect(Gitlab::Import::Logger)
.to receive(:info)
.with(
import_upload_archive_path: filepath,
import_upload_archive_size: File.size(filepath),
message: 'Decompressed archive size limit reached'
)
expect(subject.valid?).to eq(false)
end
end
context 'when something goes wrong during decompression' do
before do
allow(subject.archive_file).to receive(:eof?).and_raise(StandardError)
context 'when exception occurs during decompression' do
shared_examples 'logs raised exception and terminates validator process group' do
let(:std) { double(:std, close: nil, value: nil) }
let(:wait_thr) { double }
before do
allow(Process).to receive(:getpgid).and_return(2)
allow(Open3).to receive(:popen3).and_return([std, std, std, wait_thr])
allow(wait_thr).to receive(:[]).with(:pid).and_return(1)
allow(wait_thr).to receive(:value).and_raise(exception)
end
it 'logs raised exception and terminates validator process group' do
expect(Gitlab::Import::Logger)
.to receive(:info)
.with(
import_upload_archive_path: filepath,
import_upload_archive_size: File.size(filepath),
message: error_message
)
expect(Process).to receive(:kill).with(-1, 2)
expect(subject.valid?).to eq(false)
end
end
it 'logs and tracks raised exception' do
expect(Gitlab::ErrorTracking).to receive(:track_exception).with(instance_of(StandardError))
expect(Gitlab::Import::Logger).to receive(:info).with(hash_including(message: 'Decompressed archive size validation failed.'))
context 'when timeout occurs' do
let(:error_message) { 'Timeout reached during archive decompression' }
let(:exception) { Timeout::Error }
subject.valid?
include_examples 'logs raised exception and terminates validator process group'
end
it 'returns false' do
expect(subject.valid?).to eq(false)
context 'when exception occurs' do
let(:error_message) { 'Error!' }
let(:exception) { StandardError.new(error_message) }
include_examples 'logs raised exception and terminates validator process group'
end
end
end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment