Commit c6ea3624 authored by Nick Thomas's avatar Nick Thomas

Merge branch 'ac-fix-only-os-uplods' into 'master'

Don't write on shared FS when OS in enabled

See merge request gitlab-org/gitlab!18135
parents 36271f3a 6f6b484a
...@@ -180,10 +180,11 @@ module ObjectStorage ...@@ -180,10 +180,11 @@ module ObjectStorage
end end
def workhorse_authorize(has_length:, maximum_size: nil) def workhorse_authorize(has_length:, maximum_size: nil)
{ if self.object_store_enabled? && self.direct_upload_enabled?
RemoteObject: workhorse_remote_upload_options(has_length: has_length, maximum_size: maximum_size), { RemoteObject: workhorse_remote_upload_options(has_length: has_length, maximum_size: maximum_size) }
TempPath: workhorse_local_upload_path else
}.compact { TempPath: workhorse_local_upload_path }
end
end end
def workhorse_local_upload_path def workhorse_local_upload_path
......
---
title: Avoid dumping files on disk when direct_upload is enabled
merge_request: 18135
author:
type: performance
...@@ -6,6 +6,7 @@ require "fileutils" ...@@ -6,6 +6,7 @@ require "fileutils"
class UploadedFile class UploadedFile
InvalidPathError = Class.new(StandardError) InvalidPathError = Class.new(StandardError)
UnknownSizeError = Class.new(StandardError)
# The filename, *not* including the path, of the "uploaded" file # The filename, *not* including the path, of the "uploaded" file
attr_reader :original_filename attr_reader :original_filename
...@@ -18,37 +19,50 @@ class UploadedFile ...@@ -18,37 +19,50 @@ class UploadedFile
attr_reader :remote_id attr_reader :remote_id
attr_reader :sha256 attr_reader :sha256
attr_reader :size
def initialize(path, filename: nil, content_type: "application/octet-stream", sha256: nil, remote_id: nil) def initialize(path, filename: nil, content_type: "application/octet-stream", sha256: nil, remote_id: nil, size: nil)
if path.present?
raise InvalidPathError, "#{path} file does not exist" unless ::File.exist?(path) raise InvalidPathError, "#{path} file does not exist" unless ::File.exist?(path)
@tempfile = File.new(path, 'rb')
@size = @tempfile.size
else
begin
@size = Integer(size)
rescue ArgumentError, TypeError
raise UnknownSizeError, 'Unable to determine file size'
end
end
@content_type = content_type @content_type = content_type
@original_filename = sanitize_filename(filename || path) @original_filename = sanitize_filename(filename || path || '')
@content_type = content_type @content_type = content_type
@sha256 = sha256 @sha256 = sha256
@remote_id = remote_id @remote_id = remote_id
@tempfile = File.new(path, 'rb')
end end
def self.from_params(params, field, upload_paths) def self.from_params(params, field, upload_paths)
unless params["#{field}.path"] path = params["#{field}.path"]
raise InvalidPathError, "file is invalid" if params["#{field}.remote_id"] remote_id = params["#{field}.remote_id"]
return if path.blank? && remote_id.blank?
return file_path = nil
end if path
file_path = File.realpath(path)
file_path = File.realpath(params["#{field}.path"])
paths = Array(upload_paths) << Dir.tmpdir paths = Array(upload_paths) << Dir.tmpdir
unless self.allowed_path?(file_path, paths.compact) unless self.allowed_path?(file_path, paths.compact)
raise InvalidPathError, "insecure path used '#{file_path}'" raise InvalidPathError, "insecure path used '#{file_path}'"
end end
end
UploadedFile.new(file_path, UploadedFile.new(file_path,
filename: params["#{field}.name"], filename: params["#{field}.name"],
content_type: params["#{field}.type"] || 'application/octet-stream', content_type: params["#{field}.type"] || 'application/octet-stream',
sha256: params["#{field}.sha256"], sha256: params["#{field}.sha256"],
remote_id: params["#{field}.remote_id"]) remote_id: remote_id,
size: params["#{field}.size"])
end end
def self.allowed_path?(file_path, paths) def self.allowed_path?(file_path, paths)
...@@ -68,7 +82,11 @@ class UploadedFile ...@@ -68,7 +82,11 @@ class UploadedFile
end end
def path def path
@tempfile.path @tempfile&.path
end
def close
@tempfile&.close
end end
alias_method :local_path, :path alias_method :local_path, :path
......
...@@ -72,16 +72,6 @@ describe UploadedFile do ...@@ -72,16 +72,6 @@ describe UploadedFile do
end end
end end
context 'when only remote id is specified' do
let(:params) do
{ 'file.remote_id' => 'remote_id' }
end
it "raises an error" do
expect { subject }.to raise_error(UploadedFile::InvalidPathError, /file is invalid/)
end
end
context 'when verifying allowed paths' do context 'when verifying allowed paths' do
let(:params) do let(:params) do
{ 'file.path' => temp_file.path } { 'file.path' => temp_file.path }
...@@ -120,6 +110,52 @@ describe UploadedFile do ...@@ -120,6 +110,52 @@ describe UploadedFile do
end end
end end
describe '.initialize' do
context 'when no size is provided' do
it 'determine size from local path' do
file = described_class.new(temp_file.path)
expect(file.size).to eq(temp_file.size)
end
it 'raises an exception if is a remote file' do
expect do
described_class.new(nil, remote_id: 'id')
end.to raise_error(UploadedFile::UnknownSizeError, 'Unable to determine file size')
end
end
context 'when size is a number' do
let_it_be(:size) { 1.gigabyte }
it 'is overridden by the size of the local file' do
file = described_class.new(temp_file.path, size: size)
expect(file.size).to eq(temp_file.size)
end
it 'is respected if is a remote file' do
file = described_class.new(nil, remote_id: 'id', size: size)
expect(file.size).to eq(size)
end
end
context 'when size is a string' do
it 'is converted to a number' do
file = described_class.new(nil, remote_id: 'id', size: '1')
expect(file.size).to eq(1)
end
it 'raises an exception if does not represent a number' do
expect do
described_class.new(nil, remote_id: 'id', size: 'not a number')
end.to raise_error(UploadedFile::UnknownSizeError, 'Unable to determine file size')
end
end
end
describe '#sanitize_filename' do describe '#sanitize_filename' do
it { expect(described_class.new(temp_file.path).sanitize_filename('spaced name')).to eq('spaced_name') } it { expect(described_class.new(temp_file.path).sanitize_filename('spaced name')).to eq('spaced_name') }
it { expect(described_class.new(temp_file.path).sanitize_filename('#$%^&')).to eq('_____') } it { expect(described_class.new(temp_file.path).sanitize_filename('#$%^&')).to eq('_____') }
......
...@@ -3,6 +3,7 @@ require 'spec_helper' ...@@ -3,6 +3,7 @@ require 'spec_helper'
describe API::Runner, :clean_gitlab_redis_shared_state do describe API::Runner, :clean_gitlab_redis_shared_state do
include StubGitlabCalls include StubGitlabCalls
include RedisHelpers include RedisHelpers
include WorkhorseHelpers
let(:registration_token) { 'abcdefg123456' } let(:registration_token) { 'abcdefg123456' }
...@@ -1395,7 +1396,7 @@ describe API::Runner, :clean_gitlab_redis_shared_state do ...@@ -1395,7 +1396,7 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
expect(response).to have_gitlab_http_status(200) expect(response).to have_gitlab_http_status(200)
expect(response.content_type.to_s).to eq(Gitlab::Workhorse::INTERNAL_API_CONTENT_TYPE) expect(response.content_type.to_s).to eq(Gitlab::Workhorse::INTERNAL_API_CONTENT_TYPE)
expect(json_response['TempPath']).to eq(JobArtifactUploader.workhorse_local_upload_path) expect(json_response).not_to have_key('TempPath')
expect(json_response['RemoteObject']).to have_key('ID') expect(json_response['RemoteObject']).to have_key('ID')
expect(json_response['RemoteObject']).to have_key('GetURL') expect(json_response['RemoteObject']).to have_key('GetURL')
expect(json_response['RemoteObject']).to have_key('StoreURL') expect(json_response['RemoteObject']).to have_key('StoreURL')
...@@ -1562,15 +1563,16 @@ describe API::Runner, :clean_gitlab_redis_shared_state do ...@@ -1562,15 +1563,16 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
let!(:fog_connection) do let!(:fog_connection) do
stub_artifacts_object_storage(direct_upload: true) stub_artifacts_object_storage(direct_upload: true)
end end
let(:object) do
before do
fog_connection.directories.new(key: 'artifacts').files.create( fog_connection.directories.new(key: 'artifacts').files.create(
key: 'tmp/uploads/12312300', key: 'tmp/uploads/12312300',
body: 'content' body: 'content'
) )
end
let(:file_upload) { fog_to_uploaded_file(object) }
upload_artifacts(file_upload, headers_with_token, before do
{ 'file.remote_id' => remote_id }) upload_artifacts(file_upload, headers_with_token, 'file.remote_id' => remote_id)
end end
context 'when valid remote_id is used' do context 'when valid remote_id is used' do
...@@ -1804,12 +1806,13 @@ describe API::Runner, :clean_gitlab_redis_shared_state do ...@@ -1804,12 +1806,13 @@ describe API::Runner, :clean_gitlab_redis_shared_state do
end end
def upload_artifacts(file, headers = {}, params = {}) def upload_artifacts(file, headers = {}, params = {})
params = params.merge({ workhorse_finalize(
'file.path' => file.path, api("/jobs/#{job.id}/artifacts"),
'file.name' => file.original_filename method: :post,
}) file_key: :file,
params: params.merge(file: file),
post api("/jobs/#{job.id}/artifacts"), params: params, headers: headers headers: headers
)
end end
end end
......
...@@ -4,6 +4,7 @@ require 'spec_helper' ...@@ -4,6 +4,7 @@ require 'spec_helper'
describe 'Git LFS API and storage' do describe 'Git LFS API and storage' do
include LfsHttpHelpers include LfsHttpHelpers
include ProjectForksHelper include ProjectForksHelper
include WorkhorseHelpers
set(:project) { create(:project, :repository) } set(:project) { create(:project, :repository) }
set(:other_project) { create(:project, :repository) } set(:other_project) { create(:project, :repository) }
...@@ -933,7 +934,7 @@ describe 'Git LFS API and storage' do ...@@ -933,7 +934,7 @@ describe 'Git LFS API and storage' do
it_behaves_like 'a valid response' do it_behaves_like 'a valid response' do
it 'responds with status 200, location of LFS remote store and object details' do it 'responds with status 200, location of LFS remote store and object details' do
expect(json_response['TempPath']).to eq(LfsObjectUploader.workhorse_local_upload_path) expect(json_response).not_to have_key('TempPath')
expect(json_response['RemoteObject']).to have_key('ID') expect(json_response['RemoteObject']).to have_key('ID')
expect(json_response['RemoteObject']).to have_key('GetURL') expect(json_response['RemoteObject']).to have_key('GetURL')
expect(json_response['RemoteObject']).to have_key('StoreURL') expect(json_response['RemoteObject']).to have_key('StoreURL')
...@@ -992,10 +993,17 @@ describe 'Git LFS API and storage' do ...@@ -992,10 +993,17 @@ describe 'Git LFS API and storage' do
stub_lfs_object_storage(direct_upload: true) stub_lfs_object_storage(direct_upload: true)
end end
let(:tmp_object) do
fog_connection.directories.new(key: 'lfs-objects').files.create(
key: 'tmp/uploads/12312300',
body: 'content'
)
end
['123123', '../../123123'].each do |remote_id| ['123123', '../../123123'].each do |remote_id|
context "with invalid remote_id: #{remote_id}" do context "with invalid remote_id: #{remote_id}" do
subject do subject do
put_finalize(with_tempfile: true, args: { put_finalize(remote_object: tmp_object, args: {
'file.remote_id' => remote_id 'file.remote_id' => remote_id
}) })
end end
...@@ -1009,15 +1017,8 @@ describe 'Git LFS API and storage' do ...@@ -1009,15 +1017,8 @@ describe 'Git LFS API and storage' do
end end
context 'with valid remote_id' do context 'with valid remote_id' do
before do
fog_connection.directories.new(key: 'lfs-objects').files.create(
key: 'tmp/uploads/12312300',
body: 'content'
)
end
subject do subject do
put_finalize(with_tempfile: true, args: { put_finalize(remote_object: tmp_object, args: {
'file.remote_id' => '12312300', 'file.remote_id' => '12312300',
'file.name' => 'name' 'file.name' => 'name'
}) })
...@@ -1027,6 +1028,10 @@ describe 'Git LFS API and storage' do ...@@ -1027,6 +1028,10 @@ describe 'Git LFS API and storage' do
subject subject
expect(response).to have_gitlab_http_status(200) expect(response).to have_gitlab_http_status(200)
object = LfsObject.find_by_oid(sample_oid)
expect(object).to be_present
expect(object.file.read).to eq(tmp_object.body)
end end
it 'schedules migration of file to object storage' do it 'schedules migration of file to object storage' do
...@@ -1268,28 +1273,31 @@ describe 'Git LFS API and storage' do ...@@ -1268,28 +1273,31 @@ describe 'Git LFS API and storage' do
put authorize_url(project, sample_oid, sample_size), params: {}, headers: authorize_headers put authorize_url(project, sample_oid, sample_size), params: {}, headers: authorize_headers
end end
def put_finalize(lfs_tmp = lfs_tmp_file, with_tempfile: false, verified: true, args: {}) def put_finalize(lfs_tmp = lfs_tmp_file, with_tempfile: false, verified: true, remote_object: nil, args: {})
uploaded_file = nil
if with_tempfile
upload_path = LfsObjectUploader.workhorse_local_upload_path upload_path = LfsObjectUploader.workhorse_local_upload_path
file_path = upload_path + '/' + lfs_tmp if lfs_tmp file_path = upload_path + '/' + lfs_tmp if lfs_tmp
if with_tempfile
FileUtils.mkdir_p(upload_path) FileUtils.mkdir_p(upload_path)
FileUtils.touch(file_path) FileUtils.touch(file_path)
end
extra_args = {
'file.path' => file_path,
'file.name' => File.basename(file_path)
}
put_finalize_with_args(args.merge(extra_args).compact, verified: verified) uploaded_file = UploadedFile.new(file_path, filename: File.basename(file_path))
elsif remote_object
uploaded_file = fog_to_uploaded_file(remote_object)
end end
def put_finalize_with_args(args, verified:)
finalize_headers = headers finalize_headers = headers
finalize_headers.merge!(workhorse_internal_api_request_header) if verified finalize_headers.merge!(workhorse_internal_api_request_header) if verified
put objects_url(project, sample_oid, sample_size), params: args, headers: finalize_headers workhorse_finalize(
objects_url(project, sample_oid, sample_size),
method: :put,
file_key: :file,
params: args.merge(file: uploaded_file),
headers: finalize_headers
)
end end
def lfs_tmp_file def lfs_tmp_file
......
...@@ -22,16 +22,40 @@ module WorkhorseHelpers ...@@ -22,16 +22,40 @@ module WorkhorseHelpers
# workhorse_post_with_file will transform file_key inside params as if it was disk accelerated by workhorse # workhorse_post_with_file will transform file_key inside params as if it was disk accelerated by workhorse
def workhorse_post_with_file(url, file_key:, params:) def workhorse_post_with_file(url, file_key:, params:)
workhorse_request_with_file(:post, url,
file_key: file_key,
params: params,
env: { 'CONTENT_TYPE' => 'multipart/form-data' },
send_rewritten_field: true
)
end
# workhorse_finalize will transform file_key inside params as if it was the finalize call of an inline object storage upload.
# note that based on the content of the params it can simulate a disc acceleration or an object storage upload
def workhorse_finalize(url, method: :post, file_key:, params:, headers: {})
workhorse_request_with_file(method, url,
file_key: file_key,
params: params,
extra_headers: headers,
send_rewritten_field: false
)
end
def workhorse_request_with_file(method, url, file_key:, params:, env: {}, extra_headers: {}, send_rewritten_field:)
workhorse_params = params.dup workhorse_params = params.dup
file = workhorse_params.delete(file_key) file = workhorse_params.delete(file_key)
workhorse_params.merge!(workhorse_disk_accelerated_file_params(file_key, file)) workhorse_params = workhorse_disk_accelerated_file_params(file_key, file).merge(workhorse_params)
post(url, headers = if send_rewritten_field
params: workhorse_params, workhorse_rewritten_fields_header(file_key => file.path)
headers: workhorse_rewritten_fields_header(file_key => file.path), else
env: { 'CONTENT_TYPE' => 'multipart/form-data' } {}
) end
headers.merge!(extra_headers)
process(method, url, params: workhorse_params, headers: headers, env: env)
end end
private private
...@@ -45,9 +69,24 @@ module WorkhorseHelpers ...@@ -45,9 +69,24 @@ module WorkhorseHelpers
end end
def workhorse_disk_accelerated_file_params(key, file) def workhorse_disk_accelerated_file_params(key, file)
return {} unless file
{ {
"#{key}.name" => file.original_filename, "#{key}.name" => file.original_filename,
"#{key}.path" => file.path "#{key}.size" => file.size
} }.tap do |params|
params["#{key}.path"] = file.path if file.path
params["#{key}.remote_id"] = file.remote_id if file.respond_to?(:remote_id) && file.remote_id
end
end
def fog_to_uploaded_file(file)
filename = File.basename(file.key)
UploadedFile.new(nil,
filename: filename,
remote_id: filename,
size: file.content_length
)
end end
end end
...@@ -338,7 +338,7 @@ shared_examples 'handle uploads authorize' do ...@@ -338,7 +338,7 @@ shared_examples 'handle uploads authorize' do
it_behaves_like 'a valid response' do it_behaves_like 'a valid response' do
it 'responds with status 200, location of uploads remote store and object details' do it 'responds with status 200, location of uploads remote store and object details' do
expect(json_response['TempPath']).to eq(uploader_class.workhorse_local_upload_path) expect(json_response).not_to have_key('TempPath')
expect(json_response['RemoteObject']).to have_key('ID') expect(json_response['RemoteObject']).to have_key('ID')
expect(json_response['RemoteObject']).to have_key('GetURL') expect(json_response['RemoteObject']).to have_key('GetURL')
expect(json_response['RemoteObject']).to have_key('StoreURL') expect(json_response['RemoteObject']).to have_key('StoreURL')
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment