Commit a53848de authored by Aleksei Lipniagov's avatar Aleksei Lipniagov Committed by Alessio Caiazza

Use WH accelerated file uploads for Project Import

This is related to API endpoints only, UI endpoints
(Import::GitlabProjectsController) will be accelerated
in the next, separate MR.
Since we are touching an existing API, we should deploy it gradually.
So this MR will support both accelerated and "vanilla"
uploads for the endpoint.
parent beeaf441
---
title: Enable Workhorse upload acceleration for Project Import API
merge_request: 25361
author:
type: performance
...@@ -4,6 +4,8 @@ module API ...@@ -4,6 +4,8 @@ module API
class ProjectImport < Grape::API class ProjectImport < Grape::API
include PaginationParams include PaginationParams
MAXIMUM_FILE_SIZE = 50.megabytes
helpers Helpers::ProjectsHelpers helpers Helpers::ProjectsHelpers
helpers Helpers::FileUploadHelpers helpers Helpers::FileUploadHelpers
...@@ -19,6 +21,10 @@ module API ...@@ -19,6 +21,10 @@ module API
def rate_limiter def rate_limiter
::Gitlab::ApplicationRateLimiter ::Gitlab::ApplicationRateLimiter
end end
def with_workhorse_upload_acceleration?
request.headers[Gitlab::Workhorse::INTERNAL_API_REQUEST_HEADER].present?
end
end end
before do before do
...@@ -26,10 +32,25 @@ module API ...@@ -26,10 +32,25 @@ module API
end end
resource :projects, requirements: API::NAMESPACE_OR_PROJECT_REQUIREMENTS do resource :projects, requirements: API::NAMESPACE_OR_PROJECT_REQUIREMENTS do
desc 'Workhorse authorize the project import upload' do
detail 'This feature was introduced in GitLab 12.9'
end
post 'import/authorize' do
require_gitlab_workhorse!
status 200
content_type Gitlab::Workhorse::INTERNAL_API_CONTENT_TYPE
ImportExportUploader.workhorse_authorize(has_length: false, maximum_size: MAXIMUM_FILE_SIZE)
end
params do params do
requires :path, type: String, desc: 'The new project path and name' requires :path, type: String, desc: 'The new project path and name'
# TODO: remove rubocop disable - https://gitlab.com/gitlab-org/gitlab/issues/14960 # TODO: remove rubocop disable - https://gitlab.com/gitlab-org/gitlab/issues/14960
requires :file, type: File, desc: 'The project export file to be imported' # rubocop:disable Scalability/FileUploads # and mark WH fields as required (instead of optional) after the WH version including
# https://gitlab.com/gitlab-org/gitlab-workhorse/-/merge_requests/459
# is deployed and GITLAB_WORKHORSE_VERSION is updated accordingly.
requires :file, types: [::API::Validations::Types::WorkhorseFile, File], desc: 'The project export file to be imported' # rubocop:disable Scalability/FileUploads
optional :name, type: String, desc: 'The name of the project to be imported. Defaults to the path of the project if not provided.' optional :name, type: String, desc: 'The name of the project to be imported. Defaults to the path of the project if not provided.'
optional :namespace, type: String, desc: "The ID or name of the namespace that the project will be imported into. Defaults to the current user's namespace." optional :namespace, type: String, desc: "The ID or name of the namespace that the project will be imported into. Defaults to the current user's namespace."
optional :overwrite, type: Boolean, default: false, desc: 'If there is a project in the same namespace and with the same name overwrite it' optional :overwrite, type: Boolean, default: false, desc: 'If there is a project in the same namespace and with the same name overwrite it'
...@@ -38,12 +59,24 @@ module API ...@@ -38,12 +59,24 @@ module API
desc: 'New project params to override values in the export' do desc: 'New project params to override values in the export' do
use :optional_project_params use :optional_project_params
end end
optional 'file.path', type: String, desc: 'Path to locally stored body (generated by Workhorse)'
optional 'file.name', type: String, desc: 'Real filename as send in Content-Disposition (generated by Workhorse)'
optional 'file.type', type: String, desc: 'Real content type as send in Content-Type (generated by Workhorse)'
optional 'file.size', type: Integer, desc: 'Real size of file (generated by Workhorse)'
optional 'file.md5', type: String, desc: 'MD5 checksum of the file (generated by Workhorse)'
optional 'file.sha1', type: String, desc: 'SHA1 checksum of the file (generated by Workhorse)'
optional 'file.sha256', type: String, desc: 'SHA256 checksum of the file (generated by Workhorse)'
optional 'file.etag', type: String, desc: 'Etag of the file (generated by Workhorse)'
optional 'file.remote_id', type: String, desc: 'Remote_id of the file (generated by Workhorse)'
optional 'file.remote_url', type: String, desc: 'Remote_url of the file (generated by Workhorse)'
end end
desc 'Create a new project import' do desc 'Create a new project import' do
detail 'This feature was introduced in GitLab 10.6.' detail 'This feature was introduced in GitLab 10.6.'
success Entities::ProjectImportStatus success Entities::ProjectImportStatus
end end
post 'import' do post 'import' do
require_gitlab_workhorse! if with_workhorse_upload_acceleration?
key = "project_import".to_sym key = "project_import".to_sym
if throttled?(key, [current_user, key]) if throttled?(key, [current_user, key])
...@@ -52,8 +85,6 @@ module API ...@@ -52,8 +85,6 @@ module API
render_api_error!({ error: _('This endpoint has been requested too many times. Try again later.') }, 429) render_api_error!({ error: _('This endpoint has been requested too many times. Try again later.') }, 429)
end end
validate_file!
Gitlab::QueryLimiting.whitelist('https://gitlab.com/gitlab-org/gitlab-foss/issues/42437') Gitlab::QueryLimiting.whitelist('https://gitlab.com/gitlab-org/gitlab-foss/issues/42437')
namespace = if import_params[:namespace] namespace = if import_params[:namespace]
...@@ -62,11 +93,21 @@ module API ...@@ -62,11 +93,21 @@ module API
current_user.namespace current_user.namespace
end end
# TODO: remove the condition after the WH version including
# https://gitlab.com/gitlab-org/gitlab-workhorse/-/merge_requests/459
# is deployed and GITLAB_WORKHORSE_VERSION is updated accordingly.
file = if with_workhorse_upload_acceleration?
import_params[:file] || bad_request!('Unable to process project import file')
else
validate_file!
import_params[:file]['tempfile']
end
project_params = { project_params = {
path: import_params[:path], path: import_params[:path],
namespace_id: namespace.id, namespace_id: namespace.id,
name: import_params[:name], name: import_params[:name],
file: import_params[:file]['tempfile'], file: file,
overwrite: import_params[:overwrite] overwrite: import_params[:overwrite]
} }
......
...@@ -3,11 +3,16 @@ ...@@ -3,11 +3,16 @@
require 'spec_helper' require 'spec_helper'
describe API::ProjectImport do describe API::ProjectImport do
include WorkhorseHelpers
let(:export_path) { "#{Dir.tmpdir}/project_export_spec" } let(:export_path) { "#{Dir.tmpdir}/project_export_spec" }
let(:user) { create(:user) } let(:user) { create(:user) }
let(:file) { File.join('spec', 'features', 'projects', 'import_export', 'test_project_export.tar.gz') } let(:file) { File.join('spec', 'features', 'projects', 'import_export', 'test_project_export.tar.gz') }
let(:namespace) { create(:group) } let(:namespace) { create(:group) }
let(:workhorse_token) { JWT.encode({ 'iss' => 'gitlab-workhorse' }, Gitlab::Workhorse.secret, 'HS256') }
let(:workhorse_headers) { { 'GitLab-Workhorse' => '1.0', Gitlab::Workhorse::INTERNAL_API_REQUEST_HEADER => workhorse_token } }
before do before do
allow_any_instance_of(Gitlab::ImportExport).to receive(:storage_path).and_return(export_path) allow_any_instance_of(Gitlab::ImportExport).to receive(:storage_path).and_return(export_path)
stub_uploads_object_storage(FileUploader) stub_uploads_object_storage(FileUploader)
...@@ -209,6 +214,55 @@ describe API::ProjectImport do ...@@ -209,6 +214,55 @@ describe API::ProjectImport do
end end
end end
context 'with direct upload enabled' do
subject { upload_archive(file_upload, workhorse_headers, params) }
let(:file_name) { 'project_export.tar.gz' }
let!(:fog_connection) do
stub_uploads_object_storage(ImportExportUploader, direct_upload: true)
end
let(:tmp_object) do
fog_connection.directories.new(key: 'uploads').files.create(
key: "tmp/uploads/#{file_name}",
body: fixture_file_upload(file)
)
end
let(:file_upload) { fog_to_uploaded_file(tmp_object) }
let(:params) do
{
path: 'test-import-project',
namespace: namespace.id,
'file.remote_id' => file_name,
'file.size' => file_upload.size
}
end
before do
allow(ImportExportUploader).to receive(:workhorse_upload_path).and_return('/')
end
it 'accepts the request and stores the file' do
expect { subject }.to change { Project.count }.by(1)
expect(response).to have_gitlab_http_status(:created)
end
end
def upload_archive(file, headers = {}, params = {})
workhorse_finalize(
api("/projects/import", user),
method: :post,
file_key: :file,
params: params.merge(file: file_upload),
headers: headers,
send_rewritten_field: true
)
end
def stub_import(namespace) def stub_import(namespace)
expect_any_instance_of(ProjectImportState).to receive(:schedule) expect_any_instance_of(ProjectImportState).to receive(:schedule)
expect(::Projects::CreateService).to receive(:new).with(user, hash_including(namespace_id: namespace.id)).and_call_original expect(::Projects::CreateService).to receive(:new).with(user, hash_including(namespace_id: namespace.id)).and_call_original
...@@ -238,4 +292,59 @@ describe API::ProjectImport do ...@@ -238,4 +292,59 @@ describe API::ProjectImport do
'import_error' => 'error') 'import_error' => 'error')
end end
end end
describe 'POST /projects/import/authorize' do
subject { post api('/projects/import/authorize', user), headers: workhorse_headers }
it 'authorizes importing project with workhorse header' do
subject
expect(response).to have_gitlab_http_status(:ok)
expect(response.content_type.to_s).to eq(Gitlab::Workhorse::INTERNAL_API_CONTENT_TYPE)
end
it 'rejects requests that bypassed gitlab-workhorse' do
workhorse_headers.delete(Gitlab::Workhorse::INTERNAL_API_REQUEST_HEADER)
subject
expect(response).to have_gitlab_http_status(:forbidden)
end
context 'when using remote storage' do
context 'when direct upload is enabled' do
before do
stub_uploads_object_storage(ImportExportUploader, enabled: true, direct_upload: true)
end
it 'responds with status 200, location of file remote store and object details' do
subject
expect(response).to have_gitlab_http_status(:ok)
expect(response.content_type.to_s).to eq(Gitlab::Workhorse::INTERNAL_API_CONTENT_TYPE)
expect(json_response).not_to have_key('TempPath')
expect(json_response['RemoteObject']).to have_key('ID')
expect(json_response['RemoteObject']).to have_key('GetURL')
expect(json_response['RemoteObject']).to have_key('StoreURL')
expect(json_response['RemoteObject']).to have_key('DeleteURL')
expect(json_response['RemoteObject']).to have_key('MultipartUpload')
end
end
context 'when direct upload is disabled' do
before do
stub_uploads_object_storage(ImportExportUploader, enabled: true, direct_upload: false)
end
it 'handles as a local file' do
subject
expect(response).to have_gitlab_http_status(:ok)
expect(response.content_type.to_s).to eq(Gitlab::Workhorse::INTERNAL_API_CONTENT_TYPE)
expect(json_response['TempPath']).to eq(ImportExportUploader.workhorse_local_upload_path)
expect(json_response['RemoteObject']).to be_nil
end
end
end
end
end end
...@@ -32,12 +32,12 @@ module WorkhorseHelpers ...@@ -32,12 +32,12 @@ module WorkhorseHelpers
# workhorse_finalize will transform file_key inside params as if it was the finalize call of an inline object storage upload. # workhorse_finalize will transform file_key inside params as if it was the finalize call of an inline object storage upload.
# note that based on the content of the params it can simulate a disc acceleration or an object storage upload # note that based on the content of the params it can simulate a disc acceleration or an object storage upload
def workhorse_finalize(url, method: :post, file_key:, params:, headers: {}) def workhorse_finalize(url, method: :post, file_key:, params:, headers: {}, send_rewritten_field: false)
workhorse_request_with_file(method, url, workhorse_request_with_file(method, url,
file_key: file_key, file_key: file_key,
params: params, params: params,
extra_headers: headers, extra_headers: headers,
send_rewritten_field: false send_rewritten_field: send_rewritten_field
) )
end end
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment