Commit 242c4c8f authored by Kassio Borges's avatar Kassio Borges

Fix project import from remote to import from S3

Currently, `Import::GitlabProjects::CreateProjectFromRemoteFileService`
validates:
1. The file size based on the `Content-Length` header, which S3 doesn't
   returb on `head` requests;
1. The file content type, which S3 use `application/x-tar` instead of
   `application/gzip` for a `tar.gz`;

Related to: https://gitlab.com/gitlab-org/gitlab/-/issues/337230

Changelog: fixed
MR: https://gitlab.com/gitlab-org/gitlab/-/merge_requests/75170
parent 1bd0df77
...@@ -4,7 +4,10 @@ module Import ...@@ -4,7 +4,10 @@ module Import
module GitlabProjects module GitlabProjects
class CreateProjectFromRemoteFileService < CreateProjectFromUploadedFileService class CreateProjectFromRemoteFileService < CreateProjectFromUploadedFileService
FILE_SIZE_LIMIT = 10.gigabytes FILE_SIZE_LIMIT = 10.gigabytes
ALLOWED_CONTENT_TYPES = ['application/gzip'].freeze ALLOWED_CONTENT_TYPES = [
'application/gzip', # most common content-type when fetching a tar.gz
'application/x-tar' # aws-s3 uses x-tar for tar.gz files
].freeze
validate :valid_remote_import_url? validate :valid_remote_import_url?
validate :validate_file_size validate :validate_file_size
...@@ -44,17 +47,27 @@ module Import ...@@ -44,17 +47,27 @@ module Import
end end
def validate_content_type def validate_content_type
# AWS-S3 presigned URLs don't respond to HTTP HEAD requests,
# so file type cannot be validated
# https://gitlab.com/gitlab-org/gitlab/-/merge_requests/75170#note_748059103
return if amazon_s3?
if headers['content-type'].blank? if headers['content-type'].blank?
errors.add(:base, "Missing 'ContentType' header") errors.add(:base, "Missing 'ContentType' header")
elsif !ALLOWED_CONTENT_TYPES.include?(headers['content-type']) elsif !ALLOWED_CONTENT_TYPES.include?(headers['content-type'])
errors.add(:base, "Remote file content type '%{content_type}' not allowed. (Allowed content types: %{allowed})" % { errors.add(:base, "Remote file content type '%{content_type}' not allowed. (Allowed content types: %{allowed})" % {
content_type: headers['content-type'], content_type: headers['content-type'],
allowed: ALLOWED_CONTENT_TYPES.join(',') allowed: ALLOWED_CONTENT_TYPES.join(', ')
}) })
end end
end end
def validate_file_size def validate_file_size
# AWS-S3 presigned URLs don't respond to HTTP HEAD requests,
# so file size cannot be validated
# https://gitlab.com/gitlab-org/gitlab/-/merge_requests/75170#note_748059103
return if amazon_s3?
if headers['content-length'].to_i == 0 if headers['content-length'].to_i == 0
errors.add(:base, "Missing 'ContentLength' header") errors.add(:base, "Missing 'ContentLength' header")
elsif headers['content-length'].to_i > FILE_SIZE_LIMIT elsif headers['content-length'].to_i > FILE_SIZE_LIMIT
...@@ -64,6 +77,10 @@ module Import ...@@ -64,6 +77,10 @@ module Import
end end
end end
def amazon_s3?
headers['Server'] == 'AmazonS3' && headers['x-amz-request-id'].present?
end
def headers def headers
return {} if params[:remote_import_url].blank? || !valid_remote_import_url? return {} if params[:remote_import_url].blank? || !valid_remote_import_url?
......
...@@ -18,9 +18,10 @@ RSpec.describe ::Import::GitlabProjects::CreateProjectFromRemoteFileService do ...@@ -18,9 +18,10 @@ RSpec.describe ::Import::GitlabProjects::CreateProjectFromRemoteFileService do
subject { described_class.new(user, params) } subject { described_class.new(user, params) }
shared_examples 'successfully import' do |content_type|
it 'creates a project and returns a successful response' do it 'creates a project and returns a successful response' do
stub_headers_for(remote_url, { stub_headers_for(remote_url, {
'content-type' => 'application/gzip', 'content-type' => content_type,
'content-length' => '10' 'content-length' => '10'
}) })
...@@ -35,6 +36,10 @@ RSpec.describe ::Import::GitlabProjects::CreateProjectFromRemoteFileService do ...@@ -35,6 +36,10 @@ RSpec.describe ::Import::GitlabProjects::CreateProjectFromRemoteFileService do
expect(response.payload.path).to eq('path') expect(response.payload.path).to eq('path')
expect(response.payload.namespace).to eq(user.namespace) expect(response.payload.namespace).to eq(user.namespace)
end end
end
it_behaves_like 'successfully import', 'application/gzip'
it_behaves_like 'successfully import', 'application/x-tar'
context 'when the file url is invalid' do context 'when the file url is invalid' do
it 'returns an erred response with the reason of the failure' do it 'returns an erred response with the reason of the failure' do
...@@ -79,7 +84,7 @@ RSpec.describe ::Import::GitlabProjects::CreateProjectFromRemoteFileService do ...@@ -79,7 +84,7 @@ RSpec.describe ::Import::GitlabProjects::CreateProjectFromRemoteFileService do
expect(response).not_to be_success expect(response).not_to be_success
expect(response.http_status).to eq(:bad_request) expect(response.http_status).to eq(:bad_request)
expect(response.message) expect(response.message)
.to eq("Remote file content type 'application/js' not allowed. (Allowed content types: application/gzip)") .to eq("Remote file content type 'application/js' not allowed. (Allowed content types: application/gzip, application/x-tar)")
end end
end end
...@@ -130,6 +135,20 @@ RSpec.describe ::Import::GitlabProjects::CreateProjectFromRemoteFileService do ...@@ -130,6 +135,20 @@ RSpec.describe ::Import::GitlabProjects::CreateProjectFromRemoteFileService do
end end
end end
it 'does not validate content-type or content-length when the file is stored in AWS-S3' do
stub_headers_for(remote_url, {
'Server' => 'AmazonS3',
'x-amz-request-id' => 'Something'
})
response = nil
expect { response = subject.execute }
.to change(Project, :count)
expect(response).to be_success
expect(response.http_status).to eq(:ok)
end
context 'when required parameters are not provided' do context 'when required parameters are not provided' do
let(:params) { {} } let(:params) { {} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment