Commit ae2fde44 authored by Grzegorz Bizon's avatar Grzegorz Bizon

Merge branch 'store-lfs-on-object-storage-by-default' into 'master'

Store lfs on object storage by default

See merge request gitlab-org/gitlab-ee!4794
parents 68fcad12 b9c177db
......@@ -107,16 +107,16 @@ gem 'carrierwave', '~> 1.2'
gem 'dropzonejs-rails', '~> 0.7.1'
# for backups
gem 'fog-aws', '~> 1.4'
gem 'fog-aws', '~> 2.0'
gem 'fog-core', '~> 1.44'
gem 'fog-google', '~> 0.5'
gem 'fog-google', '~> 1.3'
gem 'fog-local', '~> 0.3'
gem 'fog-openstack', '~> 0.1'
gem 'fog-rackspace', '~> 0.1.1'
gem 'fog-aliyun', '~> 0.2.0'
# for Google storage
gem 'google-api-client', '~> 0.13.6'
gem 'google-api-client', '~> 0.19'
# for aws storage
gem 'unf', '~> 0.1.4'
......
......@@ -213,7 +213,7 @@ GEM
et-orbi (1.0.3)
tzinfo
eventmachine (1.0.8)
excon (0.57.1)
excon (0.60.0)
execjs (2.6.0)
expression_parser (0.9.0)
factory_bot (4.8.2)
......@@ -255,19 +255,20 @@ GEM
fog-json (~> 1.0)
ipaddress (~> 0.8)
xml-simple (~> 1.1)
fog-aws (1.4.0)
fog-aws (2.0.1)
fog-core (~> 1.38)
fog-json (~> 1.0)
fog-xml (~> 0.1)
ipaddress (~> 0.8)
fog-core (1.44.3)
fog-core (1.45.0)
builder
excon (~> 0.49)
excon (~> 0.58)
formatador (~> 0.2)
fog-google (0.5.3)
fog-google (1.3.0)
fog-core
fog-json
fog-xml
google-api-client (~> 0.19.1)
fog-json (1.0.2)
fog-core (~> 1.0)
multi_json (~> 1.10)
......@@ -358,9 +359,9 @@ GEM
json
multi_json
request_store (>= 1.0)
google-api-client (0.13.6)
google-api-client (0.19.8)
addressable (~> 2.5, >= 2.5.1)
googleauth (~> 0.5)
googleauth (>= 0.5, < 0.7.0)
httpclient (>= 2.8.1, < 3.0)
mime-types (~> 3.0)
representable (~> 3.0)
......@@ -531,7 +532,7 @@ GEM
mini_portile2 (2.3.0)
minitest (5.7.0)
mousetrap-rails (1.4.6)
multi_json (1.12.2)
multi_json (1.13.1)
multi_xml (0.6.0)
multipart-post (2.0.0)
mustermann (1.0.0)
......@@ -1077,9 +1078,9 @@ DEPENDENCIES
flipper-active_record (~> 0.11.0)
flipper-active_support_cache_store (~> 0.11.0)
fog-aliyun (~> 0.2.0)
fog-aws (~> 1.4)
fog-aws (~> 2.0)
fog-core (~> 1.44)
fog-google (~> 0.5)
fog-google (~> 1.3)
fog-local (~> 0.3)
fog-openstack (~> 0.1)
fog-rackspace (~> 0.1.1)
......@@ -1101,7 +1102,7 @@ DEPENDENCIES
gollum-lib (~> 4.2)
gollum-rugged_adapter (~> 0.4.4)
gon (~> 6.1.0)
google-api-client (~> 0.13.6)
google-api-client (~> 0.19)
google-protobuf (= 3.5.1)
gpgme
grape (~> 1.0)
......
......@@ -17,20 +17,23 @@ class Projects::LfsStorageController < Projects::GitHttpClientController
def upload_authorize
set_workhorse_internal_api_content_type
render json: Gitlab::Workhorse.lfs_upload_ok(oid, size)
authorized = LfsObjectUploader.workhorse_authorize
authorized.merge!(LfsOid: oid, LfsSize: size)
render json: authorized
end
def upload_finalize
unless tmp_filename
render_lfs_forbidden
return
end
if store_file(oid, size, tmp_filename)
if store_file!(oid, size)
head 200
else
render plain: 'Unprocessable entity', status: 422
end
rescue ActiveRecord::RecordInvalid
render_400
rescue ObjectStorage::RemoteStoreError
render_lfs_forbidden
end
private
......@@ -51,38 +54,28 @@ class Projects::LfsStorageController < Projects::GitHttpClientController
params[:size].to_i
end
def tmp_filename
name = request.headers['X-Gitlab-Lfs-Tmp']
return if name.include?('/')
return unless oid.present? && name.start_with?(oid)
name
end
def store_file!(oid, size)
object = LfsObject.find_by(oid: oid, size: size)
unless object&.file&.exists?
object = create_file!(oid, size)
end
def store_file(oid, size, tmp_file)
# Define tmp_file_path early because we use it in "ensure"
tmp_file_path = File.join(LfsObjectUploader.workhorse_upload_path, tmp_file)
return unless object
object = LfsObject.find_or_create_by(oid: oid, size: size)
file_exists = object.file.exists? || move_tmp_file_to_storage(object, tmp_file_path)
file_exists && link_to_project(object)
ensure
FileUtils.rm_f(tmp_file_path)
link_to_project!(object)
end
def move_tmp_file_to_storage(object, path)
File.open(path) do |f|
object.file = f
def create_file!(oid, size)
LfsObject.new(oid: oid, size: size).tap do |object|
object.file.store_workhorse_file!(params, :file)
object.save!
end
object.file.store!
object.save
end
def link_to_project(object)
def link_to_project!(object)
if object && !object.projects.exists?(storage_project.id)
object.projects << storage_project
object.save
object.save!
end
end
end
......@@ -9,6 +9,12 @@ class LfsObject < ActiveRecord::Base
mount_uploader :file, LfsObjectUploader
before_save :update_file_store
def update_file_store
self.file_store = file.object_store
end
def project_allowed_access?(project)
projects.exists?(project.lfs_storage_project.id)
end
......
......@@ -2,11 +2,6 @@ class LfsObjectUploader < GitlabUploader
extend Workhorse::UploadPath
include ObjectStorage::Concern
# LfsObject are in `tmp/upload` instead of `tmp/uploads`
def self.workhorse_upload_path
File.join(root, 'tmp/upload')
end
storage_options Gitlab.config.lfs
def filename
......
---
title: Upgrade GitLab Workhorse to 4.0.0
merge_request:
author:
type: added
......@@ -794,7 +794,7 @@ test:
provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1
region: us-east-1
artifacts:
path: tmp/tests/artifacts
enabled: true
......@@ -808,7 +808,7 @@ test:
provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1
region: us-east-1
uploads:
storage_path: tmp/tests/public
enabled: true
......@@ -818,7 +818,7 @@ test:
provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1
region: us-east-1
gitlab:
host: localhost
port: 80
......
......@@ -397,6 +397,7 @@ Settings.lfs['storage_path'] = Settings.absolute(Settings.lfs['storage_path'] ||
Settings.lfs['object_store'] ||= Settingslogic.new({})
Settings.lfs['object_store']['enabled'] = false if Settings.lfs['object_store']['enabled'].nil?
Settings.lfs['object_store']['remote_directory'] ||= nil
Settings.lfs['object_store']['direct_upload'] = false if Settings.lfs['object_store']['direct_upload'].nil?
Settings.lfs['object_store']['background_upload'] = true if Settings.lfs['object_store']['background_upload'].nil?
Settings.lfs['object_store']['proxy_download'] = false if Settings.lfs['object_store']['proxy_download'].nil?
# Convert upload connection settings to use string keys, to make Fog happy
......
......@@ -28,16 +28,4 @@ if File.exist?(aws_file)
# when fog_public is false and provider is AWS or Google, defaults to 600
config.fog_authenticated_url_expiration = 1 << 29
end
# Mocking Fog requests, based on: https://github.com/carrierwaveuploader/carrierwave/wiki/How-to%3A-Test-Fog-based-uploaders
if Rails.env.test?
Fog.mock!
connection = ::Fog::Storage.new(
aws_access_key_id: AWS_CONFIG['access_key_id'],
aws_secret_access_key: AWS_CONFIG['secret_access_key'],
provider: 'AWS',
region: AWS_CONFIG['region']
)
connection.directories.create(key: AWS_CONFIG['bucket'])
end
end
......@@ -63,6 +63,7 @@ For source installations the following settings are nested under `lfs:` and then
|---------|-------------|---------|
| `enabled` | Enable/disable object storage | `false` |
| `remote_directory` | The bucket name where LFS objects will be stored| |
| `direct_upload` | Set to true to enable direct upload of LFS without the need of local shared storage. Option may be removed once we decide to support only single storage for all files. | `false` |
| `background_upload` | Set to false to disable automatic upload. Option may be removed once upload is direct to S3 | `true` |
| `proxy_download` | Set to false to disable proxying all files served. Option allows to reduce egress traffic as this allows clients to download directly from remote storage instead of proxying all data | `false` |
| `connection` | Various connection options described below | |
......
......@@ -10,6 +10,9 @@ module ObjectStorage
UnknownStoreError = Class.new(StandardError)
ObjectStorageUnavailable = Class.new(StandardError)
DIRECT_UPLOAD_TIMEOUT = 4.hours
TMP_UPLOAD_PATH = 'tmp/upload'.freeze
module Store
LOCAL = 1
REMOTE = 2
......@@ -124,6 +127,10 @@ module ObjectStorage
object_store_options.enabled
end
def direct_upload_enabled?
object_store_options.direct_upload
end
def background_upload_enabled?
object_store_options.background_upload
end
......@@ -151,6 +158,45 @@ module ObjectStorage
def serialization_column(model_class, mount_point)
model_class.uploader_options.dig(mount_point, :mount_on) || mount_point
end
def workhorse_authorize
if options = workhorse_remote_upload_options
{ RemoteObject: options }
else
{ TempPath: workhorse_local_upload_path }
end
end
def workhorse_local_upload_path
File.join(self.root, TMP_UPLOAD_PATH)
end
def workhorse_remote_upload_options
return unless self.object_store_enabled?
return unless self.direct_upload_enabled?
id = [CarrierWave.generate_cache_id, SecureRandom.hex].join('-')
upload_path = File.join(TMP_UPLOAD_PATH, id)
connection = ::Fog::Storage.new(self.object_store_credentials)
expire_at = Time.now + DIRECT_UPLOAD_TIMEOUT
options = { 'Content-Type' => 'application/octet-stream' }
{
ID: id,
GetURL: connection.get_object_https_url(remote_store_path, upload_path, expire_at),
DeleteURL: connection.delete_object_url(remote_store_path, upload_path, expire_at),
StoreURL: connection.put_object_url(remote_store_path, upload_path, expire_at, options)
}
end
end
# allow to configure and overwrite the filename
def filename
@filename || super || file&.filename # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
def filename=(filename)
@filename = filename # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
def file_storage?
......@@ -199,10 +245,6 @@ module ObjectStorage
end
end
def filename
super || file&.filename
end
#
# Move the file to another store
#
......@@ -263,6 +305,18 @@ module ObjectStorage
}
end
def store_workhorse_file!(params, identifier)
filename = params["#{identifier}.name"]
if remote_object_id = params["#{identifier}.remote_id"]
store_remote_file!(remote_object_id, filename)
elsif local_path = params["#{identifier}.path"]
store_local_file!(local_path, filename)
else
raise RemoteStoreError, 'Bad file'
end
end
private
def schedule_background_upload?
......@@ -272,6 +326,38 @@ module ObjectStorage
self.file_storage?
end
def store_remote_file!(remote_object_id, filename)
raise RemoteStoreError, 'Missing filename' unless filename
file_path = File.join(TMP_UPLOAD_PATH, remote_object_id)
file_path = Pathname.new(file_path).cleanpath.to_s
raise RemoteStoreError, 'Bad file path' unless file_path.start_with?(TMP_UPLOAD_PATH + '/')
self.object_store = Store::REMOTE
# TODO:
# This should be changed to make use of `tmp/cache` mechanism
# instead of using custom upload directory,
# using tmp/cache makes this implementation way easier than it is today
CarrierWave::Storage::Fog::File.new(self, storage, file_path).tap do |file|
raise RemoteStoreError, 'Missing file' unless file.exists?
self.filename = filename
self.file = storage.store!(file)
end
end
def store_local_file!(local_path, filename)
raise RemoteStoreError, 'Missing filename' unless filename
root_path = File.realpath(self.class.workhorse_local_upload_path)
file_path = File.realpath(local_path)
raise RemoteStoreError, 'Bad file path' unless file_path.start_with?(root_path)
self.object_store = Store::LOCAL
self.store!(UploadedFile.new(file_path, filename))
end
# this is a hack around CarrierWave. The #migrate method needs to be
# able to force the current file to the migrated file upon success.
def file=(file)
......
---
title: Add support for direct uploading of LFS artifacts
merge_request:
author:
type: added
module StubConfiguration
def stub_object_storage_uploader(config:, uploader:, remote_directory:, enabled: true, licensed: true, background_upload: false)
Fog.mock!
def stub_object_storage_uploader(
config:, uploader:, remote_directory:, enabled: true, licensed: true,
background_upload: false, direct_upload: false
)
allow(config).to receive(:enabled) { enabled }
allow(config).to receive(:background_upload) { background_upload }
allow(config).to receive(:direct_upload) { direct_upload }
stub_licensed_features(object_storage: licensed) unless licensed == :skip
return unless enabled
Fog.mock!
::Fog::Storage.new(uploader.object_store_credentials).tap do |connection|
begin
connection.directories.create(key: remote_directory)
......
This diff is collapsed.
......@@ -53,14 +53,6 @@ module Gitlab
params
end
def lfs_upload_ok(oid, size)
{
StoreLFSPath: LfsObjectUploader.workhorse_upload_path,
LfsOid: oid,
LfsSize: size
}
end
def artifact_upload_ok
{ TempPath: JobArtifactUploader.workhorse_upload_path }
end
......
......@@ -278,6 +278,10 @@ describe Backup::Manager do
connection.directories.create(key: Gitlab.config.backup.upload.remote_directory)
end
after do
Fog.unmock!
end
context 'target path' do
it 'uses the tar filename by default' do
expect_any_instance_of(Fog::Collection).to receive(:create)
......
......@@ -990,22 +990,61 @@ describe 'Git LFS API and storage' do
end
context 'and request is sent by gitlab-workhorse to authorize the request' do
before do
put_authorize
shared_examples 'a valid response' do
before do
put_authorize
end
it 'responds with status 200' do
expect(response).to have_gitlab_http_status(200)
end
it 'uses the gitlab-workhorse content type' do
expect(response.content_type.to_s).to eq(Gitlab::Workhorse::INTERNAL_API_CONTENT_TYPE)
end
end
it 'responds with status 200' do
expect(response).to have_gitlab_http_status(200)
shared_examples 'a local file' do
it_behaves_like 'a valid response' do
it 'responds with status 200, location of lfs store and object details' do
expect(json_response['TempPath']).to eq(LfsObjectUploader.workhorse_local_upload_path)
expect(json_response['RemoteObject']).to be_nil
expect(json_response['LfsOid']).to eq(sample_oid)
expect(json_response['LfsSize']).to eq(sample_size)
end
end
end
it 'uses the gitlab-workhorse content type' do
expect(response.content_type.to_s).to eq(Gitlab::Workhorse::INTERNAL_API_CONTENT_TYPE)
context 'when using local storage' do
it_behaves_like 'a local file'
end
it 'responds with status 200, location of lfs store and object details' do
expect(json_response['StoreLFSPath']).to eq(LfsObjectUploader.workhorse_upload_path)
expect(json_response['LfsOid']).to eq(sample_oid)
expect(json_response['LfsSize']).to eq(sample_size)
context 'when using remote storage' do
context 'when direct upload is enabled' do
before do
stub_lfs_object_storage(enabled: true, direct_upload: true)
end
it_behaves_like 'a valid response' do
it 'responds with status 200, location of lfs remote store and object details' do
expect(json_response['TempPath']).to be_nil
expect(json_response['RemoteObject']).to have_key('ID')
expect(json_response['RemoteObject']).to have_key('GetURL')
expect(json_response['RemoteObject']).to have_key('StoreURL')
expect(json_response['RemoteObject']).to have_key('DeleteURL')
expect(json_response['LfsOid']).to eq(sample_oid)
expect(json_response['LfsSize']).to eq(sample_size)
end
end
end
context 'when direct upload is disabled' do
before do
stub_lfs_object_storage(enabled: true, direct_upload: false)
end
it_behaves_like 'a local file'
end
end
end
......@@ -1037,14 +1076,70 @@ describe 'Git LFS API and storage' do
end
context 'with object storage enabled' do
before do
stub_lfs_object_storage(background_upload: true)
context 'and direct upload enabled' do
let!(:fog_connection) do
stub_lfs_object_storage(direct_upload: true)
end
['123123', '../../123123'].each do |remote_id|
context "with invalid remote_id: #{remote_id}" do
subject do
put_finalize_with_args('file.remote_id' => remote_id)
end
it 'responds with status 403' do
subject
expect(response).to have_gitlab_http_status(403)
end
end
end
context 'with valid remote_id' do
before do
fog_connection.directories.get('lfs-objects').files.create(
key: 'tmp/upload/12312300',
body: 'content'
)
end
subject do
put_finalize_with_args(
'file.remote_id' => '12312300',
'file.name' => 'name')
end
it 'responds with status 200' do
subject
expect(response).to have_gitlab_http_status(200)
end
it 'schedules migration of file to object storage' do
subject
expect(LfsObject.last.projects).to include(project)
end
it 'have valid file' do
subject
expect(LfsObject.last.file_store).to eq(ObjectStorage::Store::REMOTE)
expect(LfsObject.last.file).to be_exists
end
end
end
it 'schedules migration of file to object storage' do
expect(ObjectStorage::BackgroundMoveWorker).to receive(:perform_async).with('LfsObjectUploader', 'LfsObject', :file, kind_of(Numeric))
context 'and background upload enabled' do
before do
stub_lfs_object_storage(background_upload: true)
end
put_finalize(with_tempfile: true)
it 'schedules migration of file to object storage' do
expect(ObjectStorage::BackgroundMoveWorker).to receive(:perform_async).with('LfsObjectUploader', 'LfsObject', :file, kind_of(Numeric))
put_finalize(with_tempfile: true)
end
end
end
end
......@@ -1064,13 +1159,12 @@ describe 'Git LFS API and storage' do
end
context 'invalid tempfiles' do
it 'rejects slashes in the tempfile name (path traversal' do
put_finalize('foo/bar')
expect(response).to have_gitlab_http_status(403)
before do
lfs_object.destroy
end
it 'rejects tempfile names that do not start with the oid' do
put_finalize("foo#{sample_oid}")
it 'rejects slashes in the tempfile name (path traversal)' do
put_finalize('../bar', with_tempfile: true)
expect(response).to have_gitlab_http_status(403)
end
end
......@@ -1160,7 +1254,7 @@ describe 'Git LFS API and storage' do
end
it 'with location of lfs store and object details' do
expect(json_response['StoreLFSPath']).to eq(LfsObjectUploader.workhorse_upload_path)
expect(json_response['TempPath']).to eq(LfsObjectUploader.workhorse_local_upload_path)
expect(json_response['LfsOid']).to eq(sample_oid)
expect(json_response['LfsSize']).to eq(sample_size)
end
......@@ -1263,10 +1357,24 @@ describe 'Git LFS API and storage' do
end
def put_finalize(lfs_tmp = lfs_tmp_file, with_tempfile: false)
setup_tempfile(lfs_tmp) if with_tempfile
upload_path = LfsObjectUploader.workhorse_local_upload_path
file_path = upload_path + '/' + lfs_tmp if lfs_tmp
if with_tempfile
FileUtils.mkdir_p(upload_path)
FileUtils.touch(file_path)
end
args = {
'file.path' => file_path,
'file.name' => File.basename(file_path)
}.compact
put "#{project.http_url_to_repo}/gitlab-lfs/objects/#{sample_oid}/#{sample_size}", nil,
headers.merge('X-Gitlab-Lfs-Tmp' => lfs_tmp).compact
put_finalize_with_args(args)
end
def put_finalize_with_args(args)
put "#{project.http_url_to_repo}/gitlab-lfs/objects/#{sample_oid}/#{sample_size}", args, headers
end
def lfs_tmp_file
......@@ -1274,10 +1382,6 @@ describe 'Git LFS API and storage' do
end
def setup_tempfile(lfs_tmp)
upload_path = LfsObjectUploader.workhorse_upload_path
FileUtils.mkdir_p(upload_path)
FileUtils.touch(File.join(upload_path, lfs_tmp))
end
end
......
......@@ -27,7 +27,7 @@ describe GitlabUploader do
describe '#file_cache_storage?' do
context 'when file storage is used' do
before do
uploader_class.cache_storage(:file)
expect(uploader_class).to receive(:cache_storage) { CarrierWave::Storage::File }
end
it { is_expected.to be_file_cache_storage }
......@@ -35,7 +35,7 @@ describe GitlabUploader do
context 'when is remote storage' do
before do
uploader_class.cache_storage(:fog)
expect(uploader_class).to receive(:cache_storage) { CarrierWave::Storage::Fog }
end
it { is_expected.not_to be_file_cache_storage }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment