Commit 949d1b37 authored by Kamil Trzciński's avatar Kamil Trzciński

Merge branch 'ac/lfs-direct-upload-ee-to-ce' into 'master'

LFS direct upload port to CE

Closes #44548

See merge request gitlab-org/gitlab-ce!17752
parents 092445a4 04c5e637
......@@ -118,9 +118,9 @@ gem 'carrierwave', '~> 1.2'
gem 'dropzonejs-rails', '~> 0.7.1'
# for backups
gem 'fog-aws', '~> 2.0'
gem 'fog-aws', '~> 2.0.1'
gem 'fog-core', '~> 1.44'
gem 'fog-google', '~> 0.5'
gem 'fog-google', '~> 1.3.3'
gem 'fog-local', '~> 0.3'
gem 'fog-openstack', '~> 0.1'
gem 'fog-rackspace', '~> 0.1.1'
......
......@@ -244,10 +244,11 @@ GEM
builder
excon (~> 0.58)
formatador (~> 0.2)
fog-google (0.5.3)
fog-google (1.3.3)
fog-core
fog-json
fog-xml
google-api-client (~> 0.19.1)
fog-json (1.0.2)
fog-core (~> 1.0)
multi_json (~> 1.10)
......@@ -1047,9 +1048,9 @@ DEPENDENCIES
flipper-active_record (~> 0.13.0)
flipper-active_support_cache_store (~> 0.13.0)
fog-aliyun (~> 0.2.0)
fog-aws (~> 2.0)
fog-aws (~> 2.0.1)
fog-core (~> 1.44)
fog-google (~> 0.5)
fog-google (~> 1.3.3)
fog-local (~> 0.3)
fog-openstack (~> 0.1)
fog-rackspace (~> 0.1.1)
......
......@@ -17,20 +17,23 @@ class Projects::LfsStorageController < Projects::GitHttpClientController
def upload_authorize
set_workhorse_internal_api_content_type
render json: Gitlab::Workhorse.lfs_upload_ok(oid, size)
authorized = LfsObjectUploader.workhorse_authorize
authorized.merge!(LfsOid: oid, LfsSize: size)
render json: authorized
end
def upload_finalize
unless tmp_filename
render_lfs_forbidden
return
end
if store_file(oid, size, tmp_filename)
if store_file!(oid, size)
head 200
else
render plain: 'Unprocessable entity', status: 422
end
rescue ActiveRecord::RecordInvalid
render_400
rescue ObjectStorage::RemoteStoreError
render_lfs_forbidden
end
private
......@@ -51,35 +54,28 @@ class Projects::LfsStorageController < Projects::GitHttpClientController
params[:size].to_i
end
# Returns the temporary upload file name taken from the `X-Gitlab-Lfs-Tmp`
# request header, or nil when the name is not acceptable.
#
# A name is rejected (nil is returned) when:
# * the header is absent,
# * it contains a '/' (it must be a bare file name, not a path), or
# * `oid` is blank or the name does not start with `oid`.
def tmp_filename
  name = request.headers['X-Gitlab-Lfs-Tmp']

  # The header is optional from the client's point of view; without this guard
  # a missing header raised NoMethodError on nil instead of being rejected.
  return unless name
  return if name.include?('/')
  return unless oid.present? && name.start_with?(oid)

  name
end
def store_file!(oid, size)
object = LfsObject.find_by(oid: oid, size: size)
unless object&.file&.exists?
object = create_file!(oid, size)
end
def store_file(oid, size, tmp_file)
# Define tmp_file_path early because we use it in "ensure"
tmp_file_path = File.join(LfsObjectUploader.workhorse_upload_path, tmp_file)
return unless object
object = LfsObject.find_or_create_by(oid: oid, size: size)
file_exists = object.file.exists? || move_tmp_file_to_storage(object, tmp_file_path)
file_exists && link_to_project(object)
ensure
FileUtils.rm_f(tmp_file_path)
link_to_project!(object)
end
def move_tmp_file_to_storage(object, path)
object.file = File.open(path)
object.file.store!
object.save
def create_file!(oid, size)
LfsObject.new(oid: oid, size: size).tap do |object|
object.file.store_workhorse_file!(params, :file)
object.save!
end
end
def link_to_project(object)
def link_to_project!(object)
if object && !object.projects.exists?(storage_project.id)
object.projects << storage_project
object.save
object.save!
end
end
end
......@@ -11,6 +11,12 @@ class LfsObject < ActiveRecord::Base
mount_uploader :file, LfsObjectUploader
before_save :update_file_store
# Copies the store currently reported by the mounted uploader
# (`file.object_store`) into this record's `file_store` attribute.
# Registered as a `before_save` callback on the model.
def update_file_store
self.file_store = file.object_store
end
# Returns whether this LFS object is linked to the LFS storage project of the
# given project (`project.lfs_storage_project`), checked with an `exists?`
# query on the `projects` association.
def project_allowed_access?(project)
projects.exists?(project.lfs_storage_project.id)
end
......
......@@ -10,6 +10,9 @@ module ObjectStorage
UnknownStoreError = Class.new(StandardError)
ObjectStorageUnavailable = Class.new(StandardError)
DIRECT_UPLOAD_TIMEOUT = 4.hours
TMP_UPLOAD_PATH = 'tmp/upload'.freeze
module Store
LOCAL = 1
REMOTE = 2
......@@ -124,6 +127,10 @@ module ObjectStorage
object_store_options.enabled
end
# Returns the `direct_upload` setting from `object_store_options`.
def direct_upload_enabled?
object_store_options.direct_upload
end
# Returns the `background_upload` setting from `object_store_options`.
def background_upload_enabled?
object_store_options.background_upload
end
......@@ -147,6 +154,45 @@ module ObjectStorage
# Returns the `:mount_on` option configured for `mount_point` in
# `model_class.uploader_options`, or `mount_point` itself when no such option
# is set.
def serialization_column(model_class, mount_point)
  mount_on = model_class.uploader_options.dig(mount_point, :mount_on)
  mount_on || mount_point
end
# Builds the upload authorization hash.
#
# Returns `{ RemoteObject: <options> }` when `workhorse_remote_upload_options`
# yields options, otherwise `{ TempPath: <workhorse_local_upload_path> }`.
def workhorse_authorize
  remote_options = workhorse_remote_upload_options
  return { RemoteObject: remote_options } if remote_options

  { TempPath: workhorse_local_upload_path }
end
# Returns the local temporary upload directory: the uploader's `root` joined
# with TMP_UPLOAD_PATH.
def workhorse_local_upload_path
File.join(self.root, TMP_UPLOAD_PATH)
end
# Builds the options describing a direct upload to the remote object store.
#
# Returns nil unless both the object store and direct upload are enabled.
# Otherwise returns a hash with:
#   ID        - a freshly generated identifier for the upload
#   GetURL    - URL from the Fog connection for reading the uploaded object
#   DeleteURL - URL from the Fog connection for deleting it
#   StoreURL  - URL from the Fog connection for storing it, requested with an
#               'application/octet-stream' Content-Type
# All URLs refer to TMP_UPLOAD_PATH/<ID> inside `remote_store_path` and are
# requested with an expiry of now + DIRECT_UPLOAD_TIMEOUT.
def workhorse_remote_upload_options
  return unless self.object_store_enabled? && self.direct_upload_enabled?

  upload_id = "#{CarrierWave.generate_cache_id}-#{SecureRandom.hex}"
  object_path = File.join(TMP_UPLOAD_PATH, upload_id)
  fog = ::Fog::Storage.new(self.object_store_credentials)
  expires_at = Time.now + DIRECT_UPLOAD_TIMEOUT
  put_headers = { 'Content-Type' => 'application/octet-stream' }

  {
    ID: upload_id,
    GetURL: fog.get_object_url(remote_store_path, object_path, expires_at),
    DeleteURL: fog.delete_object_url(remote_store_path, object_path, expires_at),
    StoreURL: fog.put_object_url(remote_store_path, object_path, expires_at, put_headers)
  }
end
end
# Returns the file name for this uploader. An explicitly assigned name
# (@filename, see #filename=) takes precedence, then the parent implementation
# (`super`), and finally the file name of the current `file`, if there is one.
def filename
@filename || super || file&.filename # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
# Overrides the file name reported by #filename by storing it in @filename.
def filename=(filename)
@filename = filename # rubocop:disable Gitlab/ModuleWithInstanceVariables
end
def file_storage?
......@@ -195,10 +241,6 @@ module ObjectStorage
end
end
# Returns the parent implementation's file name, falling back to the file name
# of the current `file` (if any) when `super` returns nil or false.
def filename
super || file&.filename
end
#
# Move the file to another store
#
......@@ -253,6 +295,18 @@ module ObjectStorage
}
end
# Stores a file described by upload params keyed as "<identifier>.name",
# "<identifier>.remote_id" and "<identifier>.path".
#
# A remote id takes precedence over a local path:
# * with "<identifier>.remote_id" the file is stored via store_remote_file!
# * otherwise, with "<identifier>.path", it is stored via store_local_file!
#
# Raises RemoteStoreError ('Bad file') when neither key is present.
def store_workhorse_file!(params, identifier)
  filename = params["#{identifier}.name"]

  remote_object_id = params["#{identifier}.remote_id"]
  return store_remote_file!(remote_object_id, filename) if remote_object_id

  local_path = params["#{identifier}.path"]
  return store_local_file!(local_path, filename) if local_path

  raise RemoteStoreError, 'Bad file'
end
private
def schedule_background_upload?
......@@ -261,6 +315,38 @@ module ObjectStorage
self.file_storage?
end
# Stores a file that already exists in the remote store under
# TMP_UPLOAD_PATH/<remote_object_id>.
#
# Switches this uploader to Store::REMOTE, assigns `filename`, stores the
# remote file through `storage.store!` and assigns the result to `file`.
# Returns the CarrierWave::Storage::Fog::File built for the temporary object.
#
# Raises RemoteStoreError when `filename` is missing, when the normalized path
# does not stay below TMP_UPLOAD_PATH, or when the remote file does not exist.
def store_remote_file!(remote_object_id, filename)
  raise RemoteStoreError, 'Missing filename' unless filename

  # Normalize away "." and ".." segments before checking that the resulting
  # path is still inside the temporary upload directory.
  normalized_path = Pathname.new(File.join(TMP_UPLOAD_PATH, remote_object_id)).cleanpath.to_s
  raise RemoteStoreError, 'Bad file path' unless normalized_path.start_with?(TMP_UPLOAD_PATH + '/')

  self.object_store = Store::REMOTE

  # TODO:
  # Switch to the `tmp/cache` mechanism instead of a custom upload directory;
  # that would make this implementation considerably simpler.
  remote_file = CarrierWave::Storage::Fog::File.new(self, storage, normalized_path)
  raise RemoteStoreError, 'Missing file' unless remote_file.exists?

  self.filename = filename
  self.file = storage.store!(remote_file)

  remote_file
end
# Stores a file that was uploaded to the local temporary upload directory
# (`workhorse_local_upload_path` of the uploader class).
#
# Switches this uploader to Store::LOCAL and stores an UploadedFile built from
# the resolved path and `filename`. Returns the result of `store!`.
#
# Raises RemoteStoreError when `filename` is missing or when the resolved path
# is not inside the upload directory. File.realpath raises Errno::ENOENT when
# either path does not exist.
def store_local_file!(local_path, filename)
  raise RemoteStoreError, 'Missing filename' unless filename

  # Resolve symlinks and ".." on both sides so the containment check compares
  # canonical paths.
  root_path = File.realpath(self.class.workhorse_local_upload_path)
  file_path = File.realpath(local_path)

  # Compare against the root WITH a trailing separator: a bare prefix check
  # would also accept sibling directories such as "<root>-other/file".
  # File.join(root, '') appends the separator without doubling it.
  root_prefix = File.join(root_path, '')
  raise RemoteStoreError, 'Bad file path' unless file_path.start_with?(root_prefix)

  self.object_store = Store::LOCAL
  self.store!(UploadedFile.new(file_path, filename))
end
# this is a hack around CarrierWave. The #migrate method needs to be
# able to force the current file to the migrated file upon success.
def file=(file)
......
---
title: Port direct upload of LFS artifacts from EE
merge_request: 17752
author:
type: added
......@@ -711,7 +711,7 @@ test:
provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1
region: us-east-1
artifacts:
path: tmp/tests/artifacts
enabled: true
......@@ -725,7 +725,7 @@ test:
provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1
region: us-east-1
uploads:
storage_path: tmp/tests/public
object_store:
......@@ -734,7 +734,7 @@ test:
provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1
region: us-east-1
gitlab:
host: localhost
port: 80
......
......@@ -350,6 +350,7 @@ Settings.lfs['storage_path'] = Settings.absolute(Settings.lfs['storage_path'] ||
Settings.lfs['object_store'] ||= Settingslogic.new({})
Settings.lfs['object_store']['enabled'] = false if Settings.lfs['object_store']['enabled'].nil?
Settings.lfs['object_store']['remote_directory'] ||= nil
Settings.lfs['object_store']['direct_upload'] = false if Settings.lfs['object_store']['direct_upload'].nil?
Settings.lfs['object_store']['background_upload'] = true if Settings.lfs['object_store']['background_upload'].nil?
Settings.lfs['object_store']['proxy_download'] = false if Settings.lfs['object_store']['proxy_download'].nil?
# Convert upload connection settings to use string keys, to make Fog happy
......
......@@ -28,16 +28,4 @@ if File.exist?(aws_file)
# when fog_public is false and provider is AWS or Google, defaults to 600
config.fog_authenticated_url_expiration = 1 << 29
end
# Mocking Fog requests, based on: https://github.com/carrierwaveuploader/carrierwave/wiki/How-to%3A-Test-Fog-based-uploaders
if Rails.env.test?
Fog.mock!
connection = ::Fog::Storage.new(
aws_access_key_id: AWS_CONFIG['access_key_id'],
aws_secret_access_key: AWS_CONFIG['secret_access_key'],
provider: 'AWS',
region: AWS_CONFIG['region']
)
connection.directories.create(key: AWS_CONFIG['bucket'])
end
end
......@@ -63,6 +63,7 @@ For source installations the following settings are nested under `lfs:` and then
|---------|-------------|---------|
| `enabled` | Enable/disable object storage | `false` |
| `remote_directory` | The bucket name where LFS objects will be stored| |
| `direct_upload` | Set to true to enable direct upload of LFS objects without the need for local shared storage. This option may be removed once we decide to support only a single storage for all files. | `false` |
| `background_upload` | Set to false to disable automatic upload. Option may be removed once upload is direct to S3 | `true` |
| `proxy_download` | Set to true to enable proxying all files served. Option allows to reduce egress traffic as this allows clients to download directly from remote storage instead of proxying all data | `false` |
| `connection` | Various connection options described below | |
......
......@@ -39,14 +39,6 @@ module Gitlab
params
end
# Builds the hash describing an accepted LFS upload: the upload path reported
# by LfsObjectUploader (`StoreLFSPath`) together with the given oid (`LfsOid`)
# and size (`LfsSize`).
def lfs_upload_ok(oid, size)
{
StoreLFSPath: LfsObjectUploader.workhorse_upload_path,
LfsOid: oid,
LfsSize: size
}
end
# Builds the hash describing an accepted artifact upload: the upload path
# reported by JobArtifactUploader, exposed under `TempPath`.
def artifact_upload_ok
{ TempPath: JobArtifactUploader.workhorse_upload_path }
end
......
......@@ -278,6 +278,10 @@ describe Backup::Manager do
connection.directories.create(key: Gitlab.config.backup.upload.remote_directory)
end
after do
Fog.unmock!
end
context 'target path' do
it 'uses the tar filename by default' do
expect_any_instance_of(Fog::Collection).to receive(:create)
......
This diff is collapsed.
module StubConfiguration
def stub_object_storage_uploader(
config:, uploader:, remote_directory:,
config:,
uploader:,
remote_directory:,
enabled: true,
proxy_download: false,
background_upload: false)
Fog.mock!
background_upload: false,
direct_upload: false
)
allow(config).to receive(:enabled) { enabled }
allow(config).to receive(:proxy_download) { proxy_download }
allow(config).to receive(:background_upload) { background_upload }
allow(config).to receive(:direct_upload) { direct_upload }
return unless enabled
Fog.mock!
::Fog::Storage.new(uploader.object_store_credentials).tap do |connection|
begin
connection.directories.create(key: remote_directory)
......
......@@ -27,7 +27,7 @@ describe GitlabUploader do
describe '#file_cache_storage?' do
context 'when file storage is used' do
before do
uploader_class.cache_storage(:file)
expect(uploader_class).to receive(:cache_storage) { CarrierWave::Storage::File }
end
it { is_expected.to be_file_cache_storage }
......@@ -35,7 +35,7 @@ describe GitlabUploader do
context 'when is remote storage' do
before do
uploader_class.cache_storage(:fog)
expect(uploader_class).to receive(:cache_storage) { CarrierWave::Storage::Fog }
end
it { is_expected.not_to be_file_cache_storage }
......
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment