Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
51da5736
Commit
51da5736
authored
Jun 04, 2018
by
Micaël Bergeron
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
adds the object storage configuration
parent
0f52db38
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
135 additions
and
40 deletions
+135
-40
app/workers/gitlab_elt_data_dump_worker.rb
app/workers/gitlab_elt_data_dump_worker.rb
+2
-2
config/gitlab.yml.example
config/gitlab.yml.example
+26
-0
config/initializers/1_settings.rb
config/initializers/1_settings.rb
+9
-1
lib/pseudonymity/manifest.yml
lib/pseudonymity/manifest.yml
+1
-28
lib/pseudonymity/table.rb
lib/pseudonymity/table.rb
+9
-9
lib/pseudonymity/upload_service.rb
lib/pseudonymity/upload_service.rb
+73
-0
lib/tasks/gitlab/db.rake
lib/tasks/gitlab/db.rake
+15
-0
No files found.
app/workers/gitlab_elt_data_dump_worker.rb
View file @
51da5736
class
GitlabE
LT
DataDumpWorker
class
GitlabE
lt
DataDumpWorker
include
ApplicationWorker
include
CronjobQueue
...
...
config/gitlab.yml.example
View file @
51da5736
...
...
@@ -726,6 +726,21 @@ production: &base
# # Specifies Amazon S3 storage class to use for backups, this is optional
# # storage_class: 'STANDARD'
## Pseudonym exporter
pseudonymizer:
# Tables manifest that specifies the fields to extract and pseudonymize.
# TODO: link to meltano configuration?
manifest: config/pseudonymizer.yml
upload:
# Fog storage connection settings, see http://fog.io/storage/ .
connection:
# provider: AWS
# region: eu-west-1
# aws_access_key_id: AKIAKIAKI
# aws_secret_access_key: 'secret123'
# # The remote 'directory' to store the CSV files. For S3, this would be the bucket name.
# remote_directory: 'gitlab-elt'
## GitLab Shell settings
gitlab_shell:
path: /home/git/gitlab-shell/
...
...
@@ -876,6 +891,17 @@ test:
token: secret
backup:
path: tmp/tests/backups
pseudonymizer:
manifest: config/pseudonymizer.test.yml
upload:
# The remote 'directory' to store the CSV files. For S3, this would be the bucket name.
remote_directory: gitlab-elt.test
# Fog storage connection settings, see http://fog.io/storage/
connection:
provider: AWS
region: us-east-1
aws_access_key_id: AWS_ACCESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY
gitlab_shell:
path: tmp/tests/gitlab-shell/
hooks_path: tmp/tests/gitlab-shell/hooks/
...
...
config/initializers/1_settings.rb
View file @
51da5736
...
...
@@ -373,7 +373,7 @@ Settings.cron_jobs['gitlab_usage_ping_worker']['job_class'] = 'GitlabUsagePingWo
Settings
.
cron_jobs
[
'gitlab_elt_database_dump'
]
||=
Settingslogic
.
new
({})
Settings
.
cron_jobs
[
'gitlab_elt_database_dump'
][
'cron'
]
||=
'0 23 * * *'
;
Settings
.
cron_jobs
[
'gitlab_elt_database_dump'
][
'job_class'
]
||=
'GitlabE
LT
DataDumpWorker'
;
Settings
.
cron_jobs
[
'gitlab_elt_database_dump'
][
'job_class'
]
||=
'GitlabE
lt
DataDumpWorker'
;
Settings
.
cron_jobs
[
'schedule_update_user_activity_worker'
]
||=
Settingslogic
.
new
({})
Settings
.
cron_jobs
[
'schedule_update_user_activity_worker'
][
'cron'
]
||=
'30 0 * * *'
...
...
@@ -475,6 +475,14 @@ Settings.backup['upload']['multipart_chunk_size'] ||= 104857600
Settings
.
backup
[
'upload'
][
'encryption'
]
||=
nil
Settings
.
backup
[
'upload'
][
'storage_class'
]
||=
nil
#
# Pseudonymizer
#
Settings
[
'pseudonymizer'
]
||=
Settingslogic
.
new
({})
Settings
.
pseudonymizer
[
'manifest'
]
=
Settings
.
pseudonymizer
[
'manifest'
]
||
"lib/pseudonymity/manifest.yml"
Settings
.
pseudonymizer
[
'upload'
]
||=
Settingslogic
.
new
({
'remote_directory'
=>
nil
,
'connection'
=>
nil
})
# Settings.pseudonymizer['upload']['multipart_chunk_size'] ||= 104857600
#
# Git
#
...
...
ee/lib/assets/pseudonymity_dump
.yml
→
lib/pseudonymity/manifest
.yml
View file @
51da5736
...
...
@@ -156,7 +156,6 @@ tables:
-
last_edited_by_id
-
discussion_locked
-
closed_at
-
closed_by_id
pseudo
:
-
id
-
title
...
...
@@ -487,8 +486,6 @@ tables:
-
merge_merge_request
-
failed_pipeline
-
success_pipeline
-
push_to_merge_request
-
issue_due
pseudo
:
-
id
-
user_id
...
...
@@ -509,8 +506,6 @@ tables:
-
merge_merge_request
-
failed_pipeline
-
success_pipeline
-
push_to_merge_request
-
issue_due
project_authorizations
:
whitelist
:
-
user_id
...
...
@@ -535,15 +530,6 @@ tables:
-
updated_at
-
enabled
-
domain
project_ci_cd_settings
:
whitelist
:
-
id
-
project_id
-
group_runners_enabled
pseudo
:
-
id
-
project_id
-
group_runners_enabled
project_custom_attributes
:
whitelist
:
-
id
...
...
@@ -559,17 +545,6 @@ tables:
-
project_id
-
key
-
value
project_deploy_tokens
:
whitelist
:
-
id
-
project_id
-
deploy_token_id
-
created_at
pseudo
:
-
id
-
project_id
-
deploy_token_id
-
created_at
project_features
:
whitelist
:
-
id
...
...
@@ -750,7 +725,6 @@ tables:
-
mirror_overwrites_diverged_branches
-
external_authorization_classification_label
-
external_webhook_token
-
pages_https_only
pseudo
:
-
id
-
name
...
...
@@ -820,7 +794,6 @@ tables:
-
mirror_overwrites_diverged_branches
-
external_authorization_classification_label
-
external_webhook_token
-
pages_https_only
subscriptions
:
whitelist
:
-
id
...
...
ee/
lib/pseudonymity/table.rb
→
lib/pseudonymity/table.rb
View file @
51da5736
...
...
@@ -27,27 +27,26 @@ module Pseudonymity
class
Table
attr_accessor
:config
attr_accessor
:output_dir
def
initialize
@config
=
{}
@csv_output
=
""
parse_config
@config
=
parse_config
@output_dir
=
""
@schema
=
{}
@output_files
=
[]
end
def
tables_to_csv
tables
=
config
[
"tables"
]
@csv_output
=
config
[
"output"
][
"csv"
]
unless
File
.
directory?
(
@csv_output
)
raise
"No such directory
#{
@csv_output
}
"
end
@output_dir
=
File
.
join
(
"/tmp/"
,
SecureRandom
.
hex
)
Dir
.
mkdir
(
@output_dir
)
unless
File
.
directory?
(
@output_dir
)
new_tables
=
tables
.
map
do
|
k
,
v
|
@schema
[
k
]
=
{}
table_to_csv
(
k
,
v
[
"whitelist"
],
v
[
"pseudo"
])
end
schema_to_yml
file_list_to_json
new_tables
...
...
@@ -57,7 +56,7 @@ module Pseudonymity
file_timestamp
=
filename
||
"
#{
prefix
}
_
#{
Time
.
now
.
to_i
}
"
file_timestamp
=
"
#{
file_timestamp
}
.
#{
ext
}
"
@output_files
<<
file_timestamp
File
.
join
(
@
csv_output
,
file_timestamp
)
File
.
join
(
@
output_dir
,
file_timestamp
)
end
def
schema_to_yml
...
...
@@ -103,10 +102,11 @@ module Pseudonymity
end
def
parse_config
@config
=
YAML
.
load_file
(
Rails
.
root
.
join
(
'./ee/lib/assets/pseudonymity_dump.yml'
))
YAML
.
load_file
(
Rails
.
root
.
join
(
Gitlab
.
config
.
pseudonymizer
.
manifest
))
end
def
write_to_csv_file
(
title
,
contents
)
Rails
.
logger
.
info
"Writing
#{
title
}
..."
file_path
=
get_and_log_file_name
(
"csv"
,
title
)
column_names
=
contents
.
first
.
keys
contents
=
CSV
.
generate
do
|
csv
|
...
...
lib/pseudonymity/upload_service.rb
0 → 100644
View file @
51da5736
module Pseudonymity
  # Uploads the CSV/YAML files found in an export directory to the remote
  # storage described by the `pseudonymizer.upload` settings, and removes the
  # export directory afterwards.
  class UploadService
    # Raised when the remote storage is not configured or the configured
    # remote directory cannot be obtained.
    RemoteStorageUnavailableError = Class.new(StandardError)

    # @param output_dir [String] directory holding the files to upload
    # @param progress [#print, #puts] sink for progress messages
    def initialize(output_dir, progress)
      @progress = progress
      @output_dir = output_dir
    end

    # Uploads every `*.csv` and `*.yml` file of the output directory.
    #
    # @raise [RemoteStorageUnavailableError] when the connection settings are
    #   blank or the remote directory cannot be found
    def upload
      # Resolve the directory first so configuration errors surface before any
      # progress output, and so a single connection is shared by all files.
      directory = remote_directory

      progress.print "Uploading pseudonymized files to remote storage #{remote_directory_name} ... "

      file_list.each do |file|
        upload_file(file, directory)
      end
    end

    # Uploads a single file as a private object keyed by its basename.
    #
    # @param file [String] path of the local file
    # @param directory [#files] remote directory the file is created in
    def upload_file(file, directory)
      progress.print "\tUploading #{file} ... "

      # Block form guarantees the handle is closed once the upload returns.
      uploaded = File.open(file) do |io|
        directory.files.create(key: File.basename(file), body: io, public: false)
      end

      if uploaded
        progress.puts "done".color(:green)
      else
        progress.puts "uploading CSV to #{remote_directory_name} failed".color(:red)
      end
    end

    # Deletes the output directory. Does nothing when it is unset or absent.
    def cleanup
      return if @output_dir.to_s.empty? || !File.exist?(@output_dir)

      progress.print "Deleting tmp directory #{@output_dir} ... "

      # rm_rf swallows errors and always returns a truthy value, so success is
      # determined by checking whether the directory is actually gone.
      FileUtils.rm_rf(@output_dir)

      if File.exist?(@output_dir)
        progress.puts "failed".color(:red)
      else
        progress.puts "done".color(:green)
      end
    end

    private

    # Use the injected progress sink rather than whatever `progress` method
    # happens to be globally visible.
    attr_reader :progress

    def config
      Gitlab.config.pseudonymizer
    end

    # Configured name of the remote directory (for S3, the bucket name).
    def remote_directory_name
      config.upload.remote_directory
    end

    # Remote directory object, connected lazily and memoized so the storage
    # connection is established only once per service instance.
    def remote_directory
      @remote_directory ||= begin
        connection_settings = config.upload.connection

        if connection_settings.blank?
          message = "Cannot upload files, make sure the `pseudonymizer.upload.connection` is set properly"
          progress.puts message.color(:red)
          raise RemoteStorageUnavailableError, message
        end

        connect_to_remote_directory(connection_settings)
      end
    end

    def connect_to_remote_directory(connection_settings)
      # our settings use string keys, but Fog expects symbols
      connection = ::Fog::Storage.new(connection_settings.symbolize_keys)
      remote_dir = remote_directory_name

      # We only attempt to create the directory for local backups. For AWS
      # and other cloud providers, we cannot guarantee the user will have
      # permission to create the bucket.
      directory =
        if connection.service == ::Fog::Storage::Local
          connection.directories.create(key: remote_dir)
        else
          connection.directories.get(remote_dir)
        end

      # A missing remote directory would otherwise surface later as a
      # NoMethodError on nil; fail with a descriptive error instead.
      return directory if directory

      raise RemoteStorageUnavailableError, "Remote directory `#{remote_dir}` could not be found"
    end

    def file_list
      Dir[File.join(@output_dir, "*.{csv,yml}")]
    end
  end
end
lib/tasks/gitlab/db.rake
View file @
51da5736
...
...
@@ -78,6 +78,21 @@ namespace :gitlab do
# Exports the configured tables to CSV, uploads the result to remote storage
# and removes the temporary export directory.
task pseudonymity_dump: :environment do
  table = Pseudonymity::Table.new
  table.tables_to_csv

  upload = Pseudonymity::UploadService.new(table.output_dir, progress)

  begin
    upload.upload
  ensure
    # Always remove the export directory, even when the upload fails, so the
    # exported files are not left behind on disk.
    upload.cleanup
  end
end
# Returns the object progress messages are written to: standard output
# normally, or a fresh StringIO when the CRON environment variable is set.
def progress
  return $stdout unless ENV['CRON']

  # We need an object we can say 'puts' and 'print' to; let's use a
  # StringIO.
  require 'stringio'
  StringIO.new
end
end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment