Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
7525a79f
Commit
7525a79f
authored
Jun 20, 2018
by
Micaël Bergeron
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
apply feedback
parent
1c16cdec
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
55 additions
and
22 deletions
+55
-22
config/gitlab.yml.example
config/gitlab.yml.example
+2
-2
config/initializers/1_settings.rb
config/initializers/1_settings.rb
+1
-1
config/pseudonymizer.yml
config/pseudonymizer.yml
+2
-2
doc/administration/pseudonymizer.md
doc/administration/pseudonymizer.md
+3
-3
ee/app/helpers/ee/application_settings_helper.rb
ee/app/helpers/ee/application_settings_helper.rb
+5
-1
ee/app/views/admin/application_settings/_pseudonymizer.html.haml
...views/admin/application_settings/_pseudonymizer.html.haml
+1
-1
lib/pseudonymizer/dumper.rb
lib/pseudonymizer/dumper.rb
+19
-10
lib/pseudonymizer/options.rb
lib/pseudonymizer/options.rb
+7
-1
spec/lib/pseudonymizer/dumper_spec.rb
spec/lib/pseudonymizer/dumper_spec.rb
+15
-1
No files found.
config/gitlab.yml.example
View file @
7525a79f
...
...
@@ -734,7 +734,7 @@ production: &base
pseudonymizer:
enabled: false
# Tables manifest that specifies the fields to extract and pseudonymize.
manifest:
lib/pseudonymizer/manifest
.yml
manifest:
config/pseudonymizer
.yml
upload:
# remote_directory: 'gitlab-elt'
# Fog storage connection settings, see http://fog.io/storage/ .
...
...
@@ -897,7 +897,7 @@ test:
path: tmp/tests/backups
pseudonymizer:
enabled: false
manifest:
lib/pseudonymizer/manifest
.yml
manifest:
config/pseudonymizer
.yml
upload:
# The remote 'directory' to store the CSV files. For S3, this would be the bucket name.
remote_directory: gitlab-elt.test
...
...
config/initializers/1_settings.rb
View file @
7525a79f
...
...
@@ -479,7 +479,7 @@ Settings.backup['upload']['storage_class'] ||= nil
#
Settings
[
'pseudonymizer'
]
||=
Settingslogic
.
new
({})
Settings
.
pseudonymizer
[
'enabled'
]
=
false
if
Settings
.
pseudonymizer
[
'enabled'
].
nil?
Settings
.
pseudonymizer
[
'manifest'
]
=
Settings
.
absolute
(
Settings
.
pseudonymizer
[
'manifest'
]
||
Rails
.
root
.
join
(
"
lib/pseudonymizer/manifest
.yml"
))
Settings
.
pseudonymizer
[
'manifest'
]
=
Settings
.
absolute
(
Settings
.
pseudonymizer
[
'manifest'
]
||
Rails
.
root
.
join
(
"
config/pseudonymizer
.yml"
))
Settings
.
pseudonymizer
[
'upload'
]
||=
Settingslogic
.
new
({
'remote_directory'
=>
nil
,
'connection'
=>
nil
})
# Settings.pseudonymizer['upload']['multipart_chunk_size'] ||= 104857600
...
...
lib/pseudonymizer/manifest
.yml
→
config/pseudonymizer
.yml
View file @
7525a79f
...
...
@@ -98,6 +98,8 @@ tables:
-
iid
-
updated_by_id
-
last_edited_by_id
-
title
-
description
issue_assignees
:
whitelist
:
-
user_id
...
...
@@ -617,7 +619,6 @@ tables:
-
has_external_wiki
-
ci_config_path
-
lfs_enabled
-
description_html
-
only_allow_merge_if_all_discussions_are_resolved
-
repository_size_limit
-
printing_merge_request_link_enabled
...
...
@@ -670,7 +671,6 @@ tables:
-
repository_storage
-
repository_read_only
-
ci_config_path
-
description_html
-
only_allow_merge_if_all_discussions_are_resolved
-
repository_size_limit
-
auto_cancel_pending_pipelines
...
...
doc/administration/pseudonymizer.md
View file @
7525a79f
...
...
@@ -21,7 +21,7 @@ be textually exported. This ensures that:
To configure the pseudonymizer, you need to:
-
Provide a manifest file that describes which fields should be included or
pseudonymized (
[
example `manifest.yml` file
](
)).
pseudonymized (
[
example `manifest.yml` file
](
https://gitlab.com/gitlab-org/gitlab-ee/tree/master/config/pseudonymizer.yml
)
).
-
Use an object storage
**For Omnibus installations:**
...
...
@@ -31,7 +31,7 @@ To configure the pseudonymizer, you need to:
```ruby
gitlab_rails['pseudonymizer_enabled'] = true
gitlab_rails['pseudonymizer_manifest'] = '
lib/pseudonymizer/manifest
.yml'
gitlab_rails['pseudonymizer_manifest'] = '
config/pseudonymizer
.yml'
gitlab_rails['pseudonymizer_upload_remote_directory'] = 'gitlab-elt'
gitlab_rails['pseudonymizer_upload_connection'] = {
'provider' => 'AWS',
...
...
@@ -65,7 +65,7 @@ To configure the pseudonymizer, you need to:
```yaml
pseudonymizer:
enabled: true
manifest:
lib/pseudonymizer/manifest
.yml
manifest:
config/pseudonymizer
.yml
upload:
remote_directory: 'gitlab-elt' # The bucket name
connection:
...
...
ee/app/helpers/ee/application_settings_helper.rb
View file @
7525a79f
...
...
@@ -35,8 +35,12 @@ module EE
"and the value is encrypted at rest."
)
end
# Text shown next to the `pseudonymizer_enabled` checkbox in the admin
# application-settings form.
#
# @return [String] the message as returned by `_` (presumably the gettext
#   translation helper; it is defined outside this view of the file)
def pseudonymizer_enabled_help_text
  _("Enable Pseudonymizer data export")
end
def
pseudonymizer_description_text
_
(
"GitLab will run the pseudonymizer cron job which will
send
pseudoanonymized data to be processed and analyzed."
)
_
(
"GitLab will run the pseudonymizer cron job which will
output
pseudoanonymized data to be processed and analyzed."
)
end
def
pseudonymizer_disabled_description_text
...
...
ee/app/views/admin/application_settings/_pseudonymizer.html.haml
View file @
7525a79f
...
...
@@ -8,7 +8,7 @@
.form-check
=
f
.
label
:pseudonymizer_enabled
do
=
f
.
check_box
:pseudonymizer_enabled
Enable Pseudonymizer Cron Job
=
pseudonymizer_enabled_help_text
.form-text.text-muted
-
if
is_enabled
=
pseudonymizer_description_text
...
...
lib/pseudonymizer/dumper.rb
View file @
7525a79f
...
...
@@ -69,19 +69,18 @@ module Pseudonymizer
end
class
Anon
def
initialize
(
fields
)
@anon_fields
=
fields
def
initialize
(
table
,
whitelisted_fields
,
pseudonymized_fields
)
@table
=
table
@pseudo_fields
=
pseudo_fields
(
whitelisted_fields
,
pseudonymized_fields
)
end
def
anonymize
(
results
)
columns
=
results
.
columns
# Assume they all have the same table
to_filter
=
@anon_fields
&
columns
key
=
Rails
.
application
.
secrets
[
:secret_key_base
]
digest
=
OpenSSL
::
Digest
.
new
(
'sha256'
)
Enumerator
.
new
do
|
yielder
|
results
.
each
do
|
result
|
to_filter
.
each
do
|
field
|
@pseudo_fields
.
each
do
|
field
|
next
if
result
[
field
].
nil?
result
[
field
]
=
OpenSSL
::
HMAC
.
hexdigest
(
digest
,
key
,
String
(
result
[
field
]))
...
...
@@ -90,6 +89,17 @@ module Pseudonymizer
end
end
end
private
# Computes the fields that will actually be pseudonymized for @table.
#
# Only fields present in both lists are kept. Every field that is listed for
# pseudonymization but is not whitelisted is reported through
# Rails.logger.warn and then ignored.
#
# A missing list (nil, e.g. a manifest table without a `pseudo:` or
# `whitelist:` key) is treated as an empty list instead of raising
# NoMethodError.
#
# @param whitelisted [Array<String>, nil] fields allowed in the export
# @param pseudonymized [Array<String>, nil] fields requested to be pseudonymized
# @return [Array<String>] fields that are both requested and whitelisted
def pseudo_fields(whitelisted, pseudonymized)
  whitelisted = Array(whitelisted)
  pseudonymized = Array(pseudonymized)

  (pseudonymized - whitelisted).each do |field|
    Rails.logger.warn("#{self.class.name} extraneous pseudo: #{@table}.#{field} is not whitelisted and will be ignored.")
  end

  pseudonymized & whitelisted
end
end
class
Dumper
...
...
@@ -155,7 +165,7 @@ module Pseudonymizer
# yield every result, paginated, anonymized
def
table_page_results
(
table
,
whitelist_columns
,
pseudonymity_columns
)
anonymizer
=
Anon
.
new
(
pseudonymity_columns
)
anonymizer
=
Anon
.
new
(
table
,
whitelist_columns
,
pseudonymity_columns
)
pager
=
Pager
.
new
(
table
,
whitelist_columns
)
Enumerator
.
new
do
|
yielder
|
...
...
@@ -168,18 +178,17 @@ module Pseudonymizer
end
def
table_to_schema
(
table
)
whitelisted
=
->
(
table
)
{
@config
.
dig
(
:tables
,
table
,
:whitelist
)
}
pseudonymized
=
->
(
table
)
{
@config
.
dig
(
:tables
,
table
,
:pseudo
)
}
table_config
=
@config
.
dig
(
:tables
,
table
)
type_results
=
ActiveRecord
::
Base
.
connection
.
columns
(
table
)
type_results
=
type_results
.
select
do
|
c
|
whitelisted
[
table
].
include?
(
c
.
name
)
table_config
[
:whitelist
].
include?
(
c
.
name
)
end
type_results
=
type_results
.
map
do
|
c
|
data_type
=
c
.
sql_type
if
pseudonymized
[
table
].
include?
(
c
.
name
)
if
table_config
[
:pseudo
].
include?
(
c
.
name
)
data_type
=
"character varying"
end
...
...
lib/pseudonymizer/options.rb
View file @
7525a79f
...
...
@@ -9,10 +9,16 @@ module Pseudonymizer
@start_at
=
Time
.
now
.
utc
base_dir
=
output_dir
||
File
.
join
(
Dir
.
tmpdir
,
'gitlab-pseudonymizer'
)
@output_dir
=
File
.
join
(
base_dir
,
start_at
.
iso8601
)
@output_dir
=
File
.
join
(
base_dir
,
batch_dir
)
end
# Directory name used for this run's upload; it is the same value as the
# per-run batch directory name.
#
# @return [String] whatever `batch_dir` returns
def upload_dir
  batch_dir
end
private
# Per-run directory name: the ISO 8601 rendering of `start_at`.
#
# @return [String] `start_at` formatted with `iso8601`
def batch_dir
  started = start_at
  started.iso8601
end
end
...
...
spec/lib/pseudonymizer/dumper_spec.rb
View file @
7525a79f
...
...
@@ -32,7 +32,7 @@ describe Pseudonymizer::Dumper do
# grab the first table it outputs. There would only be 1.
project_table_file
=
pseudo
.
tables_to_csv
[
0
]
expect
(
project_table_file
).
to
include
(
"projects.csv.gz"
)
expect
(
project_table_file
).
to
end_with
(
"projects.csv.gz"
)
columns
=
[]
project_data
=
[]
...
...
@@ -50,6 +50,20 @@ describe Pseudonymizer::Dumper do
# sha 256 is 64 chars in length
expect
(
project_data
[
"id"
].
length
).
to
eq
(
64
)
end
# A field listed under `pseudo` but absent from `whitelist` must trigger a
# logger warning that mentions the field name.
it "warns when pseudonymized fields are extraneous" do
  whitelisted_columns = %w(id name path description)
  projects_manifest = { whitelist: whitelisted_columns, pseudo: %w(id extraneous) }
  pseudo.config[:tables] = { projects: projects_manifest }

  # The expectation has to be registered before the export runs.
  expect(Rails.logger).to receive(:warn).with(/extraneous/)

  pseudo.tables_to_csv
end
end
describe
"manifest is valid"
do
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment