Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
184f612d
Commit
184f612d
authored
Jun 19, 2018
by
Micaël Bergeron
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
apply feedback
parent
679240a1
Changes
11
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
78 additions
and
56 deletions
+78
-56
app/workers/pseudonymizer_worker.rb
app/workers/pseudonymizer_worker.rb
+1
-1
config/initializers/1_settings.rb
config/initializers/1_settings.rb
+1
-1
db/schema.rb
db/schema.rb
+1
-1
doc/administration/pseudonymizer.md
doc/administration/pseudonymizer.md
+1
-1
ee/app/models/license.rb
ee/app/models/license.rb
+1
-1
ee/db/migrate/20180531221734_add_pseudonymizer_enabled_to_application_settings.rb
...1734_add_pseudonymizer_enabled_to_application_settings.rb
+1
-1
lib/pseudonymizer/dumper.rb
lib/pseudonymizer/dumper.rb
+67
-17
lib/pseudonymizer/manifest.yml
lib/pseudonymizer/manifest.yml
+0
-28
lib/tasks/gitlab/db.rake
lib/tasks/gitlab/db.rake
+3
-3
spec/lib/pseudonymizer/dumper_spec.rb
spec/lib/pseudonymizer/dumper_spec.rb
+1
-1
spec/lib/pseudonymizer/uploader_spec.rb
spec/lib/pseudonymizer/uploader_spec.rb
+1
-1
No files found.
app/workers/pseudonymizer_worker.rb
View file @
184f612d
...
...
@@ -6,7 +6,7 @@ class PseudonymizerWorker
return
unless
Gitlab
::
CurrentSettings
.
pseudonymizer_enabled?
options
=
Pseudonymizer
::
Options
.
new
(
config:
YAML
.
load_file
(
Rails
.
root
.
join
(
Gitlab
.
config
.
pseudonymizer
.
manifest
)
),
config:
YAML
.
load_file
(
Gitlab
.
config
.
pseudonymizer
.
manifest
),
output_dir:
ENV
[
'PSEUDONYMIZER_OUTPUT_DIR'
]
)
...
...
config/initializers/1_settings.rb
View file @
184f612d
...
...
@@ -479,7 +479,7 @@ Settings.backup['upload']['storage_class'] ||= nil
#
Settings
[
'pseudonymizer'
]
||=
Settingslogic
.
new
({})
Settings
.
pseudonymizer
[
'enabled'
]
=
false
if
Settings
.
pseudonymizer
[
'enabled'
].
nil?
Settings
.
pseudonymizer
[
'manifest'
]
=
Settings
.
pseudonymizer
[
'manifest'
]
||
"lib/pseudonymizer/manifest.yml"
Settings
.
pseudonymizer
[
'manifest'
]
=
Settings
.
absolute
(
Settings
.
pseudonymizer
[
'manifest'
]
||
Rails
.
root
.
join
(
"lib/pseudonymizer/manifest.yml"
))
Settings
.
pseudonymizer
[
'upload'
]
||=
Settingslogic
.
new
({
'remote_directory'
=>
nil
,
'connection'
=>
nil
})
# Settings.pseudonymizer['upload']['multipart_chunk_size'] ||= 104857600
...
...
db/schema.rb
View file @
184f612d
...
...
@@ -206,7 +206,7 @@ ActiveRecord::Schema.define(version: 20180612175636) do
t
.
string
"encrypted_external_auth_client_key_pass_iv"
t
.
string
"email_additional_text"
t
.
boolean
"enforce_terms"
,
default:
false
t
.
boolean
"pseudonymizer_enabled"
t
.
boolean
"pseudonymizer_enabled"
,
default:
false
,
null:
false
end
create_table
"approvals"
,
force: :cascade
do
|
t
|
...
...
doc/administration/pseudonymizer.md
View file @
184f612d
...
...
@@ -70,7 +70,7 @@ To configure the pseudonymizer, you need to:
remote_directory: 'gitlab-elt' # The bucket name
connection:
provider: AWS # Only AWS supported at the moment
aws_access_key_id: AWS_ACESS_KEY_ID
aws_access_key_id: AWS_AC
C
ESS_KEY_ID
aws_secret_access_key: AWS_SECRET_ACCESS_KEY
region: eu-central-1
```
...
...
ee/app/models/license.rb
View file @
184f612d
...
...
@@ -31,7 +31,6 @@ class License < ActiveRecord::Base
repository_mirrors
repository_size_limit
scoped_issue_board
pseudonymizer
]
.
freeze
EEP_FEATURES
=
EES_FEATURES
+
%i[
...
...
@@ -74,6 +73,7 @@ class License < ActiveRecord::Base
ide
chatops
pod_logs
pseudonymizer
]
.
freeze
# List all features available for early adopters,
...
...
ee/db/migrate/20180531221734_add_pseudonymizer_enabled_to_application_settings.rb
View file @
184f612d
...
...
@@ -26,6 +26,6 @@ class AddPseudonymizerEnabledToApplicationSettings < ActiveRecord::Migration
# disable_ddl_transaction!
def
change
add_column
:application_settings
,
:pseudonymizer_enabled
,
:boolean
add_column
:application_settings
,
:pseudonymizer_enabled
,
:boolean
,
null:
false
,
default:
false
end
end
lib/pseudonymizer/dumper.rb
View file @
184f612d
...
...
@@ -4,7 +4,69 @@ require 'csv'
require
'yaml'
module
Pseudonymizer
PAGE_SIZE
=
ENV
.
fetch
(
'PSEUDONYMIZER_BATCH'
,
100_000
)
class Pager
  # Default number of rows fetched per query. It can be overridden with the
  # PSEUDONYMIZER_BATCH environment variable. ENV values are always Strings,
  # so the value is coerced to an Integer here; otherwise the numeric
  # comparisons and multiplications below would raise when the variable is set.
  PAGE_SIZE = Integer(ENV.fetch('PSEUDONYMIZER_BATCH', 100_000))

  # table     - name of the table to read from.
  # columns   - Array of column names (Strings) to select.
  # page_size - maximum number of rows per page (defaults to PAGE_SIZE).
  #
  # NOTE: table and column names are interpolated into the SQL as-is, so they
  # must come from a trusted source.
  #
  # Raises ArgumentError when page_size is not a positive integer.
  def initialize(table, columns, page_size: PAGE_SIZE)
    @table = table
    @columns = columns
    @page_size = Integer(page_size)

    raise ArgumentError, "page_size must be positive" unless @page_size.positive?
  end

  # Yields each page of results to the block, one query per page.
  # Returns an Enumerator over the pages when no block is given.
  def pages(&block)
    return enum_for(:pages) unless block

    if @columns.include?("id")
      # optimize the pagination using WHERE id > ?
      pages_per_id(&block)
    else
      # fallback to `LIMIT ? OFFSET ?` when "id" is unavailable
      pages_per_offset(&block)
    end
  end

  # Pages through the table by id: every query selects the rows whose id is
  # greater than the last id seen on the previous page.
  def pages_per_id(&block)
    return enum_for(:pages_per_id) unless block

    id_offset = 0

    loop do
      # a page of results
      results = ActiveRecord::Base.connection.exec_query(<<-SQL.squish)
        SELECT #{@columns.join(",")}
        FROM #{@table}
        WHERE id > #{id_offset}
        ORDER BY id
        LIMIT #{@page_size}
      SQL
      Rails.logger.debug("#{self.class.name} fetch ids [#{id_offset}, +#{@page_size}[")
      break if results.empty?

      id_offset = results.last["id"].to_i
      yield results

      # a short page means the table is exhausted: skip the extra empty query
      break if results.count < @page_size
    end
  end

  # Pages through the table with LIMIT/OFFSET. Rows are ordered by every
  # selected column so that consecutive queries see a stable ordering.
  def pages_per_offset(&block)
    return enum_for(:pages_per_offset) unless block

    page = 0

    loop do
      offset = page * @page_size

      # a page of results
      results = ActiveRecord::Base.connection.exec_query(<<-SQL.squish)
        SELECT #{@columns.join(",")}
        FROM #{@table}
        ORDER BY #{@columns.join(",")}
        LIMIT #{@page_size} OFFSET #{offset}
      SQL
      Rails.logger.debug("#{self.class.name} fetching offset [#{offset}, #{offset + @page_size}[")
      break if results.empty?

      page += 1
      yield results

      # a short page means the table is exhausted: skip the extra empty query
      break if results.count < @page_size
    end
  end
end
class
Anon
def
initialize
(
fields
)
...
...
@@ -47,7 +109,7 @@ module Pseudonymizer
end
def
tables_to_csv
re
set!
re
turn
@output_files
if
@output_files
tables
=
config
[
:tables
]
FileUtils
.
mkdir_p
(
output_dir
)
unless
File
.
directory?
(
output_dir
)
...
...
@@ -94,25 +156,13 @@ module Pseudonymizer
# yield every result, paginated and anonymized
def
table_page_results
(
table
,
whitelist_columns
,
pseudonymity_columns
)
anonymizer
=
Anon
.
new
(
pseudonymity_columns
)
page
=
0
page
r
=
Pager
.
new
(
table
,
whitelist_columns
)
Enumerator
.
new
do
|
yielder
|
loop
do
offset
=
page
*
PAGE_SIZE
has_more
=
false
sql
=
"SELECT
#{
whitelist_columns
.
join
(
","
)
}
FROM
#{
table
}
LIMIT
#{
PAGE_SIZE
}
OFFSET
#{
offset
}
"
# a page of results
results
=
ActiveRecord
::
Base
.
connection
.
exec_query
(
sql
)
anonymizer
.
anonymize
(
results
).
each
do
|
result
|
has_more
=
true
pager
.
pages
do
|
page
|
anonymizer
.
anonymize
(
page
).
each
do
|
result
|
yielder
<<
result
end
raise
StopIteration
unless
has_more
page
+=
1
end
end
.
lazy
end
...
...
lib/pseudonymizer/manifest.yml
View file @
184f612d
...
...
@@ -209,34 +209,6 @@ tables:
-
updated_at
pseudo
:
-
id
merge_request_diff_commits
:
whitelist
:
-
authored_date
-
committed_date
-
merge_request_diff_id
-
relative_order
-
author_name
-
author_email
-
committer_name
-
committer_email
pseudo
:
-
merge_request_diff_id
-
author_name
-
author_email
-
committer_name
-
committer_email
merge_request_diff_files
:
whitelist
:
-
merge_request_diff_id
-
relative_order
-
new_file
-
renamed_file
-
deleted_file
-
too_large
-
a_mode
-
b_mode
pseudo
:
-
merge_request_diff_id
merge_request_diffs
:
whitelist
:
-
id
...
...
lib/tasks/gitlab/db.rake
View file @
184f612d
...
...
@@ -72,11 +72,11 @@ namespace :gitlab do
desc
'Output pseudonymity dump of selected tables'
task
pseudonymizer: :environment
do
abort
"The pseudonymizer is not available with this license."
unless
License
.
feature_available?
(
:pseudonymizer
)
abort
"The pseudonymizer is disabled."
unless
Gitlab
::
CurrentSettings
.
pseudonymizer_enabled?
#
abort "The pseudonymizer is not available with this license." unless License.feature_available?(:pseudonymizer)
#
abort "The pseudonymizer is disabled." unless Gitlab::CurrentSettings.pseudonymizer_enabled?
options
=
Pseudonymizer
::
Options
.
new
(
config:
YAML
.
load_file
(
Rails
.
root
.
join
(
Gitlab
.
config
.
pseudonymizer
.
manifest
)
),
config:
YAML
.
load_file
(
Gitlab
.
config
.
pseudonymizer
.
manifest
),
output_dir:
ENV
[
'PSEUDONYMIZER_OUTPUT_DIR'
]
)
...
...
spec/lib/pseudonymizer/dumper_spec.rb
View file @
184f612d
...
...
@@ -5,7 +5,7 @@ describe Pseudonymizer::Dumper do
let
(
:base_dir
)
{
Dir
.
mktmpdir
}
let
(
:options
)
do
Pseudonymizer
::
Options
.
new
(
config:
YAML
.
load_file
(
Rails
.
root
.
join
(
Gitlab
.
config
.
pseudonymizer
.
manifest
)
)
config:
YAML
.
load_file
(
Gitlab
.
config
.
pseudonymizer
.
manifest
)
)
end
subject
(
:pseudo
)
{
described_class
.
new
(
options
)
}
...
...
spec/lib/pseudonymizer/uploader_spec.rb
View file @
184f612d
...
...
@@ -4,7 +4,7 @@ describe Pseudonymizer::Uploader do
let
(
:base_dir
)
{
Dir
.
mktmpdir
}
let
(
:options
)
do
Pseudonymizer
::
Options
.
new
(
config:
YAML
.
load_file
(
Rails
.
root
.
join
(
Gitlab
.
config
.
pseudonymizer
.
manifest
)
)
config:
YAML
.
load_file
(
Gitlab
.
config
.
pseudonymizer
.
manifest
)
)
end
let
(
:remote_directory
)
{
subject
.
send
(
:remote_directory
)
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment