Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
G
gitlab-ce
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
1
Merge Requests
1
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
nexedi
gitlab-ce
Commits
879c6586
Commit
879c6586
authored
Jun 20, 2018
by
Micaël Bergeron
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add specs for `Pseudonymizer::Pager`
parent
dc700967
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
165 additions
and
105 deletions
+165
-105
lib/pseudonymizer/dumper.rb
lib/pseudonymizer/dumper.rb
+2
-105
lib/pseudonymizer/filter.rb
lib/pseudonymizer/filter.rb
+38
-0
lib/pseudonymizer/pager.rb
lib/pseudonymizer/pager.rb
+63
-0
spec/lib/pseudonymizer/pager_spec.rb
spec/lib/pseudonymizer/pager_spec.rb
+62
-0
No files found.
lib/pseudonymizer/dumper.rb
View file @
879c6586
require
'openssl'
require
'digest'
require
'csv'
require
'yaml'
module
Pseudonymizer
# Reads the rows of a table in batches ("pages") of at most PAGE_SIZE rows.
#
# When the selected columns include "id", each page is fetched with
# `WHERE id > <last id seen> ORDER BY id LIMIT <PAGE_SIZE>`; otherwise the
# pager falls back to `LIMIT <PAGE_SIZE> OFFSET <n>`.
class Pager
  # Maximum number of rows per page, overridable through PSEUDONYMIZER_BATCH.
  # ENV values are always Strings, so the value is coerced to an Integer:
  # the comparisons and arithmetic below would otherwise raise as soon as
  # the variable is set. Base 10 is forced so "0100" is not read as octal.
  PAGE_SIZE = Integer(ENV.fetch('PSEUDONYMIZER_BATCH', '100000'), 10)

  # @param table [String] name of the table to read from
  # @param columns [Array<String>] names of the columns to select
  def initialize(table, columns)
    @table = table
    @columns = columns
  end

  # Yields every page of results, picking the pagination strategy from the
  # selected columns. Returns an Enumerator when called without a block.
  #
  # @yield [results] the object returned by `exec_query` for one page
  def pages(&block)
    return enum_for(:pages) unless block_given?

    if @columns.include?("id")
      # optimize the pagination using WHERE id > ?
      pages_per_id(&block)
    else
      # fallback to `LIMIT ? OFFSET ?` when "id" is unavailable
      pages_per_offset(&block)
    end
  end

  # Pages through the table ordered by id, resuming each query after the
  # last id of the previous page.
  def pages_per_id(&block)
    id_offset = 0

    loop do
      # a page of results
      results = ActiveRecord::Base.connection.exec_query(<<-SQL.squish)
        SELECT #{@columns.join(",")}
        FROM #{@table}
        WHERE id > #{id_offset}
        ORDER BY id
        LIMIT #{PAGE_SIZE}
      SQL
      Rails.logger.debug("#{self.class.name} fetch ids [#{id_offset}, +#{PAGE_SIZE}[")
      break if results.empty?

      id_offset = results.last["id"].to_i
      yield results

      # a short page means the table is exhausted; skip the extra query
      break if results.count < PAGE_SIZE
    end
  end

  # Pages through the table with LIMIT/OFFSET, ordering by every selected
  # column so consecutive queries see the rows in the same order.
  def pages_per_offset(&block)
    page = 0

    loop do
      offset = page * PAGE_SIZE

      # a page of results
      results = ActiveRecord::Base.connection.exec_query(<<-SQL.squish)
        SELECT #{@columns.join(",")}
        FROM #{@table}
        ORDER BY #{@columns.join(",")}
        LIMIT #{PAGE_SIZE} OFFSET #{offset}
      SQL
      Rails.logger.debug("#{self.class.name} fetching offset [#{offset}, #{offset + PAGE_SIZE}[")
      break if results.empty?

      page += 1
      yield results

      # a short page means the table is exhausted; skip the extra query
      break if results.count < PAGE_SIZE
    end
  end
end
# Replaces the values of the pseudonymized columns of each row with a
# hex-encoded HMAC-SHA256 keyed with the application's secret_key_base.
class Anon
  # @param table [String] table name, used only in log messages
  # @param whitelisted_fields [Array<String>] columns that are exported
  # @param pseudonymized_fields [Array<String>] columns to pseudonymize
  def initialize(table, whitelisted_fields, pseudonymized_fields)
    @table = table
    @pseudo_fields = pseudo_fields(whitelisted_fields, pseudonymized_fields)
  end

  # Returns an Enumerator yielding every row of `results`, with the
  # pseudonymized fields hashed in place. nil values are left untouched.
  def anonymize(results)
    secret = Rails.application.secrets[:secret_key_base]
    sha256 = OpenSSL::Digest.new('sha256')

    Enumerator.new do |out|
      results.each do |row|
        @pseudo_fields.each do |column|
          value = row[column]
          row[column] = OpenSSL::HMAC.hexdigest(sha256, secret, String(value)) unless value.nil?
        end

        out << row
      end
    end
  end

  private

  # Keeps only the pseudonymized fields that are also whitelisted, warning
  # about every field that is dropped.
  def pseudo_fields(whitelisted, pseudonymized)
    (pseudonymized - whitelisted).each do |field|
      Rails.logger.warn("#{self.class.name} extraneous pseudo: #{@table}.#{field} is not whitelisted and will be ignored.")
    end

    pseudonymized & whitelisted
  end
end
class
Dumper
attr_accessor
:config
,
:output_dir
...
...
@@ -165,12 +62,12 @@ module Pseudonymizer
# yields every result, paginated and anonymized
def
table_page_results
(
table
,
whitelist_columns
,
pseudonymity_columns
)
anonymizer
=
Anon
.
new
(
table
,
whitelist_columns
,
pseudonymity_columns
)
filter
=
Filter
.
new
(
table
,
whitelist_columns
,
pseudonymity_columns
)
pager
=
Pager
.
new
(
table
,
whitelist_columns
)
Enumerator
.
new
do
|
yielder
|
pager
.
pages
do
|
page
|
anonymiz
er
.
anonymize
(
page
).
each
do
|
result
|
filt
er
.
anonymize
(
page
).
each
do
|
result
|
yielder
<<
result
end
end
...
...
lib/pseudonymizer/filter.rb
0 → 100644
View file @
879c6586
require
'openssl'
require
'digest'
module Pseudonymizer
  # Replaces the values of the pseudonymized columns of each row with a
  # hex-encoded HMAC-SHA256 keyed with the application's secret_key_base.
  class Filter
    # @param table [String] table name, used only in log messages
    # @param whitelisted_fields [Array<String>] columns that are exported
    # @param pseudonymized_fields [Array<String>] columns to pseudonymize
    def initialize(table, whitelisted_fields, pseudonymized_fields)
      @table = table
      @pseudo_fields = pseudo_fields(whitelisted_fields, pseudonymized_fields)
    end

    # Returns an Enumerator yielding every row of `results`, with the
    # pseudonymized fields hashed in place. nil values are left untouched.
    def anonymize(results)
      secret = Rails.application.secrets[:secret_key_base]
      sha256 = OpenSSL::Digest.new('sha256')

      Enumerator.new do |out|
        results.each do |row|
          @pseudo_fields.each do |column|
            value = row[column]
            row[column] = OpenSSL::HMAC.hexdigest(sha256, secret, String(value)) unless value.nil?
          end

          out << row
        end
      end
    end

    private

    # Keeps only the pseudonymized fields that are also whitelisted, warning
    # about every field that is dropped.
    def pseudo_fields(whitelisted, pseudonymized)
      (pseudonymized - whitelisted).each do |field|
        Rails.logger.warn("#{self.class.name} extraneous pseudo: #{@table}.#{field} is not whitelisted and will be ignored.")
      end

      pseudonymized & whitelisted
    end
  end
end
lib/pseudonymizer/pager.rb
0 → 100644
View file @
879c6586
module Pseudonymizer
  # Reads the rows of a table in batches ("pages") of at most PAGE_SIZE rows.
  #
  # When the selected columns include "id", each page is fetched with
  # `WHERE id > <last id seen> ORDER BY id LIMIT <PAGE_SIZE>`; otherwise the
  # pager falls back to `LIMIT <PAGE_SIZE> OFFSET <n>`.
  class Pager
    # Maximum number of rows per page, overridable through PSEUDONYMIZER_BATCH.
    # ENV values are always Strings, so the value is coerced to an Integer:
    # the comparisons and arithmetic below would otherwise raise as soon as
    # the variable is set. Base 10 is forced so "0100" is not read as octal.
    PAGE_SIZE = Integer(ENV.fetch('PSEUDONYMIZER_BATCH', '100000'), 10)

    # @param table [String] name of the table to read from
    # @param columns [Array<String>] names of the columns to select
    def initialize(table, columns)
      @table = table
      @columns = columns
    end

    # Yields every page of results, picking the pagination strategy from the
    # selected columns. Returns an Enumerator when called without a block.
    #
    # @yield [results] the object returned by `exec_query` for one page
    def pages(&block)
      return enum_for(:pages) unless block_given?

      if @columns.include?("id")
        # optimize the pagination using WHERE id > ?
        pages_per_id(&block)
      else
        # fallback to `LIMIT ? OFFSET ?` when "id" is unavailable
        pages_per_offset(&block)
      end
    end

    # Pages through the table ordered by id, resuming each query after the
    # last id of the previous page.
    def pages_per_id(&block)
      id_offset = 0

      loop do
        # a page of results
        results = ActiveRecord::Base.connection.exec_query(<<-SQL.squish)
          SELECT #{@columns.join(",")}
          FROM #{@table}
          WHERE id > #{id_offset}
          ORDER BY id
          LIMIT #{PAGE_SIZE}
        SQL
        Rails.logger.debug("#{self.class.name} fetch ids [#{id_offset}, +#{PAGE_SIZE}[")
        break if results.empty?

        id_offset = results.last["id"].to_i
        yield results

        # a short page means the table is exhausted; skip the extra query
        break if results.count < PAGE_SIZE
      end
    end

    # Pages through the table with LIMIT/OFFSET, ordering by every selected
    # column so consecutive queries see the rows in the same order.
    def pages_per_offset(&block)
      offset = 0

      loop do
        # a page of results
        results = ActiveRecord::Base.connection.exec_query(<<-SQL.squish)
          SELECT #{@columns.join(",")}
          FROM #{@table}
          ORDER BY #{@columns.join(",")}
          LIMIT #{PAGE_SIZE} OFFSET #{offset}
        SQL
        Rails.logger.debug("#{self.class.name} fetching offset [#{offset}, #{offset + PAGE_SIZE}[")
        break if results.empty?

        offset += PAGE_SIZE
        yield results

        # a short page means the table is exhausted; skip the extra query
        break if results.count < PAGE_SIZE
      end
    end
  end
end
spec/lib/pseudonymizer/pager_spec.rb
0 → 100644
View file @
879c6586
require
'spec_helper'
# Exercises Pseudonymizer::Pager against the `projects` table with a page
# size of one row, for both pagination strategies.
describe Pseudonymizer::Pager do
  let(:page_size) { 1 }
  let!(:projects) { create_list(:project, 10) }
  subject { described_class.new("projects", whitelisted_columns) }

  before do
    # shrink the batch size so ten rows span several pages
    stub_const("Pseudonymizer::Pager::PAGE_SIZE", page_size)
  end

  shared_examples "yield results in page" do
    it "yields every row, at most one page size at a time" do
      page_count = 0
      result_count = 0

      subject.pages do |page|
        result_count += page.count
        page_count += 1
      end

      expect(result_count).to eq(projects.count)
      # round up so the expectation also holds when the page size does not
      # divide the number of rows
      expect(page_count).to eq((projects.count / page_size.to_f).ceil)
    end
  end

  context "`id` column is present" do
    let(:whitelisted_columns) { %w(id name) }

    describe "#pages" do
      it "delegates to #pages_per_id" do
        expect(subject).to receive(:pages_per_id)

        subject.pages { |_page| nil }
      end

      include_examples "yield results in page"
    end
  end

  context "`id` column is missing" do
    let(:whitelisted_columns) { %w(name) }

    describe "#pages" do
      it "delegates to #pages_per_offset" do
        expect(subject).to receive(:pages_per_offset)

        subject.pages { |_page| nil }
      end

      include_examples "yield results in page"
    end
  end
end
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment