Commit 1ac6a03f authored by Jacob Schatz, committed by Micaël Bergeron

Use enumerators to save memory instead of just plain old `each`

parent c88f66e5
...@@ -6,7 +6,7 @@ tables: ...@@ -6,7 +6,7 @@ tables:
- user_id - user_id
- created_at - created_at
- updated_at - updated_at
anon: pseudo:
- id - id
- merge_request_id - merge_request_id
- user_id - user_id
...@@ -17,7 +17,7 @@ tables: ...@@ -17,7 +17,7 @@ tables:
- group_id - group_id
- created_at - created_at
- updated_at - updated_at
anon: pseudo:
- id - id
- group_id - group_id
board_assignees: board_assignees:
...@@ -25,7 +25,7 @@ tables: ...@@ -25,7 +25,7 @@ tables:
- id - id
- board_id - board_id
- assignee_id - assignee_id
anon: pseudo:
- id - id
- board_id - board_id
- assignee_id - assignee_id
...@@ -34,7 +34,7 @@ tables: ...@@ -34,7 +34,7 @@ tables:
- id - id
- board_id - board_id
- label_id - label_id
anon: pseudo:
- id - id
- board_id - board_id
- label_id - label_id
...@@ -47,7 +47,7 @@ tables: ...@@ -47,7 +47,7 @@ tables:
- milestone_id - milestone_id
- group_id - group_id
- weight - weight
anon: pseudo:
- id - id
- project_id - project_id
- milestone_id - milestone_id
...@@ -58,7 +58,7 @@ tables: ...@@ -58,7 +58,7 @@ tables:
- epic_id - epic_id
- issue_id - issue_id
- relative_position - relative_position
anon: pseudo:
- id - id
- epic_id - epic_id
- issue_id - issue_id
...@@ -68,7 +68,7 @@ tables: ...@@ -68,7 +68,7 @@ tables:
- epic_id - epic_id
- created_at - created_at
- updated_at - updated_at
anon: pseudo:
- id - id
epics: epics:
whitelist: whitelist:
...@@ -89,7 +89,7 @@ tables: ...@@ -89,7 +89,7 @@ tables:
- updated_at - updated_at
- title - title
- description - description
anon: pseudo:
- id - id
- milestone_id - milestone_id
- group_id - group_id
...@@ -109,7 +109,7 @@ tables: ...@@ -109,7 +109,7 @@ tables:
whitelist: whitelist:
- user_id - user_id
- issue_id - issue_id
anon: pseudo:
- user_id - user_id
- issue_id - issue_id
issue_links: issue_links:
...@@ -119,7 +119,7 @@ tables: ...@@ -119,7 +119,7 @@ tables:
- target_id - target_id
- created_at - created_at
- updated_at - updated_at
anon: pseudo:
- id - id
- source_id - source_id
- target_id - target_id
...@@ -132,7 +132,7 @@ tables: ...@@ -132,7 +132,7 @@ tables:
- first_added_to_board_at - first_added_to_board_at
- created_at - created_at
- updated_at - updated_at
anon: pseudo:
- id - id
- issue_id - issue_id
issues: issues:
...@@ -157,7 +157,7 @@ tables: ...@@ -157,7 +157,7 @@ tables:
- discussion_locked - discussion_locked
- closed_at - closed_at
- closed_by_id - closed_by_id
anon: pseudo:
- id - id
- title - title
- author_id - author_id
...@@ -177,7 +177,7 @@ tables: ...@@ -177,7 +177,7 @@ tables:
- target_type - target_type
- created_at - created_at
- updated_at - updated_at
anon: pseudo:
- id - id
- label_id - label_id
- target_id - target_id
...@@ -189,7 +189,7 @@ tables: ...@@ -189,7 +189,7 @@ tables:
- priority - priority
- created_at - created_at
- updated_at - updated_at
anon: pseudo:
- id - id
- project_id - project_id
- label_id - label_id
...@@ -204,7 +204,7 @@ tables: ...@@ -204,7 +204,7 @@ tables:
- template - template
- type - type
- group_id - group_id
anon: pseudo:
- id - id
- title - title
- color - color
...@@ -219,7 +219,7 @@ tables: ...@@ -219,7 +219,7 @@ tables:
- id - id
- created_at - created_at
- updated_at - updated_at
anon: pseudo:
- id - id
merge_request_diff_commits: merge_request_diff_commits:
whitelist: whitelist:
...@@ -231,7 +231,7 @@ tables: ...@@ -231,7 +231,7 @@ tables:
- author_email - author_email
- committer_name - committer_name
- committer_email - committer_email
anon: pseudo:
- merge_request_diff_id - merge_request_diff_id
- author_name - author_name
- author_email - author_email
...@@ -247,7 +247,7 @@ tables: ...@@ -247,7 +247,7 @@ tables:
- too_large - too_large
- a_mode - a_mode
- b_mode - b_mode
anon: pseudo:
- merge_request_diff_id - merge_request_diff_id
merge_request_diffs: merge_request_diffs:
whitelist: whitelist:
...@@ -261,7 +261,7 @@ tables: ...@@ -261,7 +261,7 @@ tables:
- head_commit_sha - head_commit_sha
- start_commit_sha - start_commit_sha
- commits_count - commits_count
anon: pseudo:
- id - id
- merge_request_id - merge_request_id
- base_commit_sha - base_commit_sha
...@@ -281,7 +281,7 @@ tables: ...@@ -281,7 +281,7 @@ tables:
- merged_by_id - merged_by_id
- latest_closed_by_id - latest_closed_by_id
- latest_closed_at - latest_closed_at
anon: pseudo:
- id - id
- merge_request_id - merge_request_id
- pipeline_id - pipeline_id
...@@ -316,7 +316,7 @@ tables: ...@@ -316,7 +316,7 @@ tables:
- discussion_locked - discussion_locked
- latest_merge_request_diff_id - latest_merge_request_diff_id
- allow_maintainer_to_push - allow_maintainer_to_push
anon: pseudo:
- id - id
- target_branch - target_branch
- source_branch - source_branch
...@@ -337,7 +337,7 @@ tables: ...@@ -337,7 +337,7 @@ tables:
- issue_id - issue_id
- created_at - created_at
- updated_at - updated_at
anon: pseudo:
- id - id
- merge_request_id - merge_request_id
- issue_id - issue_id
...@@ -351,7 +351,7 @@ tables: ...@@ -351,7 +351,7 @@ tables:
- state - state
- start_date - start_date
- group_id - group_id
anon: pseudo:
- id - id
- project_id - project_id
- group_id - group_id
...@@ -361,7 +361,7 @@ tables: ...@@ -361,7 +361,7 @@ tables:
- namespace_id - namespace_id
- shared_runners_seconds - shared_runners_seconds
- shared_runners_seconds_last_reset - shared_runners_seconds_last_reset
anon: pseudo:
- id - id
- namespace_id - namespace_id
namespaces: namespaces:
...@@ -391,7 +391,7 @@ tables: ...@@ -391,7 +391,7 @@ tables:
- two_factor_grace_period - two_factor_grace_period
- plan_id - plan_id
- project_creation_level - project_creation_level
anon: pseudo:
- id - id
- name - name
- path - path
...@@ -441,7 +441,7 @@ tables: ...@@ -441,7 +441,7 @@ tables:
- cached_markdown_version - cached_markdown_version
- change_position - change_position
- resolved_by_push - resolved_by_push
anon: pseudo:
- id - id
- note - note
- noteable_type - noteable_type
...@@ -489,7 +489,7 @@ tables: ...@@ -489,7 +489,7 @@ tables:
- success_pipeline - success_pipeline
- push_to_merge_request - push_to_merge_request
- issue_due - issue_due
anon: pseudo:
- id - id
- user_id - user_id
- source_id - source_id
...@@ -516,7 +516,7 @@ tables: ...@@ -516,7 +516,7 @@ tables:
- user_id - user_id
- project_id - project_id
- access_level - access_level
anon: pseudo:
- user_id - user_id
- project_id - project_id
- access_level - access_level
...@@ -528,7 +528,7 @@ tables: ...@@ -528,7 +528,7 @@ tables:
- updated_at - updated_at
- enabled - enabled
- domain - domain
anon: pseudo:
- id - id
- project_id - project_id
- created_at - created_at
...@@ -540,7 +540,7 @@ tables: ...@@ -540,7 +540,7 @@ tables:
- id - id
- project_id - project_id
- group_runners_enabled - group_runners_enabled
anon: pseudo:
- id - id
- project_id - project_id
- group_runners_enabled - group_runners_enabled
...@@ -552,7 +552,7 @@ tables: ...@@ -552,7 +552,7 @@ tables:
- project_id - project_id
- key - key
- value - value
anon: pseudo:
- id - id
- created_at - created_at
- updated_at - updated_at
...@@ -565,7 +565,7 @@ tables: ...@@ -565,7 +565,7 @@ tables:
- project_id - project_id
- deploy_token_id - deploy_token_id
- created_at - created_at
anon: pseudo:
- id - id
- project_id - project_id
- deploy_token_id - deploy_token_id
...@@ -582,7 +582,7 @@ tables: ...@@ -582,7 +582,7 @@ tables:
- created_at - created_at
- updated_at - updated_at
- repository_access_level - repository_access_level
anon: pseudo:
- id - id
- project_id - project_id
- merge_requests_access_level - merge_requests_access_level
...@@ -602,7 +602,7 @@ tables: ...@@ -602,7 +602,7 @@ tables:
- updated_at - updated_at
- group_access - group_access
- expires_at - expires_at
anon: pseudo:
- id - id
- project_id - project_id
- group_id - group_id
...@@ -618,7 +618,7 @@ tables: ...@@ -618,7 +618,7 @@ tables:
- encrypted_credentials - encrypted_credentials
- encrypted_credentials_iv - encrypted_credentials_iv
- encrypted_credentials_salt - encrypted_credentials_salt
anon: pseudo:
- id - id
- project_id - project_id
- data - data
...@@ -635,7 +635,7 @@ tables: ...@@ -635,7 +635,7 @@ tables:
- next_execution_timestamp - next_execution_timestamp
- created_at - created_at
- updated_at - updated_at
anon: pseudo:
- id - id
- project_id - project_id
- retry_count - retry_count
...@@ -652,7 +652,7 @@ tables: ...@@ -652,7 +652,7 @@ tables:
- wiki_verification_checksum - wiki_verification_checksum
- last_repository_verification_failure - last_repository_verification_failure
- last_wiki_verification_failure - last_wiki_verification_failure
anon: pseudo:
- id - id
- project_id - project_id
- repository_verification_checksum - repository_verification_checksum
...@@ -671,7 +671,7 @@ tables: ...@@ -671,7 +671,7 @@ tables:
- build_artifacts_size - build_artifacts_size
- shared_runners_seconds - shared_runners_seconds
- shared_runners_seconds_last_reset - shared_runners_seconds_last_reset
anon: pseudo:
- id - id
- project_id - project_id
- namespace_id - namespace_id
...@@ -753,7 +753,7 @@ tables: ...@@ -753,7 +753,7 @@ tables:
- external_authorization_classification_label - external_authorization_classification_label
- external_webhook_token - external_webhook_token
- pages_https_only - pages_https_only
anon: pseudo:
- id - id
- name - name
- path - path
...@@ -833,7 +833,7 @@ tables: ...@@ -833,7 +833,7 @@ tables:
- created_at - created_at
- updated_at - updated_at
- project_id - project_id
anon: pseudo:
- id - id
- user_id - user_id
- subscribable_id - subscribable_id
...@@ -901,7 +901,7 @@ tables: ...@@ -901,7 +901,7 @@ tables:
- preferred_language - preferred_language
- rss_token - rss_token
- theme_id - theme_id
anon: pseudo:
- id - id
- email - email
- remember_created_at - remember_created_at
......
...@@ -12,13 +12,14 @@ module Pseudonymity ...@@ -12,13 +12,14 @@ module Pseudonymity
columns = results.columns # Assume they all have the same table columns = results.columns # Assume they all have the same table
to_filter = @anon_fields & columns to_filter = @anon_fields & columns
results.each do |result| Enumerator.new do | yielder |
to_filter.each do |field| results.each do |result|
result[field] = Digest::SHA2.new(256).hexdigest(result[field]) unless result[field].nil? to_filter.each do |field|
result[field] = Digest::SHA2.new(256).hexdigest(result[field]) unless result[field].nil?
end
yielder << result
end end
end end
results
end end
end end
...@@ -34,16 +35,17 @@ module Pseudonymity ...@@ -34,16 +35,17 @@ module Pseudonymity
tables = @config["tables"] tables = @config["tables"]
tables.map do | k, v | tables.map do | k, v |
table_to_csv(k, v["whitelist"], v["anon"]) table_to_csv(k, v["whitelist"], v["pseudo"])
end end
end end
def table_to_csv(table, whitelist_columns, pseudonymity_columns) def table_to_csv(table, whitelist_columns, pseudonymity_columns)
sql = "SELECT #{whitelist_columns.join(",")} from #{table}" sql = "SELECT #{whitelist_columns.join(",")} from #{table}"
results = ActiveRecord::Base.connection.exec_query(sql) results = ActiveRecord::Base.connection.exec_query(sql)
return if results.empty?
anon = Anon.new(pseudonymity_columns) anon = Anon.new(pseudonymity_columns)
results = anon.anonymize results write_to_csv_file(table, anon.anonymize(results))
write_to_csv_file table, results
end end
def parse_config def parse_config
...@@ -52,10 +54,6 @@ module Pseudonymity ...@@ -52,10 +54,6 @@ module Pseudonymity
def write_to_csv_file(title, contents) def write_to_csv_file(title, contents)
file_path = "/tmp/#{title}.csv" file_path = "/tmp/#{title}.csv"
if contents.empty?
File.open(file_path, "w") {}
return file_path
end
column_names = contents.first.keys column_names = contents.first.keys
contents = CSV.generate do | csv | contents = CSV.generate do | csv |
csv << column_names csv << column_names
......
...@@ -73,6 +73,7 @@ namespace :gitlab do ...@@ -73,6 +73,7 @@ namespace :gitlab do
desc 'Output pseudonymity dump of selected table' desc 'Output pseudonymity dump of selected table'
task :pseudonymity_dump => :environment do task :pseudonymity_dump => :environment do
table = Pseudonymity::Table.new table = Pseudonymity::Table.new
table.tables_to_csv
end end
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment