Commit 035b9f73 authored by Micaël Bergeron

apply review feedback

parent 32fd829f
...@@ -14,10 +14,7 @@ ...@@ -14,10 +14,7 @@
- if is_enabled - if is_enabled
= pseudonymizer_description_text = pseudonymizer_description_text
- else - else
- if is_available = pseudonymizer_disabled_description_text
= pseudonymizer_disabled_description_text
- else
= pseudonymizer_unavailable_description_text
= f.submit 'Save changes', class: "btn btn-success" = f.submit 'Save changes', class: "btn btn-success"
...@@ -238,7 +238,7 @@ ...@@ -238,7 +238,7 @@
= render 'usage' = render 'usage'
- if Gitlab::CurrentSettings.pseudonymizer_can_be_configured? - if Gitlab::CurrentSettings.pseudonymizer_can_be_configured?
%section.settings.as-usage.no-animate#js-elt-database-dump-settings{ class: ('expanded' if expanded) } %section.settings.as-pseudonymizer.no-animate#js-pseudonymizer-settings{ class: ('expanded' if expanded) }
.settings-header .settings-header
%h4 %h4
= _('Pseudonymizer Cron Job') = _('Pseudonymizer Cron Job')
......
...@@ -13,7 +13,7 @@ class PseudonymizerWorker ...@@ -13,7 +13,7 @@ class PseudonymizerWorker
dumper = Pseudonymizer::Dumper.new(options) dumper = Pseudonymizer::Dumper.new(options)
dumper.tables_to_csv dumper.tables_to_csv
uploader = Pseudonymizer::Uploader.new(options) uploader = Pseudonymizer::Uploader.new(options, progress_output: File.open(File::NULL, "w"))
uploader.upload uploader.upload
uploader.cleanup uploader.cleanup
end end
......
...@@ -479,7 +479,7 @@ Settings.backup['upload']['storage_class'] ||= nil ...@@ -479,7 +479,7 @@ Settings.backup['upload']['storage_class'] ||= nil
# #
Settings['pseudonymizer'] ||= Settingslogic.new({}) Settings['pseudonymizer'] ||= Settingslogic.new({})
Settings.pseudonymizer['enabled'] = false if Settings.pseudonymizer['enabled'].nil? Settings.pseudonymizer['enabled'] = false if Settings.pseudonymizer['enabled'].nil?
Settings.pseudonymizer['manifest'] = Settings.pseudonymizer['manifest'] || "lib/pseudonymity/manifest.yml" Settings.pseudonymizer['manifest'] = Settings.pseudonymizer['manifest'] || "lib/pseudonymizer/manifest.yml"
Settings.pseudonymizer['upload'] ||= Settingslogic.new({ 'remote_directory' => nil, 'connection' => nil }) Settings.pseudonymizer['upload'] ||= Settingslogic.new({ 'remote_directory' => nil, 'connection' => nil })
# Settings.pseudonymizer['upload']['multipart_chunk_size'] ||= 104857600 # Settings.pseudonymizer['upload']['multipart_chunk_size'] ||= 104857600
......
...@@ -43,10 +43,6 @@ module EE ...@@ -43,10 +43,6 @@ module EE
_("The pseudonymizer database cron job is disabled. When enabled the cron job will send pseudoanonymized data to be processed and analyzed.") _("The pseudonymizer database cron job is disabled. When enabled the cron job will send pseudoanonymized data to be processed and analyzed.")
end end
def pseudonymizer_unavailable_description_text
_("The pseudonymizer cron job is disabled. Once enabled, the cron job will send pseudoanonymized data to be processed and analyzed.")
end
override :visible_attributes override :visible_attributes
def visible_attributes def visible_attributes
super + [ super + [
......
...@@ -36,8 +36,9 @@ module Pseudonymizer ...@@ -36,8 +36,9 @@ module Pseudonymizer
def initialize(options) def initialize(options)
@config = options.config @config = options.config
@output_dir = options.output_dir @output_dir = options.output_dir
@start_at = options.start_at
@schema = {} @schema = Hash.new { |h, k| h[k] = {} }
@output_files = [] @output_files = []
end end
...@@ -49,13 +50,15 @@ module Pseudonymizer ...@@ -49,13 +50,15 @@ module Pseudonymizer
schema_to_yml schema_to_yml
file_list_to_json file_list_to_json
tables.each do |k, v| tables.map do |k, v|
table_to_csv(k, v['whitelist'], v['pseudo']) table_to_csv(k, v['whitelist'], v['pseudo'])
end end
end end
private
def get_and_log_file_name(ext, prefix = nil, filename = nil) def get_and_log_file_name(ext, prefix = nil, filename = nil)
file_timestamp = filename || "#{prefix}_#{Time.now.to_i}" file_timestamp = filename || "#{prefix}_#{@start_at.to_i}"
file_timestamp = "#{file_timestamp}.#{ext}" file_timestamp = "#{file_timestamp}.#{ext}"
@output_files << file_timestamp @output_files << file_timestamp
File.join(output_dir, file_timestamp) File.join(output_dir, file_timestamp)
...@@ -72,7 +75,6 @@ module Pseudonymizer ...@@ -72,7 +75,6 @@ module Pseudonymizer
end end
def table_to_csv(table, whitelist_columns, pseudonymity_columns) def table_to_csv(table, whitelist_columns, pseudonymity_columns)
@schema[table] = {}
table_to_schema(table) table_to_schema(table)
write_to_csv_file(table, table_page_results(table, whitelist_columns, pseudonymity_columns)) write_to_csv_file(table, table_page_results(table, whitelist_columns, pseudonymity_columns))
rescue => e rescue => e
...@@ -131,10 +133,10 @@ module Pseudonymizer ...@@ -131,10 +133,10 @@ module Pseudonymizer
@schema[table]["gl_mapping_key"] = "id" @schema[table]["gl_mapping_key"] = "id"
end end
def write_to_csv_file(title, contents) def write_to_csv_file(table, contents)
Rails.logger.info "Writing #{title} ..." file_path = get_and_log_file_name("csv", table)
file_path = get_and_log_file_name("csv", title)
Rails.logger.info "#{self.class.name} writing #{table} to #{file_path}."
CSV.open(file_path, 'w') do |csv| CSV.open(file_path, 'w') do |csv|
contents.with_index do |row, i| contents.with_index do |row, i|
csv << row.keys if i == 0 # header csv << row.keys if i == 0 # header
...@@ -145,7 +147,5 @@ module Pseudonymizer ...@@ -145,7 +147,5 @@ module Pseudonymizer
file_path file_path
end end
private :write_to_csv_file
end end
end end
...@@ -3,13 +3,13 @@ module Pseudonymizer ...@@ -3,13 +3,13 @@ module Pseudonymizer
attr_reader :config attr_reader :config
attr_reader :start_at attr_reader :start_at
def initialize(config: {}, start_at: Time.now.utc) def initialize(config: {})
@config = config @config = config
@start_at = start_at @start_at = Time.now.utc
end end
def output_dir def output_dir
File.join('/tmp', 'gitlab-pseudonymizer', start_at.iso8601) File.join(Dir.tmpdir, 'gitlab-pseudonymizer', start_at.iso8601)
end end
def upload_dir def upload_dir
......
module Pseudonymizer module Pseudonymizer
class Uploader class Uploader
include Gitlab::Utils::StrongMemoize
RemoteStorageUnavailableError = Class.new(StandardError) RemoteStorageUnavailableError = Class.new(StandardError)
# Our settings use string keys, but Fog expects symbols
def self.object_store_credentials def self.object_store_credentials
Gitlab.config.pseudonymizer.upload.connection.to_hash.deep_symbolize_keys Gitlab.config.pseudonymizer.upload.connection.to_hash.deep_symbolize_keys
end end
...@@ -10,8 +13,8 @@ module Pseudonymizer ...@@ -10,8 +13,8 @@ module Pseudonymizer
Gitlab.config.pseudonymizer.upload.remote_directory Gitlab.config.pseudonymizer.upload.remote_directory
end end
def initialize(options, progress = nil) def initialize(options, progress_output: nil)
@progress = progress || $stdout @progress_output = progress_output || $stdout
@config = options.config @config = options.config
@output_dir = options.output_dir @output_dir = options.output_dir
@upload_dir = options.upload_dir @upload_dir = options.upload_dir
...@@ -20,7 +23,7 @@ module Pseudonymizer ...@@ -20,7 +23,7 @@ module Pseudonymizer
end end
def upload def upload
progress.puts "Uploading output files to remote storage #{remote_directory} ... " progress_output.puts "Uploading output files to remote storage #{remote_directory}:"
file_list.each do |file| file_list.each do |file|
upload_file(file, remote_directory) upload_file(file, remote_directory)
...@@ -28,43 +31,37 @@ module Pseudonymizer ...@@ -28,43 +31,37 @@ module Pseudonymizer
end end
def cleanup def cleanup
progress.print "Deleting tmp directory #{@output_dir} ... "
return unless File.exist?(@output_dir) return unless File.exist?(@output_dir)
if FileUtils.rm_rf(@output_dir) progress_output.print "Deleting tmp directory #{@output_dir} ... "
progress.puts "done".color(:green) progress_output.puts FileUtils.rm_rf(@output_dir) ? "done".color(:green) : "failed".color(:red)
else
progress.puts "failed".color(:red)
end
end end
private private
attr_reader :progress attr_reader :progress_output
def upload_file(file, directory) def upload_file(file, directory)
progress.print "\t#{file} ... " progress_output.print "\t#{file} ... "
if directory.files.create(key: File.join(@upload_dir, File.basename(file)), if directory.files.create(key: File.join(@upload_dir, File.basename(file)),
body: File.open(file), body: File.open(file),
public: false) public: false)
progress.puts "done".color(:green) progress_output.puts "done".color(:green)
else else
progress.puts "uploading CSV to #{remote_directory} failed".color(:red) progress_output.puts "failed".color(:red)
end end
end end
def remote_directory def remote_directory
if @connection_params.blank? strong_memoize(:remote_directory) { connect_to_remote_directory }
progress.puts "Cannot upload files, make sure the `pseudonimizer.upload.connection` is set properly".color(:red)
raise RemoteStorageUnavailableError.new(@config)
end
connect_to_remote_directory
end end
def connect_to_remote_directory def connect_to_remote_directory
# our settings use string keys, but Fog expects symbols if @connection_params.blank?
abort "Cannot upload files, make sure the `pseudonimizer.upload.connection` is set properly".color(:red)
end
connection = ::Fog::Storage.new(@connection_params) connection = ::Fog::Storage.new(@connection_params)
# We only attempt to create the directory for local backups. For AWS # We only attempt to create the directory for local backups. For AWS
......
...@@ -72,32 +72,19 @@ namespace :gitlab do ...@@ -72,32 +72,19 @@ namespace :gitlab do
desc 'Output pseudonymity dump of selected tables' desc 'Output pseudonymity dump of selected tables'
task pseudonymizer: :environment do task pseudonymizer: :environment do
unless License.feature_available? :pseudonymizer abort "The pseudonymizer is not available with this license." unless License.feature_available?(:pseudonymizer)
raise "The pseudonymizer is not available with this license." abort "The pseudonymizer is disabled." unless Gitlab::CurrentSettings.pseudonymizer_enabled?
end
abort "Pseudonymizer disabled." unless Gitlab::CurrentSettings.pseudonymizer_enabled?
options = Pseudonymizer::Options.new( options = Pseudonymizer::Options.new(
config: YAML.load_file(Rails.root.join(Gitlab.config.pseudonymizer.manifest)), config: YAML.load_file(Rails.root.join(Gitlab.config.pseudonymizer.manifest))
start_at: Time.now.utc
) )
dumper = Pseudonymizer::Dumper.new(options) dumper = Pseudonymizer::Dumper.new(options)
dumper.tables_to_csv dumper.tables_to_csv
uploader = Pseudonymizer::Uploader.new(options, progress) uploader = Pseudonymizer::Uploader.new(options)
uploader.upload uploader.upload
uploader.cleanup uploader.cleanup
end end
def progress
if ENV['CRON']
# Do not output progress for Cron
StringIO.new
else
$stdout
end
end
end end
end end
...@@ -4,8 +4,9 @@ describe Pseudonymizer::Dumper do ...@@ -4,8 +4,9 @@ describe Pseudonymizer::Dumper do
let!(:project) { create(:project) } let!(:project) { create(:project) }
let(:base_dir) { Dir.mktmpdir } let(:base_dir) { Dir.mktmpdir }
let(:options) do let(:options) do
Pseudonymizer::Options.new(config: Gitlab.config.pseudonymizer, Pseudonymizer::Options.new(
start_at: Time.now.utc) config: YAML.load_file(Rails.root.join(Gitlab.config.pseudonymizer.manifest))
)
end end
subject(:pseudo) { described_class.new(options) } subject(:pseudo) { described_class.new(options) }
...@@ -17,7 +18,6 @@ describe Pseudonymizer::Dumper do ...@@ -17,7 +18,6 @@ describe Pseudonymizer::Dumper do
FileUtils.rm_rf(base_dir) FileUtils.rm_rf(base_dir)
end end
# create temp directory in before block
describe 'Pseudo tables' do describe 'Pseudo tables' do
it 'outputs project tables to csv' do it 'outputs project tables to csv' do
pseudo.config["tables"] = { pseudo.config["tables"] = {
...@@ -31,7 +31,6 @@ describe Pseudonymizer::Dumper do ...@@ -31,7 +31,6 @@ describe Pseudonymizer::Dumper do
# grab the first table it outputs. There would only be 1. # grab the first table it outputs. There would only be 1.
project_table_file = pseudo.tables_to_csv[0] project_table_file = pseudo.tables_to_csv[0]
# Ignore the `.` and `..` in the directory.
expect(project_table_file.include? "projects_").to be true expect(project_table_file.include? "projects_").to be true
expect(project_table_file.include? ".csv").to be true expect(project_table_file.include? ".csv").to be true
...@@ -40,9 +39,7 @@ describe Pseudonymizer::Dumper do ...@@ -40,9 +39,7 @@ describe Pseudonymizer::Dumper do
File.foreach(project_table_file).with_index do |line, line_num| File.foreach(project_table_file).with_index do |line, line_num|
if line_num == 0 if line_num == 0
columns = line.split(",") columns = line.split(",")
end elsif line_num == 1
if line_num == 1
project_data = line.split(",") project_data = line.split(",")
break break
end end
......
...@@ -3,10 +3,11 @@ require 'spec_helper' ...@@ -3,10 +3,11 @@ require 'spec_helper'
describe Pseudonymizer::Uploader do describe Pseudonymizer::Uploader do
let(:base_dir) { Dir.mktmpdir } let(:base_dir) { Dir.mktmpdir }
let(:options) do let(:options) do
Pseudonymizer::Options.new(config: Gitlab.config.pseudonymizer, Pseudonymizer::Options.new(
start_at: Time.now.utc) config: YAML.load_file(Rails.root.join(Gitlab.config.pseudonymizer.manifest))
)
end end
let(:remote_directory) { subject.send(:remote_directory) } let(:remote_directory) { described_class.remote_directory }
subject { described_class.new(options) } subject { described_class.new(options) }
def mock_file(file_name) def mock_file(file_name)
...@@ -15,7 +16,7 @@ describe Pseudonymizer::Uploader do ...@@ -15,7 +16,7 @@ describe Pseudonymizer::Uploader do
before do before do
allow(options).to receive(:output_dir).and_return(base_dir) allow(options).to receive(:output_dir).and_return(base_dir)
stub_object_storage_pseudonymizer(options: options) stub_object_storage_pseudonymizer
10.times {|i| mock_file("file_#{i}.test")} 10.times {|i| mock_file("file_#{i}.test")}
mock_file("schema.yml") mock_file("schema.yml")
......
...@@ -63,8 +63,8 @@ module StubObjectStorage ...@@ -63,8 +63,8 @@ module StubObjectStorage
EOS EOS
end end
def stub_object_storage_pseudonymizer(options:) def stub_object_storage_pseudonymizer
stub_object_storage(connection_params: Pseudonymizer::Uploader.object_store_credentials, stub_object_storage(connection_params: Pseudonymizer::Uploader.object_store_credentials,
remote_directory: options.config.upload.remote_directory) remote_directory: Gitlab.config.pseudonymizer.upload.remote_directory)
end end
end end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment