Commit 035b9f73 authored by Micaël Bergeron

apply review feedback

parent 32fd829f
......@@ -14,10 +14,7 @@
- if is_enabled
= pseudonymizer_description_text
- else
- if is_available
= pseudonymizer_disabled_description_text
- else
= pseudonymizer_unavailable_description_text
= pseudonymizer_disabled_description_text
= f.submit 'Save changes', class: "btn btn-success"
......@@ -238,7 +238,7 @@
= render 'usage'
- if Gitlab::CurrentSettings.pseudonymizer_can_be_configured?
%section.settings.as-usage.no-animate#js-elt-database-dump-settings{ class: ('expanded' if expanded) }
%section.settings.as-pseudonymizer.no-animate#js-pseudonymizer-settings{ class: ('expanded' if expanded) }
.settings-header
%h4
= _('Pseudonymizer Cron Job')
......
......@@ -13,7 +13,7 @@ class PseudonymizerWorker
dumper = Pseudonymizer::Dumper.new(options)
dumper.tables_to_csv
uploader = Pseudonymizer::Uploader.new(options)
uploader = Pseudonymizer::Uploader.new(options, progress_output: File.open(File::NULL, "w"))
uploader.upload
uploader.cleanup
end
......
......@@ -479,7 +479,7 @@ Settings.backup['upload']['storage_class'] ||= nil
#
Settings['pseudonymizer'] ||= Settingslogic.new({})
Settings.pseudonymizer['enabled'] = false if Settings.pseudonymizer['enabled'].nil?
Settings.pseudonymizer['manifest'] = Settings.pseudonymizer['manifest'] || "lib/pseudonymity/manifest.yml"
Settings.pseudonymizer['manifest'] = Settings.pseudonymizer['manifest'] || "lib/pseudonymizer/manifest.yml"
Settings.pseudonymizer['upload'] ||= Settingslogic.new({ 'remote_directory' => nil, 'connection' => nil })
# Settings.pseudonymizer['upload']['multipart_chunk_size'] ||= 104857600
......
......@@ -43,10 +43,6 @@ module EE
_("The pseudonymizer database cron job is disabled. When enabled the cron job will send pseudoanonymized data to be processed and analyzed.")
end
def pseudonymizer_unavailable_description_text
_("The pseudonymizer cron job is disabled. Once enabled, the cron job will send pseudoanonymized data to be processed and analyzed.")
end
override :visible_attributes
def visible_attributes
super + [
......
......@@ -36,8 +36,9 @@ module Pseudonymizer
def initialize(options)
@config = options.config
@output_dir = options.output_dir
@start_at = options.start_at
@schema = {}
@schema = Hash.new { |h, k| h[k] = {} }
@output_files = []
end
......@@ -49,13 +50,15 @@ module Pseudonymizer
schema_to_yml
file_list_to_json
tables.each do |k, v|
tables.map do |k, v|
table_to_csv(k, v['whitelist'], v['pseudo'])
end
end
private
def get_and_log_file_name(ext, prefix = nil, filename = nil)
file_timestamp = filename || "#{prefix}_#{Time.now.to_i}"
file_timestamp = filename || "#{prefix}_#{@start_at.to_i}"
file_timestamp = "#{file_timestamp}.#{ext}"
@output_files << file_timestamp
File.join(output_dir, file_timestamp)
......@@ -72,7 +75,6 @@ module Pseudonymizer
end
def table_to_csv(table, whitelist_columns, pseudonymity_columns)
@schema[table] = {}
table_to_schema(table)
write_to_csv_file(table, table_page_results(table, whitelist_columns, pseudonymity_columns))
rescue => e
......@@ -131,10 +133,10 @@ module Pseudonymizer
@schema[table]["gl_mapping_key"] = "id"
end
def write_to_csv_file(title, contents)
Rails.logger.info "Writing #{title} ..."
file_path = get_and_log_file_name("csv", title)
def write_to_csv_file(table, contents)
file_path = get_and_log_file_name("csv", table)
Rails.logger.info "#{self.class.name} writing #{table} to #{file_path}."
CSV.open(file_path, 'w') do |csv|
contents.with_index do |row, i|
csv << row.keys if i == 0 # header
......@@ -145,7 +147,5 @@ module Pseudonymizer
file_path
end
private :write_to_csv_file
end
end
......@@ -3,13 +3,13 @@ module Pseudonymizer
attr_reader :config
attr_reader :start_at
def initialize(config: {}, start_at: Time.now.utc)
def initialize(config: {})
@config = config
@start_at = start_at
@start_at = Time.now.utc
end
def output_dir
File.join('/tmp', 'gitlab-pseudonymizer', start_at.iso8601)
File.join(Dir.tmpdir, 'gitlab-pseudonymizer', start_at.iso8601)
end
def upload_dir
......
module Pseudonymizer
class Uploader
include Gitlab::Utils::StrongMemoize
RemoteStorageUnavailableError = Class.new(StandardError)
# Connection settings for the pseudonymizer upload target, read from
# Gitlab.config.pseudonymizer.upload.connection. The settings are stored with
# string keys, whereas Fog expects symbol keys, so the hash is deep-symbolized.
def self.object_store_credentials
  connection_settings = Gitlab.config.pseudonymizer.upload.connection
  connection_settings.to_hash.deep_symbolize_keys
end
......@@ -10,8 +13,8 @@ module Pseudonymizer
Gitlab.config.pseudonymizer.upload.remote_directory
end
def initialize(options, progress = nil)
@progress = progress || $stdout
def initialize(options, progress_output: nil)
@progress_output = progress_output || $stdout
@config = options.config
@output_dir = options.output_dir
@upload_dir = options.upload_dir
......@@ -20,7 +23,7 @@ module Pseudonymizer
end
def upload
progress.puts "Uploading output files to remote storage #{remote_directory} ... "
progress_output.puts "Uploading output files to remote storage #{remote_directory}:"
file_list.each do |file|
upload_file(file, remote_directory)
......@@ -28,43 +31,37 @@ module Pseudonymizer
end
def cleanup
progress.print "Deleting tmp directory #{@output_dir} ... "
return unless File.exist?(@output_dir)
if FileUtils.rm_rf(@output_dir)
progress.puts "done".color(:green)
else
progress.puts "failed".color(:red)
end
progress_output.print "Deleting tmp directory #{@output_dir} ... "
progress_output.puts FileUtils.rm_rf(@output_dir) ? "done".color(:green) : "failed".color(:red)
end
private
attr_reader :progress
attr_reader :progress_output
def upload_file(file, directory)
progress.print "\t#{file} ... "
progress_output.print "\t#{file} ... "
if directory.files.create(key: File.join(@upload_dir, File.basename(file)),
body: File.open(file),
public: false)
progress.puts "done".color(:green)
progress_output.puts "done".color(:green)
else
progress.puts "uploading CSV to #{remote_directory} failed".color(:red)
progress_output.puts "failed".color(:red)
end
end
def remote_directory
if @connection_params.blank?
progress.puts "Cannot upload files, make sure the `pseudonimizer.upload.connection` is set properly".color(:red)
raise RemoteStorageUnavailableError.new(@config)
end
connect_to_remote_directory
strong_memoize(:remote_directory) { connect_to_remote_directory }
end
def connect_to_remote_directory
# our settings use string keys, but Fog expects symbols
if @connection_params.blank?
abort "Cannot upload files, make sure the `pseudonimizer.upload.connection` is set properly".color(:red)
end
connection = ::Fog::Storage.new(@connection_params)
# We only attempt to create the directory for local backups. For AWS
......
......@@ -72,32 +72,19 @@ namespace :gitlab do
desc 'Output pseudonymity dump of selected tables'
task pseudonymizer: :environment do
unless License.feature_available? :pseudonymizer
raise "The pseudonymizer is not available with this license."
end
abort "Pseudonymizer disabled." unless Gitlab::CurrentSettings.pseudonymizer_enabled?
abort "The pseudonymizer is not available with this license." unless License.feature_available?(:pseudonymizer)
abort "The pseudonymizer is disabled." unless Gitlab::CurrentSettings.pseudonymizer_enabled?
options = Pseudonymizer::Options.new(
config: YAML.load_file(Rails.root.join(Gitlab.config.pseudonymizer.manifest)),
start_at: Time.now.utc
config: YAML.load_file(Rails.root.join(Gitlab.config.pseudonymizer.manifest))
)
dumper = Pseudonymizer::Dumper.new(options)
dumper.tables_to_csv
uploader = Pseudonymizer::Uploader.new(options, progress)
uploader = Pseudonymizer::Uploader.new(options)
uploader.upload
uploader.cleanup
end
def progress
if ENV['CRON']
# Do not output progress for Cron
StringIO.new
else
$stdout
end
end
end
end
......@@ -4,8 +4,9 @@ describe Pseudonymizer::Dumper do
let!(:project) { create(:project) }
let(:base_dir) { Dir.mktmpdir }
let(:options) do
Pseudonymizer::Options.new(config: Gitlab.config.pseudonymizer,
start_at: Time.now.utc)
Pseudonymizer::Options.new(
config: YAML.load_file(Rails.root.join(Gitlab.config.pseudonymizer.manifest))
)
end
subject(:pseudo) { described_class.new(options) }
......@@ -17,7 +18,6 @@ describe Pseudonymizer::Dumper do
FileUtils.rm_rf(base_dir)
end
# create temp directory in before block
describe 'Pseudo tables' do
it 'outputs project tables to csv' do
pseudo.config["tables"] = {
......@@ -31,7 +31,6 @@ describe Pseudonymizer::Dumper do
# grab the first table it outputs. There would only be 1.
project_table_file = pseudo.tables_to_csv[0]
# Ignore the `.` and `..` in the directory.
expect(project_table_file.include? "projects_").to be true
expect(project_table_file.include? ".csv").to be true
......@@ -40,9 +39,7 @@ describe Pseudonymizer::Dumper do
File.foreach(project_table_file).with_index do |line, line_num|
if line_num == 0
columns = line.split(",")
end
if line_num == 1
elsif line_num == 1
project_data = line.split(",")
break
end
......
......@@ -3,10 +3,11 @@ require 'spec_helper'
describe Pseudonymizer::Uploader do
let(:base_dir) { Dir.mktmpdir }
let(:options) do
Pseudonymizer::Options.new(config: Gitlab.config.pseudonymizer,
start_at: Time.now.utc)
Pseudonymizer::Options.new(
config: YAML.load_file(Rails.root.join(Gitlab.config.pseudonymizer.manifest))
)
end
let(:remote_directory) { subject.send(:remote_directory) }
let(:remote_directory) { described_class.remote_directory }
subject { described_class.new(options) }
def mock_file(file_name)
......@@ -15,7 +16,7 @@ describe Pseudonymizer::Uploader do
before do
allow(options).to receive(:output_dir).and_return(base_dir)
stub_object_storage_pseudonymizer(options: options)
stub_object_storage_pseudonymizer
10.times {|i| mock_file("file_#{i}.test")}
mock_file("schema.yml")
......
......@@ -63,8 +63,8 @@ module StubObjectStorage
EOS
end
def stub_object_storage_pseudonymizer(options:)
def stub_object_storage_pseudonymizer
stub_object_storage(connection_params: Pseudonymizer::Uploader.object_store_credentials,
remote_directory: options.config.upload.remote_directory)
remote_directory: Gitlab.config.pseudonymizer.upload.remote_directory)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment