# dumper.rb
module Pseudonymizer
  # Exports the tables listed in the configuration as gzip-compressed CSV
  # files, together with a `schema.yml` describing the exported columns and a
  # `file_list.json` naming the CSV files that were written.
  class Dumper
    attr_accessor :config, :output_dir

    # @param options [#config, #output_dir, #start_at] export settings.
    #   `options.config` must respond to `deep_symbolize_keys`; the result is
    #   read as `config[:tables][<table>][:whitelist | :pseudo]`.
    def initialize(options)
      @config = options.config.deep_symbolize_keys
      @output_dir = options.output_dir
      # Stored for callers; not read anywhere inside this class.
      @start_at = options.start_at

      reset!
    end

    # Clears the accumulated schema and the list of written files so that the
    # next call to #tables_to_csv performs a fresh export.
    def reset!
      # Per-table column map, created on first access.
      @schema = Hash.new { |h, k| h[k] = {} }
      @output_files = []
    end

    # Exports every configured table, then writes the schema and file list.
    #
    # The result is memoized: once at least one file has been written, later
    # calls return the same list until #reset! is called.
    #
    # @return [Array<String>] paths of the CSV files written (tables without
    #   rows are omitted)
    def tables_to_csv
      return @output_files unless @output_files.empty?

      # mkdir_p is a no-op when the directory already exists.
      FileUtils.mkdir_p(output_dir)

      exported = config[:tables].map do |table, table_config|
        table_to_csv(table, table_config[:whitelist], table_config[:pseudo])
      end
      # Empty tables yield nil instead of a path.
      @output_files = exported.compact

      schema_to_yml
      file_list_to_json

      @output_files
    end

    private

    # Builds "<output_dir>/<basename>.<ext>".
    #
    # @param basename [#to_s, nil] file name without extension
    # @param ext [String] extension without the leading dot
    # @return [String]
    def output_filename(basename = nil, ext = "csv.gz")
      File.join(output_dir, "#{basename}.#{ext}")
    end

    # Writes the accumulated schema to "<output_dir>/schema.yml".
    def schema_to_yml
      File.write(output_filename("schema", "yml"), @schema.to_yaml)
    end

    # Writes the base names of the exported files, as a JSON array, to
    # "<output_dir>/file_list.json".
    def file_list_to_json
      relative_files = @output_files.map { |path| File.basename(path) }
      File.write(output_filename("file_list", "json"), relative_files.to_json)
    end

    # Records the schema of `table` and exports its rows.
    #
    # @return [String, nil] path of the written file, or nil for an empty table
    # @raise [StandardError] any export failure is logged and re-raised
    def table_to_csv(table, whitelist_columns, pseudonymity_columns)
      table_to_schema(table)

      rows = table_page_results(table, whitelist_columns, pseudonymity_columns)
      write_to_csv_file(table, rows)
    rescue => e
      Rails.logger.error("Failed to export #{table}: #{e}")
      # Bare raise re-raises the exception currently being handled.
      raise
    end

    # Returns a lazy enumerator over every row of `table`: pages come from
    # Pager#pages, each page is passed through Filter#anonymize, and the
    # resulting rows are yielded one at a time.
    def table_page_results(table, whitelist_columns, pseudonymity_columns)
      filter = Filter.new(table, whitelist_columns, pseudonymity_columns)
      pager = Pager.new(table, whitelist_columns)

      Enumerator.new do |yielder|
        pager.pages do |page|
          filter.anonymize(page).each { |result| yielder << result }
        end
      end.lazy
    end

    # Records name and SQL type of every whitelisted column of `table`.
    # Columns listed under :pseudo are recorded as "character varying"
    # instead of their database type.
    def table_to_schema(table)
      table_config = @config.dig(:tables, table)
      whitelist = table_config[:whitelist]
      # Tolerate a table configuration that has no :pseudo list at all.
      pseudo = Array(table_config[:pseudo])

      columns = ActiveRecord::Base.connection.columns(table)
      whitelisted = columns.select { |column| whitelist.include?(column.name) }

      type_results = whitelisted.map do |column|
        data_type = pseudo.include?(column.name) ? "character varying" : column.sql_type

        { name: column.name, data_type: data_type }
      end

      set_schema_column_types(table, type_results)
    end

    # Stores `type_results` under the table's schema entry.
    #
    # @param table [#to_s]
    # @param type_results [Array<Hash>] hashes with :name and :data_type
    def set_schema_column_types(table, type_results)
      has_id = type_results.any? { |column| column[:name] == "id" }

      type_results.each do |type_result|
        @schema[table.to_s][type_result[:name]] = type_result[:data_type]
      end

      # if there is an ID, it is the mapping_key
      @schema[table.to_s]["gl_mapping_key"] = "id" if has_id
    end

    # Streams `contents` into "<output_dir>/<table>.csv.gz".
    #
    # The rows are enumerated exactly once: the gzip stream and the CSV
    # writer are only created when the first row arrives, and that row's keys
    # become the header line. Nothing is written for an empty enumeration.
    #
    # @param table [#to_s] used for the file name and log messages
    # @param contents [Enumerable] rows responding to #keys and #values
    # @return [String, nil] the file path, or nil when there were no rows
    def write_to_csv_file(table, contents)
      file_path = output_filename(table)
      gzip = nil
      csv = nil

      contents.each do |row|
        if csv.nil?
          Rails.logger.info "#{self.class.name} writing #{table} to #{file_path}."
          gzip = Zlib::GzipWriter.open(file_path)
          csv = CSV.new(gzip, headers: row.keys, write_headers: true)
        end

        csv << row.values
      end

      return file_path if csv

      Rails.logger.info "#{self.class.name} table #{table} is empty."
      nil
    ensure
      # Flushes and closes the stream on success and on failure alike.
      gzip&.close
    end
  end
end