Improve and simplify the distribution of static analysis tasks

Signed-off-by: Rémy Coutable <remy@rymai.me>

d4b10599 · Rémy Coutable · 0bdd20eb · d4b10599
Commit d4b10599 authored Aug 24, 2021 by Rémy Coutable

Showing 1 changed file with 84 additions and 67 deletions:

scripts/static-analysis (+84 -67)

--- a/scripts/static-analysis
+++ b/scripts/static-analysis
@@ -14,38 +14,49 @@ class StaticAnalysis
    "Browserslist: caniuse-lite is outdated. Please run next command `yarn upgrade`"
  ].freeze

+  Task = Struct.new(:command, :duration) do
+    def cmd
+      command.join(' ')
+    end
+  end
+  NodeAssignment = Struct.new(:index, :tasks, :total_duration) do
+    def total_duration
+      return 0 if tasks.empty?
+
+      tasks.sum(&:duration)
+    end
+  end
+
  # `gettext:updated_check` and `gitlab:sidekiq:sidekiq_queues_yml:check` will fail on FOSS installations
  # (e.g. gitlab-org/gitlab-foss) since they test against a single
  # file that is generated by an EE installation, which can
  # contain values that a FOSS installation won't find. To work
  # around this we will only enable these tasks on EE installations.
-  TASKS_WITH_DURATIONS_SECONDS = {
-    %w[bin/rake lint:haml] => 800,
+  TASKS_WITH_DURATIONS_SECONDS = [
+    Task.new(%w[bin/rake lint:haml], 562),
    # We need to disable the cache for this cop since it creates files under tmp/feature_flags/*.used,
    # the cache would prevent these files from being created.
-    %w[bundle exec rubocop --only Gitlab/MarkUsedFeatureFlags --cache false] => 600,
-    (Gitlab.ee? ? %w[bin/rake gettext:updated_check] : nil) => 360,
-    %w[yarn run lint:eslint:all] => 312,
-    %w[bundle exec rubocop --parallel] => 300,
-    %w[yarn run lint:prettier] => 162,
-    %w[bin/rake gettext:lint] => 65,
-    %w[bundle exec license_finder] => 61,
-    %w[bin/rake lint:static_verification] => 45,
-    %w[bin/rake config_lint] => 26,
-    %w[bin/rake gitlab:sidekiq:all_queues_yml:check] => 15,
-    (Gitlab.ee? ? %w[bin/rake gitlab:sidekiq:sidekiq_queues_yml:check] : nil) => 11,
-    %w[yarn run internal:stylelint] => 8,
-    %w[scripts/lint-conflicts.sh] => 1,
-    %w[yarn run block-dependencies] => 1,
-    %w[scripts/lint-rugged] => 1,
-    %w[scripts/gemfile_lock_changed.sh] => 1,
-    %w[scripts/frontend/check_no_partial_karma_jest.sh] => 1
-  }.reject { |k| k.nil? }.freeze
-
-  StaticAnalysisTasks = Struct.new(:tasks, :duration)
+    Task.new(%w[bundle exec rubocop --only Gitlab/MarkUsedFeatureFlags --cache false], 800),
+    (Gitlab.ee? ? Task.new(%w[bin/rake gettext:updated_check], 360) : nil),
+    Task.new(%w[yarn run lint:eslint:all], 312),
+    Task.new(%w[bundle exec rubocop --parallel], 60),
+    Task.new(%w[yarn run lint:prettier], 160),
+    Task.new(%w[bin/rake gettext:lint], 85),
+    Task.new(%w[bundle exec license_finder], 20),
+    Task.new(%w[bin/rake lint:static_verification], 35),
+    Task.new(%w[bin/rake config_lint], 10),
+    Task.new(%w[bin/rake gitlab:sidekiq:all_queues_yml:check], 15),
+    (Gitlab.ee? ? Task.new(%w[bin/rake gitlab:sidekiq:sidekiq_queues_yml:check], 11) : nil),
+    Task.new(%w[yarn run internal:stylelint], 8),
+    Task.new(%w[scripts/lint-conflicts.sh], 1),
+    Task.new(%w[yarn run block-dependencies], 1),
+    Task.new(%w[scripts/lint-rugged], 1),
+    Task.new(%w[scripts/gemfile_lock_changed.sh], 1),
+    Task.new(%w[scripts/frontend/check_no_partial_karma_jest.sh], 1)
+  ].reject { |t| t.nil? }.freeze

  def run_tasks!(options = {})
-    node_tasks = tasks_to_run((ENV['CI_NODE_TOTAL'] || 1).to_i, debug: options[:debug])[(ENV['CI_NODE_INDEX'] || 1).to_i - 1]
+    node_assignment = tasks_to_run((ENV['CI_NODE_TOTAL'] || 1).to_i)[(ENV['CI_NODE_INDEX'] || 1).to_i - 1]

    if options[:dry_run]
      puts "Dry-run mode!"
@@ -53,19 +64,21 @@ class StaticAnalysis
    end

    static_analysis = Gitlab::Popen::Runner.new
-
-    static_analysis.run(node_tasks.tasks) do |cmd, &run|
+    start_time = Time.now
+    static_analysis.run(node_assignment.tasks.map(&:command)) do |command, &run|
+      task = node_assignment.tasks.find { |task| task.command == command }
      puts
-      puts "$ #{cmd.join(' ')}"
+      puts "$ #{task.cmd}"

      result = run.call

-      puts "==> Finished in #{result.duration} seconds"
+      puts "==> Finished in #{result.duration} seconds (expected #{task.duration} seconds)"
      puts
    end

    puts
    puts '==================================================='
+    puts "Node finished running all tasks in #{Time.now - start_time} seconds (expected #{node_assignment.total_duration})"
    puts
    puts

@@ -114,49 +127,57 @@ class StaticAnalysis
      .count { |result| !ALLOWED_WARNINGS.include?(result.stderr.strip) }
  end

-  def tasks_to_run(node_total, debug: false)
-    tasks_per_node = Array.new(node_total) { StaticAnalysisTasks.new([], 0) }
-
-    total_time = TASKS_WITH_DURATIONS_SECONDS.values.sum.to_f
+  def tasks_to_run(node_total)
+    total_time = TASKS_WITH_DURATIONS_SECONDS.sum(&:duration).to_f
    ideal_time_per_job = total_time / node_total
-    tasks_by_duration_desc = TASKS_WITH_DURATIONS_SECONDS.sort_by { |a| -a[1] }.to_h
-
-    p "total_time: #{total_time}" if debug
-    p "ideal_time_per_job: #{ideal_time_per_job}" if debug
-
-    tasks_by_duration_desc.each_with_index do |(task, duration), i|
-      puts "Assigning #{task}..." if debug
-      (0...node_total).each do |node_index|
-        puts "Current node: #{node_index}..." if debug
-        # Task is already longer than the ideal time
-        if duration >= ideal_time_per_job && tasks_per_node[node_index].tasks.empty?
-          puts "Assigning #{task} to node #{node_index} (#{duration}s)." if debug
-          assign_task_to_node(tasks_by_duration_desc, tasks_per_node[node_index], task, duration)
-          break
-        elsif tasks_per_node[node_index].duration + duration <= ideal_time_per_job
-          puts "Assigning #{task} to node #{node_index} (#{duration}s)." if debug
-          assign_task_to_node(tasks_by_duration_desc, tasks_per_node[node_index], task, duration)
-          break
-        else
-          puts "Node #{node_index} is already full (#{tasks_per_node[node_index]})" if debug
-        end
+    tasks_by_duration_desc = TASKS_WITH_DURATIONS_SECONDS.sort_by { |a| -a.duration }
+    nodes = Array.new(node_total) { |i| NodeAssignment.new(i + 1, [], 0) }
+
+    puts "Total expected time: #{total_time}; ideal time per job: #{ideal_time_per_job}.\n\n"
+    puts "Tasks to distribute:"
+    tasks_by_duration_desc.each { |task| puts "* #{task.cmd} (#{task.duration}s)" }
+
+    # Distribute tasks optimally first
+    puts "\nAssigning tasks optimally."
+    distribute_tasks(tasks_by_duration_desc, nodes, ideal_time_per_job: ideal_time_per_job)
+
+    # Distribute remaining tasks to the nodes, ordered by ascending total duration
+    leftover_tasks = tasks_by_duration_desc - nodes.flat_map(&:tasks)
+
+    if leftover_tasks.any?
+      puts "\n\nAssigning remaining tasks: #{leftover_tasks.flat_map(&:cmd)}"
+      distribute_tasks(leftover_tasks, nodes.sort_by { |node| node.total_duration })
    end
+
+    nodes.each do |node|
+      puts "\nExpected duration for node #{node.index}: #{node.total_duration} seconds"
+      node.tasks.each { |task| puts "* #{task.cmd} (#{task.duration}s)" }
    end

-    raise "There are unassigned tasks: #{tasks_by_duration_desc}" unless tasks_by_duration_desc.empty?
+    nodes
+  end

-    tasks_per_node.each_with_index do |node, i|
-      puts "\nExpected duration for node #{i + 1}: #{node.duration}"
-      node.tasks.each { |task| puts "- #{task.join(' ')}" }
+  def distribute_tasks(tasks, nodes, ideal_time_per_job: nil)
+    condition =
+      if ideal_time_per_job
+        ->(task, node, ideal_time_per_job) { (task.duration + node.total_duration) <= ideal_time_per_job }
+      else
+        ->(*) { true }
      end

-    tasks_per_node
+    tasks.each do |task|
+      nodes.each do |node|
+        if condition.call(task, node, ideal_time_per_job)
+          assign_task_to_node(tasks, node, task)
+          break
+        end
+      end
+    end
  end

-  def assign_task_to_node(remaining_tasks, node, task_name, duration)
-    node.tasks << task_name
-    node.duration += duration
-    remaining_tasks.delete(task_name)
+  def assign_task_to_node(remaining_tasks, node, task)
+    node.tasks << task
+    puts "Assigning #{task.command} (#{task.duration}s) to node ##{node.index}. Node total duration: #{node.total_duration}s."
  end
 end

@@ -167,9 +188,5 @@ if $0 == __FILE__
    options[:dry_run] = true
  end

-  if ARGV.include?('--debug')
-    options[:debug] = true
-  end
-
  StaticAnalysis.new.run_tasks!(options)
 end