Commit bc123808 authored by Stan Hu's avatar Stan Hu

Merge branch 'sidekiq-arguments-logging-tokens' into 'master'

Filter sensitive Sidekiq arguments

See merge request gitlab-org/gitlab!33967
parents ec6d2692 b8fbd34d
......@@ -9,6 +9,7 @@ class AuthorizedKeysWorker
urgency :high
weight 2
idempotent!
loggable_arguments 0
def perform(action, *args)
return unless Gitlab::CurrentSettings.authorized_keys_enabled?
......
......@@ -5,6 +5,7 @@ class BackgroundMigrationWorker # rubocop:disable Scalability/IdempotentWorker
feature_category :database
urgency :throttled
loggable_arguments 0, 1
# The minimum amount of time between processing two jobs of the same migration
# class.
......
......@@ -5,6 +5,7 @@ class CleanupContainerRepositoryWorker # rubocop:disable Scalability/IdempotentW
queue_namespace :container_repository
feature_category :container_registry
loggable_arguments 2
attr_reader :container_repository, :current_user
......
......@@ -6,6 +6,7 @@ class ClusterInstallAppWorker # rubocop:disable Scalability/IdempotentWorker
include ClusterApplications
worker_has_external_dependencies!
loggable_arguments 0
def perform(app_name, app_id)
find_application(app_name, app_id) do |app|
......
......@@ -6,6 +6,7 @@ class ClusterPatchAppWorker # rubocop:disable Scalability/IdempotentWorker
include ClusterApplications
worker_has_external_dependencies!
loggable_arguments 0
def perform(app_name, app_id)
find_application(app_name, app_id) do |app|
......
......@@ -9,6 +9,7 @@ class ClusterUpdateAppWorker # rubocop:disable Scalability/IdempotentWorker
include ExclusiveLeaseGuard
sidekiq_options retry: 3, dead: false
loggable_arguments 0, 3
LEASE_TIMEOUT = 10.minutes.to_i
......
......@@ -6,6 +6,7 @@ class ClusterUpgradeAppWorker # rubocop:disable Scalability/IdempotentWorker
include ClusterApplications
worker_has_external_dependencies!
loggable_arguments 0
def perform(app_name, app_id)
find_application(app_name, app_id) do |app|
......
......@@ -10,6 +10,7 @@ class ClusterWaitForAppInstallationWorker # rubocop:disable Scalability/Idempote
worker_has_external_dependencies!
worker_resource_boundary :cpu
loggable_arguments 0
def perform(app_name, app_id)
find_application(app_name, app_id) do |app|
......
......@@ -8,6 +8,8 @@ class ClusterWaitForAppUpdateWorker # rubocop:disable Scalability/IdempotentWork
INTERVAL = 10.seconds
TIMEOUT = 20.minutes
loggable_arguments 0
def perform(app_name, app_id)
find_application(app_name, app_id) do |app|
::Clusters::Applications::CheckUpgradeProgressService.new(app).execute
......
......@@ -6,6 +6,7 @@ class ClusterWaitForIngressIpAddressWorker # rubocop:disable Scalability/Idempot
include ClusterApplications
worker_has_external_dependencies!
loggable_arguments 0
def perform(app_name, app_id)
find_application(app_name, app_id) do |app|
......
......@@ -6,6 +6,8 @@ module Clusters
include ApplicationWorker
include ClusterQueue
loggable_arguments 1
def perform(cluster_id, service_name)
cluster = Clusters::Cluster.find_by_id(cluster_id)
return unless cluster
......
......@@ -6,6 +6,8 @@ module Clusters
include ApplicationWorker
include ClusterQueue
loggable_arguments 1
def perform(cluster_id, service_name)
cluster = Clusters::Cluster.find_by_id(cluster_id)
raise cluster_missing_error(service_name) unless cluster
......
......@@ -8,6 +8,7 @@ module Clusters
include ClusterApplications
worker_has_external_dependencies!
loggable_arguments 0
def perform(app_name, app_id)
find_application(app_name, app_id) do |app|
......
......@@ -12,6 +12,7 @@ module Clusters
worker_has_external_dependencies!
worker_resource_boundary :cpu
loggable_arguments 0
def perform(app_name, app_id)
find_application(app_name, app_id) do |app|
......
......@@ -76,6 +76,22 @@ module ApplicationWorker
get_sidekiq_options['queue'].to_s
end
# Set/get which arguments can be logged and sent to Sentry.
#
# Numeric arguments are logged by default, so there is no need to
# list those.
#
# Non-numeric arguments must be listed by position, as Sidekiq
# cannot see argument names.
#
def loggable_arguments(*args)
# Setter form: remember the permitted positions and return them.
return @loggable_arguments = args if args.any?

# Getter form: fall back to an empty list when nothing was declared.
@loggable_arguments || []
end
def queue_size
Sidekiq::Queue.new(queue).size
end
......
......@@ -7,6 +7,7 @@ module ReactiveCacheableWorker
include ApplicationWorker
feature_category_not_owned!
loggable_arguments 0
def self.context_for_arguments(arguments)
class_name, *_other_args = arguments
......
......@@ -5,8 +5,8 @@ class CreateCommitSignatureWorker
feature_category :source_code_management
weight 2
idempotent!
loggable_arguments 0
# rubocop: disable CodeReuse/ActiveRecord
def perform(commit_shas, project_id)
......
......@@ -8,6 +8,7 @@ class CreatePipelineWorker # rubocop:disable Scalability/IdempotentWorker
feature_category :continuous_integration
urgency :high
worker_resource_boundary :cpu
loggable_arguments 2, 3, 4
def perform(project_id, user_id, ref, source, params = {})
project = Project.find(project_id)
......
......@@ -4,6 +4,7 @@ class DeleteStoredFilesWorker # rubocop:disable Scalability/IdempotentWorker
include ApplicationWorker
feature_category_not_owned!
loggable_arguments 0
def perform(class_name, keys)
klass = begin
......
......@@ -4,6 +4,7 @@ class DeleteUserWorker # rubocop:disable Scalability/IdempotentWorker
include ApplicationWorker
feature_category :authentication_and_authorization
loggable_arguments 2
def perform(current_user_id, delete_user_id, options = {})
delete_user = User.find(delete_user_id)
......
......@@ -5,6 +5,7 @@ class ExportCsvWorker # rubocop:disable Scalability/IdempotentWorker
feature_category :issue_tracking
worker_resource_boundary :cpu
loggable_arguments 2
def perform(current_user_id, project_id, params)
@current_user = User.find(current_user_id)
......
......@@ -5,6 +5,7 @@ class FileHookWorker # rubocop:disable Scalability/IdempotentWorker
sidekiq_options retry: false
feature_category :integrations
loggable_arguments 0
def perform(file_name, data)
success, message = Gitlab::FileHook.execute(file_name, data)
......
......@@ -5,6 +5,7 @@ class GitGarbageCollectWorker # rubocop:disable Scalability/IdempotentWorker
sidekiq_options retry: false
feature_category :gitaly
loggable_arguments 1, 2, 3
# Timeout set to 24h
LEASE_TIMEOUT = 86400
......
......@@ -12,6 +12,7 @@ module Gitlab
sidekiq_options dead: false
feature_category :importers
loggable_arguments 1, 2
# The known importer stages and their corresponding Sidekiq workers.
STAGES = {
......
......@@ -8,6 +8,8 @@ module Gitlab
include Gitlab::JiraImport::QueueOptions
include Gitlab::Import::DatabaseHelpers
loggable_arguments 3
def perform(project_id, jira_issue_id, issue_attributes, waiter_key)
issue_id = create_issue(issue_attributes, project_id)
JiraImport.cache_issue_mapping(issue_id, jira_issue_id, project_id)
......
......@@ -7,6 +7,7 @@ class GitlabShellWorker # rubocop:disable Scalability/IdempotentWorker
feature_category :source_code_management
urgency :high
weight 2
loggable_arguments 0
def perform(action, *arg)
# Gitlab::Shell is being removed but we need to continue to process jobs
......
......@@ -5,6 +5,7 @@ class GroupExportWorker # rubocop:disable Scalability/IdempotentWorker
include ExceptionBacktrace
feature_category :importers
loggable_arguments 2
def perform(current_user_id, group_id, params = {})
current_user = User.find(current_user_id)
......
......@@ -5,6 +5,7 @@ module HashedStorage
include ApplicationWorker
queue_namespace :hashed_storage
loggable_arguments 1
attr_reader :project_id
......
......@@ -5,6 +5,7 @@ module HashedStorage
include ApplicationWorker
queue_namespace :hashed_storage
loggable_arguments 1
attr_reader :project_id
......
......@@ -9,6 +9,7 @@ module MailScheduler
feature_category :issue_tracking
worker_resource_boundary :cpu
loggable_arguments 0
def perform(meth, *args)
check_arguments!(args)
......
......@@ -6,6 +6,7 @@ class MergeWorker # rubocop:disable Scalability/IdempotentWorker
feature_category :source_code_management
urgency :high
weight 5
loggable_arguments 2
def perform(merge_request_id, current_user_id, params)
params = params.with_indifferent_access
......
......@@ -7,6 +7,7 @@ module ObjectStorage
sidekiq_options retry: 5
feature_category_not_owned!
loggable_arguments 0, 1, 2, 3
def perform(uploader_class_name, subject_class_name, file_field, subject_id)
uploader_class = uploader_class_name.constantize
......
......@@ -7,6 +7,7 @@ module ObjectStorage
include ObjectStorageQueue
feature_category_not_owned!
loggable_arguments 0, 1, 2, 3
SanityCheckError = Class.new(StandardError)
......
......@@ -5,6 +5,7 @@ class PagesWorker # rubocop:disable Scalability/IdempotentWorker
sidekiq_options retry: 3
feature_category :pages
loggable_arguments 0, 1
def perform(action, *arg)
send(action, *arg) # rubocop:disable GitlabSecurity/PublicSend
......
......@@ -7,6 +7,7 @@ class PipelineProcessWorker # rubocop:disable Scalability/IdempotentWorker
queue_namespace :pipeline_processing
feature_category :continuous_integration
urgency :high
loggable_arguments 1
# rubocop: disable CodeReuse/ActiveRecord
def perform(pipeline_id, build_ids = nil)
......
......@@ -7,6 +7,7 @@ class PostReceive # rubocop:disable Scalability/IdempotentWorker
urgency :high
worker_resource_boundary :cpu
weight 5
loggable_arguments 0, 1, 2, 3
def perform(gl_repository, identifier, changes, push_options = {})
container, project, repo_type = Gitlab::GlRepository.parse(gl_repository)
......
......@@ -13,8 +13,8 @@ class ProcessCommitWorker
feature_category :source_code_management
urgency :high
weight 3
idempotent!
loggable_arguments 2, 3
# project_id - The ID of the project this commit belongs to.
# user_id - The ID of the user that pushed the commit.
......
......@@ -4,11 +4,11 @@
class ProjectCacheWorker # rubocop:disable Scalability/IdempotentWorker
include ApplicationWorker
urgency :high
LEASE_TIMEOUT = 15.minutes.to_i
feature_category :source_code_management
urgency :high
loggable_arguments 1, 2, 3
# project_id - The ID of the project for which to flush the cache.
# files - An Array containing extra types of files to refresh such as
......
......@@ -8,6 +8,7 @@ class ProjectExportWorker # rubocop:disable Scalability/IdempotentWorker
feature_category :importers
worker_resource_boundary :memory
urgency :throttled
loggable_arguments 2, 3
def perform(current_user_id, project_id, after_export_strategy = {}, params = {})
current_user = User.find(current_user_id)
......
......@@ -4,8 +4,8 @@ class PropagateIntegrationWorker
include ApplicationWorker
feature_category :integrations
idempotent!
loggable_arguments 1
def perform(integration_id, overwrite)
Admin::PropagateIntegrationService.propagate(
......
......@@ -7,6 +7,7 @@ class RebaseWorker # rubocop:disable Scalability/IdempotentWorker
feature_category :source_code_management
weight 2
loggable_arguments 2
def perform(merge_request_id, current_user_id, skip_ci = false)
current_user = User.find(current_user_id)
......
......@@ -12,6 +12,8 @@ module RepositoryCheck
attr_reader :shard_name
loggable_arguments 0
def perform(shard_name)
@shard_name = shard_name
......
......@@ -5,6 +5,7 @@ class RepositoryRemoveRemoteWorker # rubocop:disable Scalability/IdempotentWorke
include ExclusiveLeaseGuard
feature_category :source_code_management
loggable_arguments 1
LEASE_TIMEOUT = 1.hour
......
......@@ -10,6 +10,7 @@ class RepositoryUpdateRemoteMirrorWorker # rubocop:disable Scalability/Idempoten
sidekiq_options retry: 3, dead: false
feature_category :source_code_management
loggable_arguments 1
LOCK_WAIT_TIME = 30.seconds
MAX_TRIES = 3
......
......@@ -5,6 +5,8 @@ module TodosDestroyer
include ApplicationWorker
include TodosDestroyerQueue
loggable_arguments 2
def perform(user_id, entity_id, entity_type)
::Todos::Destroy::EntityLeaveService.new(user_id, entity_id, entity_type).execute
end
......
......@@ -5,6 +5,7 @@ class UpdateExternalPullRequestsWorker # rubocop:disable Scalability/IdempotentW
feature_category :source_code_management
weight 3
loggable_arguments 2
def perform(project_id, user_id, ref)
project = Project.find_by_id(project_id)
......
......@@ -7,6 +7,7 @@ class UpdateMergeRequestsWorker # rubocop:disable Scalability/IdempotentWorker
urgency :high
worker_resource_boundary :cpu
weight 3
loggable_arguments 2, 3, 4
LOG_TIME_THRESHOLD = 90 # seconds
......
......@@ -5,6 +5,7 @@ class WebHookWorker # rubocop:disable Scalability/IdempotentWorker
feature_category :integrations
worker_has_external_dependencies!
loggable_arguments 2
sidekiq_options retry: 4, dead: false
......
---
title: Filter potentially-sensitive Sidekiq arguments from logs and Sentry
merge_request: 33967
author:
type: changed
......@@ -29,9 +29,18 @@ Example:
gitlab_rails['env'] = {"SIDEKIQ_LOG_ARGUMENTS" => "1"}
```
Please note: It is not recommended to enable this setting in production because some
Sidekiq jobs (such as sending a password reset email) take secret arguments (for
example the password reset token).
This does not log all job arguments. To avoid logging sensitive
information (for instance, password reset tokens), it logs numeric
arguments for all workers, with overrides for some specific workers
where their arguments are not sensitive.
Example log output:
```json
{"severity":"INFO","time":"2020-06-08T14:37:37.892Z","class":"AdminEmailsWorker","args":["[FILTERED]","[FILTERED]","[FILTERED]"],"retry":3,"queue":"admin_emails","backtrace":true,"jid":"9e35e2674ac7b12d123e13cc","created_at":"2020-06-08T14:37:37.373Z","meta.user":"root","meta.caller_id":"Admin::EmailsController#create","correlation_id":"37D3lArJmT1","uber-trace-id":"2d942cc98cc1b561:6dc94409cfdd4d77:9fbe19bdee865293:1","enqueued_at":"2020-06-08T14:37:37.410Z","pid":65011,"message":"AdminEmailsWorker JID-9e35e2674ac7b12d123e13cc: done: 0.48085 sec","job_status":"done","scheduling_latency_s":0.001012,"redis_calls":9,"redis_duration_s":0.004608,"redis_read_bytes":696,"redis_write_bytes":6141,"duration_s":0.48085,"cpu_s":0.308849,"completed_at":"2020-06-08T14:37:37.892Z","db_duration_s":0.010742}
{"severity":"INFO","time":"2020-06-08T14:37:37.894Z","class":"ActiveJob::QueueAdapters::SidekiqAdapter::JobWrapper","wrapped":"ActionMailer::DeliveryJob","queue":"mailers","args":["[FILTERED]"],"retry":3,"backtrace":true,"jid":"e47a4f6793d475378432e3c8","created_at":"2020-06-08T14:37:37.884Z","meta.user":"root","meta.caller_id":"AdminEmailsWorker","correlation_id":"37D3lArJmT1","uber-trace-id":"2d942cc98cc1b561:29344de0f966446d:5c3b0e0e1bef987b:1","enqueued_at":"2020-06-08T14:37:37.885Z","pid":65011,"message":"ActiveJob::QueueAdapters::SidekiqAdapter::JobWrapper JID-e47a4f6793d475378432e3c8: start","job_status":"start","scheduling_latency_s":0.009473}
{"severity":"INFO","time":"2020-06-08T14:39:50.648Z","class":"NewIssueWorker","args":["455","1"],"retry":3,"queue":"new_issue","backtrace":true,"jid":"a24af71f96fd129ec47f5d1e","created_at":"2020-06-08T14:39:50.643Z","meta.user":"root","meta.project":"h5bp/html5-boilerplate","meta.root_namespace":"h5bp","meta.caller_id":"Projects::IssuesController#create","correlation_id":"f9UCZHqhuP7","uber-trace-id":"28f65730f99f55a3:a5d2b62dec38dffc:48ddd092707fa1b7:1","enqueued_at":"2020-06-08T14:39:50.646Z","pid":65011,"message":"NewIssueWorker JID-a24af71f96fd129ec47f5d1e: start","job_status":"start","scheduling_latency_s":0.001144}
```
When using [Sidekiq JSON logging](../logs.md#sidekiqlog),
argument logs are limited to a maximum size of 10 kilobytes of text;
......
......@@ -518,6 +518,34 @@ job needs to be scheduled with.
The `context_proc` which needs to return a hash with the context
information for the job.
## Arguments logging
When [`SIDEKIQ_LOG_ARGUMENTS`](../administration/troubleshooting/sidekiq.md#log-arguments-to-sidekiq-jobs)
is enabled, Sidekiq job arguments will be logged.
By default, the only arguments logged are numeric arguments, because
arguments of other types could contain sensitive information. To
override this, use `loggable_arguments` inside a worker with the indexes
of the arguments to be logged. (Numeric arguments do not need to be
specified here.)
For example:
```ruby
class MyWorker
include ApplicationWorker
loggable_arguments 1, 3
# object_id will be logged as it's numeric
# string_a will be logged due to the loggable_arguments call
# string_b will be filtered from logs
# string_c will be logged due to the loggable_arguments call
def perform(object_id, string_a, string_b, string_c)
end
end
```
## Tests
Each Sidekiq worker must be tested using RSpec, just like any other class. These
......
......@@ -5,8 +5,8 @@ module Analytics
include ApplicationWorker
feature_category :code_analytics
idempotent!
loggable_arguments 0
def perform(operation, merge_request_id, execute_kwargs = {})
::MergeRequest.find_by_id(merge_request_id).try do |merge_request|
......
......@@ -18,6 +18,7 @@ module Geo
include ::Gitlab::Utils::StrongMemoize
sidekiq_options retry: false
loggable_arguments 0
attr_reader :shard_name, :start_time, :loops
end
......
......@@ -8,6 +8,7 @@ class ElasticCommitIndexerWorker
sidekiq_options retry: 2
urgency :throttled
idempotent!
loggable_arguments 1, 2, 3
# Performs the commits and blobs indexation
#
......
......@@ -7,6 +7,7 @@ class ElasticIndexerWorker # rubocop:disable Scalability/IdempotentWorker
sidekiq_options retry: 2
feature_category :global_search
urgency :throttled
loggable_arguments 0, 1, 4
def perform(operation, class_name, record_id, es_id, options = {})
return true unless Gitlab::CurrentSettings.elasticsearch_indexing?
......
......@@ -5,6 +5,7 @@ class ElasticNamespaceIndexerWorker # rubocop:disable Scalability/IdempotentWork
feature_category :global_search
sidekiq_options retry: 2
loggable_arguments 1
def perform(namespace_id, operation)
return true unless Gitlab::CurrentSettings.elasticsearch_indexing?
......
......@@ -5,6 +5,7 @@ class ElasticNamespaceRolloutWorker # rubocop:disable Scalability/IdempotentWork
feature_category :global_search
sidekiq_options retry: 2
loggable_arguments 0, 2
ROLLOUT = 'rollout'
ROLLBACK = 'rollback'
......
......@@ -18,6 +18,8 @@ module Geo
OPERATIONS = [:resync_repositories, :reverify_repositories].freeze
DELAY_INTERVAL = 10.seconds.to_i # base delay for scheduling batch execution
loggable_arguments 0
def perform(operation)
return fail_invalid_operation!(operation) unless OPERATIONS.include?(operation.to_sym)
......
......@@ -14,6 +14,8 @@ module Geo
BATCH_SIZE = 250
OPERATIONS = [:resync_repositories, :reverify_repositories].freeze
loggable_arguments 0, 1
def perform(operation, range)
case operation.to_sym
when :resync_repositories
......
......@@ -9,6 +9,7 @@ module Geo
sidekiq_options retry: 3, dead: false
idempotent!
loggable_arguments 0
def perform(replicable_name, replicable_id)
replicator = ::Gitlab::Geo::Replicator.for_replicable_params(replicable_name: replicable_name, replicable_id: replicable_id)
......
......@@ -7,6 +7,7 @@ module Geo
include ::Gitlab::Geo::LogHelpers
sidekiq_options retry: 3, dead: false
loggable_arguments 0, 1, 2
def perform(replicable_name, event_name, payload)
Geo::EventService.new(replicable_name, event_name, payload).execute
......
......@@ -6,6 +6,7 @@ module Geo
include GeoQueue
sidekiq_options retry: 3, dead: false
loggable_arguments 0
def perform(object_type, object_id)
Geo::FileDownloadService.new(object_type.to_sym, object_id).execute
......
......@@ -6,6 +6,8 @@ module Geo
include GeoQueue
include ::Gitlab::Geo::LogHelpers
loggable_arguments 0, 2
def perform(object_type, object_db_id, file_path = nil)
log_info('Executing Geo::FileRegistryRemovalService', id: object_db_id, type: object_type, file_path: file_path)
......
......@@ -6,6 +6,8 @@ module Geo
include Gitlab::Geo::LogHelpers
include GeoQueue
loggable_arguments 0
def perform(file_path)
remove_file!(file_path)
end
......
......@@ -5,6 +5,8 @@ module Geo
include ApplicationWorker
include GeoQueue
loggable_arguments 1, 2
def perform(project_id, old_attachments_path, new_attachments_path)
Geo::HashedStorageAttachmentsMigrationService.new(
project_id,
......
......@@ -5,6 +5,8 @@ module Geo
include ApplicationWorker
include GeoQueue
loggable_arguments 1, 2, 2
def perform(project_id, old_disk_path, new_disk_path, old_storage_version)
Geo::HashedStorageMigrationService.new(
project_id,
......
......@@ -14,6 +14,8 @@ module Geo
Sidekiq.logger.warn "Failed #{msg['class']} with #{msg['args']}: #{msg['error_message']}"
end
loggable_arguments 1
# rubocop: disable CodeReuse/ActiveRecord
def perform(project_id, options = {})
options.symbolize_keys!
......
......@@ -5,6 +5,8 @@ module Geo
include ApplicationWorker
include GeoQueue
loggable_arguments 1, 2
def perform(project_id, old_disk_path, new_disk_path)
Geo::RenameRepositoryService.new(project_id, old_disk_path, new_disk_path).execute
end
......
......@@ -7,6 +7,8 @@ module Geo
include ::Gitlab::Geo::LogHelpers
include ::Gitlab::Utils::StrongMemoize
loggable_arguments 1, 2, 3
def perform(project_id, name, disk_path, storage_name)
return unless current_node.secondary?
......
......@@ -3,6 +3,7 @@
module Geo
class RepositoryShardSyncWorker < Geo::Scheduler::Secondary::SchedulerWorker # rubocop:disable Scalability/IdempotentWorker
sidekiq_options retry: false
loggable_arguments 0
attr_accessor :shard_name
......
......@@ -5,6 +5,7 @@ module Geo
module Primary
class ShardWorker < Geo::Scheduler::Primary::SchedulerWorker # rubocop:disable Scalability/IdempotentWorker
sidekiq_options retry: false
loggable_arguments 0
attr_accessor :shard_name
......
......@@ -10,6 +10,8 @@ module Geo
# rubocop:enable Scalability/CronWorkerContext
attr_accessor :shard_name
loggable_arguments 0
def perform(shard_name)
@shard_name = shard_name
......
......@@ -4,6 +4,8 @@ class GeoRepositoryDestroyWorker # rubocop:disable Scalability/IdempotentWorker
include ApplicationWorker
include GeoQueue
loggable_arguments 1, 2, 3
def perform(id, name, disk_path, storage_name)
Geo::RepositoryDestroyService.new(id, name, disk_path, storage_name).execute
end
......
......@@ -6,6 +6,7 @@ module JiraConnect
queue_namespace :jira_connect
feature_category :integrations
loggable_arguments 1, 2
def perform(project_id, branch_name, commit_shas)
project = Project.find_by_id(project_id)
......
......@@ -6,6 +6,7 @@ class LdapGroupSyncWorker # rubocop:disable Scalability/IdempotentWorker
feature_category :authentication_and_authorization
worker_has_external_dependencies!
weight 2
loggable_arguments 0, 1
# rubocop: disable CodeReuse/ActiveRecord
def perform(group_ids, provider = nil)
......
......@@ -26,10 +26,13 @@ module Gitlab
# Sanitize fields based on those sanitized from Rails.
config.sanitize_fields = Rails.application.config.filter_parameters.map(&:to_s)
config.processors << ::Gitlab::ErrorTracking::Processor::SidekiqProcessor
# Sanitize authentication headers
config.sanitize_http_headers = %w[Authorization Private-Token]
config.tags = { program: Gitlab.process_name }
config.before_send = method(:before_send)
yield config if block_given?
end
end
......
# frozen_string_literal: true
require 'set'
module Gitlab
module ErrorTracking
module Processor
# Raven (Sentry) processor that scrubs potentially-sensitive Sidekiq job
# arguments from error events before they are sent.
#
# Only numeric arguments, and arguments whose positions a worker class
# explicitly permits via `loggable_arguments`, are kept; every other
# argument is replaced with FILTERED_STRING.
class SidekiqProcessor < ::Raven::Processor
FILTERED_STRING = '[FILTERED]'
# Maps each argument to itself (when Numeric, or when its position is
# permitted for the worker named by +klass+) or to FILTERED_STRING.
#
# args  - the job's argument collection.
# klass - the worker class name as a String.
#
# Returns an Enumerator::Lazy; callers force it with #to_a.
def self.filter_arguments(args, klass)
args.lazy.with_index.map do |arg, i|
case arg
when Numeric
arg
else
if permitted_arguments_for_worker(klass).include?(i)
arg
else
FILTERED_STRING
end
end
end
end
# Returns a Set of argument positions the worker allows to be logged,
# memoized per worker class name. Any failure — unknown class, no
# loggable_arguments method, a value that cannot be converted to a
# Set — deliberately falls back to an empty Set (filter everything).
def self.permitted_arguments_for_worker(klass)
@permitted_arguments_for_worker ||= {}
@permitted_arguments_for_worker[klass] ||=
begin
klass.constantize&.loggable_arguments&.to_set
rescue
Set.new
end
end
# Filters the arguments for +klass+, truncates the list to the log
# size limit, and converts each remaining entry to a String.
def self.loggable_arguments(args, klass)
Gitlab::Utils::LogLimitedArray
.log_limited_array(filter_arguments(args, klass))
.map(&:to_s)
.to_a
end
# Raven processor entry point: filters the Sidekiq payload attached
# under value[:extra][:sidekiq], leaving other events untouched.
def process(value, key = nil)
sidekiq = value.dig(:extra, :sidekiq)
return value unless sidekiq
# Work on a copy so the caller's hash is not mutated in place.
sidekiq = sidekiq.deep_dup
# jobstr duplicates the job payload as a string and could leak the
# unfiltered arguments, so drop it entirely.
sidekiq.delete(:jobstr)
# 'args' in this hash => from Gitlab::ErrorTracking.track_*
# 'args' in :job => from default error handler
job_holder = sidekiq.key?('args') ? sidekiq : sidekiq[:job]
if job_holder['args']
job_holder['args'] = self.class.filter_arguments(job_holder['args'], job_holder['class']).to_a
end
value[:extra][:sidekiq] = sidekiq
value
end
end
end
end
end
......@@ -14,18 +14,11 @@ module Gitlab
payload.delete('extra.server')
# The raven extra context is populated by Raven::SidekiqCleanupMiddleware.
#
# It contains the full sidekiq job which consists of mixed types and nested
# objects. That causes a bunch of issues when trying to ingest logs into
# Elasticsearch.
#
# We apply a stricter schema here that forces the args to be an array of
# strings. This same logic exists in Gitlab::SidekiqLogging::JSONFormatter.
payload['extra.sidekiq'].tap do |value|
if value.is_a?(Hash) && value.key?('args')
value = value.dup
payload['extra.sidekiq']['args'] = Gitlab::Utils::LogLimitedArray.log_limited_array(value['args'].try(:map, &:to_s))
payload['extra.sidekiq']['args'] = Gitlab::ErrorTracking::Processor::SidekiqProcessor
.loggable_arguments(value['args'], value['class'])
end
end
......
......@@ -18,10 +18,15 @@ module Gitlab
when String
output[:message] = data
when Hash
convert_to_iso8601!(data)
convert_retry_to_integer!(data)
stringify_args!(data)
output.merge!(data)
# jobstr is redundant and can include information we wanted to
# exclude (like arguments)
output.delete(:jobstr)
convert_to_iso8601!(output)
convert_retry_to_integer!(output)
process_args!(output)
end
output.to_json + "\n"
......@@ -56,8 +61,11 @@ module Gitlab
end
end
def stringify_args!(payload)
payload['args'] = Gitlab::Utils::LogLimitedArray.log_limited_array(payload['args'].map(&:to_s)) if payload['args']
def process_args!(payload)
return unless payload['args']
payload['args'] = Gitlab::ErrorTracking::Processor::SidekiqProcessor
.loggable_arguments(payload['args'], payload['class'])
end
end
end
......
......@@ -9,14 +9,14 @@ module Gitlab
# to around 10 kilobytes. Once we hit the limit, add the sentinel
# value as the last item in the returned array.
def self.log_limited_array(array, sentinel: '...')
return [] unless array.is_a?(Array)
return [] unless array.is_a?(Array) || array.is_a?(Enumerator::Lazy)
total_length = 0
limited_array = array.take_while do |arg|
total_length += JsonSizeEstimator.estimate(arg)
total_length <= MAXIMUM_ARRAY_LENGTH
end
end.to_a
limited_array.push(sentinel) if total_length > MAXIMUM_ARRAY_LENGTH
......
# frozen_string_literal: true
require 'spec_helper'
require 'rspec-parameterized'
# Specs for the Sentry processor that filters sensitive Sidekiq arguments.
RSpec.describe Gitlab::ErrorTracking::Processor::SidekiqProcessor do
# permitted_arguments_for_worker memoizes per worker class name, so reset
# it after each example to stop stubbed constants leaking between examples.
after do
if described_class.instance_variable_defined?(:@permitted_arguments_for_worker)
described_class.remove_instance_variable(:@permitted_arguments_for_worker)
end
end
describe '.filter_arguments' do
it 'returns a lazy enumerator' do
filtered = described_class.filter_arguments([1, 'string'], 'TestWorker')
expect(filtered).to be_a(Enumerator::Lazy)
expect(filtered.to_a).to eq([1, described_class::FILTERED_STRING])
end
context 'arguments filtering' do
using RSpec::Parameterized::TableSyntax
# Numeric arguments always pass through; other positions pass only
# when listed in the worker's loggable_arguments.
where(:klass, :expected) do
'UnknownWorker' | [1, described_class::FILTERED_STRING, described_class::FILTERED_STRING, described_class::FILTERED_STRING]
'NoPermittedArguments' | [1, described_class::FILTERED_STRING, described_class::FILTERED_STRING, described_class::FILTERED_STRING]
'OnePermittedArgument' | [1, 'string', described_class::FILTERED_STRING, described_class::FILTERED_STRING]
'AllPermittedArguments' | [1, 'string', [1, 2], { a: 1 }]
end
with_them do
before do
stub_const('NoPermittedArguments', double(loggable_arguments: []))
stub_const('OnePermittedArgument', double(loggable_arguments: [1]))
stub_const('AllPermittedArguments', double(loggable_arguments: [0, 1, 2, 3]))
end
it do
expect(described_class.filter_arguments([1, 'string', [1, 2], { a: 1 }], klass).to_a)
.to eq(expected)
end
end
end
end
describe '.permitted_arguments_for_worker' do
it 'returns the loggable_arguments for a worker class as a set' do
stub_const('TestWorker', double(loggable_arguments: [1, 1]))
expect(described_class.permitted_arguments_for_worker('TestWorker'))
.to eq([1].to_set)
end
it 'returns an empty set when the worker class does not exist' do
expect(described_class.permitted_arguments_for_worker('TestWorker'))
.to eq(Set.new)
end
it 'returns an empty set when the worker class does not respond to loggable_arguments' do
stub_const('TestWorker', 1)
expect(described_class.permitted_arguments_for_worker('TestWorker'))
.to eq(Set.new)
end
it 'returns an empty set when loggable_arguments cannot be converted to a set' do
stub_const('TestWorker', double(loggable_arguments: 1))
expect(described_class.permitted_arguments_for_worker('TestWorker'))
.to eq(Set.new)
end
it 'memoizes the results' do
worker_class = double
stub_const('TestWorker', worker_class)
expect(worker_class).to receive(:loggable_arguments).once.and_return([])
described_class.permitted_arguments_for_worker('TestWorker')
described_class.permitted_arguments_for_worker('TestWorker')
end
end
describe '.loggable_arguments' do
it 'filters and limits the arguments, then converts to strings' do
half_limit = Gitlab::Utils::LogLimitedArray::MAXIMUM_ARRAY_LENGTH / 2
args = [[1, 2], 'a' * half_limit, 'b' * half_limit, 'c' * half_limit, 'd']
stub_const('LoggableArguments', double(loggable_arguments: [0, 1, 3, 4]))
expect(described_class.loggable_arguments(args, 'LoggableArguments'))
.to eq(['[1, 2]', 'a' * half_limit, '[FILTERED]', '...'])
end
end
describe '#process' do
context 'when there is Sidekiq data' do
# Sidekiq data can carry args either directly (from
# Gitlab::ErrorTracking.track_*) or nested under :job (from the
# default error handler); both shapes must be filtered.
shared_examples 'Sidekiq arguments' do |args_in_job_hash: true|
let(:path) { [:extra, :sidekiq, args_in_job_hash ? :job : nil, 'args'].compact }
let(:args) { [1, 'string', { a: 1 }, [1, 2]] }
it 'only allows numeric arguments for an unknown worker' do
value = { 'args' => args, 'class' => 'UnknownWorker' }
value = { job: value } if args_in_job_hash
expect(subject.process(extra_sidekiq(value)).dig(*path))
.to eq([1, described_class::FILTERED_STRING, described_class::FILTERED_STRING, described_class::FILTERED_STRING])
end
it 'allows all argument types for a permitted worker' do
value = { 'args' => args, 'class' => 'PostReceive' }
value = { job: value } if args_in_job_hash
expect(subject.process(extra_sidekiq(value)).dig(*path))
.to eq(args)
end
end
context 'when processing via the default error handler' do
include_examples 'Sidekiq arguments', args_in_job_hash: true
end
context 'when processing via Gitlab::ErrorTracking' do
include_examples 'Sidekiq arguments', args_in_job_hash: false
end
it 'removes a jobstr field if present' do
value = {
job: { 'args' => [1] },
jobstr: { 'args' => [1] }.to_json
}
expect(subject.process(extra_sidekiq(value)))
.to eq(extra_sidekiq(value.except(:jobstr)))
end
it 'does nothing with no jobstr' do
value = { job: { 'args' => [1] } }
expect(subject.process(extra_sidekiq(value)))
.to eq(extra_sidekiq(value))
end
end
context 'when there is no Sidekiq data' do
it 'does nothing' do
value = {
request: {
method: 'POST',
data: { 'key' => 'value' }
}
}
expect(subject.process(value)).to eq(value)
end
end
# Wraps a hash in the event shape the processor expects.
def extra_sidekiq(hash)
{ extra: { sidekiq: hash } }
end
end
end
......@@ -2,6 +2,8 @@
require 'spec_helper'
require 'raven/transports/dummy'
describe Gitlab::ErrorTracking do
let(:exception) { RuntimeError.new('boom') }
let(:issue_url) { 'http://gitlab.com/gitlab-org/gitlab-foss/issues/1' }
......@@ -22,7 +24,9 @@ describe Gitlab::ErrorTracking do
allow(described_class).to receive(:sentry_dsn).and_return(Gitlab.config.sentry.dsn)
allow(Labkit::Correlation::CorrelationId).to receive(:current_id).and_return('cid')
described_class.configure
described_class.configure do |config|
config.encoding = 'json'
end
end
describe '.with_context' do
......@@ -181,13 +185,26 @@ describe Gitlab::ErrorTracking do
end
context 'with sidekiq args' do
let(:extra) { { sidekiq: { 'args' => [1, { 'id' => 2, 'name' => 'hello' }, 'some-value', 'another-value'] } } }
it 'ensures extra.sidekiq.args is a string' do
extra = { sidekiq: { 'class' => 'PostReceive', 'args' => [1, { 'id' => 2, 'name' => 'hello' }, 'some-value', 'another-value'] } }
expect(Gitlab::ErrorTracking::Logger).to receive(:error).with(
hash_including({ 'extra.sidekiq' => { 'class' => 'PostReceive', 'args' => ['1', '{"id"=>2, "name"=>"hello"}', 'some-value', 'another-value'] } }))
described_class.track_exception(exception, extra)
end
it 'filters sensitive arguments before sending' do
extra = { sidekiq: { 'class' => 'UnknownWorker', 'args' => ['sensitive string', 1, 2] } }
expect(Gitlab::ErrorTracking::Logger).to receive(:error).with(
hash_including({ 'extra.sidekiq' => { 'args' => ['1', '{"id"=>2, "name"=>"hello"}', 'some-value', 'another-value'] } }))
hash_including('extra.sidekiq' => { 'class' => 'UnknownWorker', 'args' => ['[FILTERED]', '1', '2'] }))
described_class.track_exception(exception, extra)
sentry_event = Gitlab::Json.parse(Raven.client.transport.events.last[1])
expect(sentry_event.dig('extra', 'sidekiq', 'args')).to eq(['[FILTERED]', 1, 2])
end
end
end
......
......@@ -14,6 +14,7 @@ describe Gitlab::SidekiqLogging::JSONFormatter do
let(:hash_input) do
{
foo: 1,
'class' => 'PostReceive',
'bar' => 'test',
'created_at' => timestamp,
'enqueued_at' => timestamp,
......@@ -42,21 +43,47 @@ describe Gitlab::SidekiqLogging::JSONFormatter do
expect(subject).to eq(expected_output)
end
context 'when the job args are bigger than the maximum allowed' do
it 'keeps args from the front until they exceed the limit' do
half_limit = Gitlab::Utils::LogLimitedArray::MAXIMUM_ARRAY_LENGTH / 2
hash_input['args'] = [1, 2, 'a' * half_limit, 'b' * half_limit, 3]
it 'removes jobstr from the hash' do
hash_input[:jobstr] = 'job string'
expected_args = hash_input['args'].take(3).map(&:to_s) + ['...']
expect(subject).not_to include('jobstr')
end
expect(subject['args']).to eq(expected_args)
end
it 'does not modify the input hash' do
input = { 'args' => [1, 'string'] }
output = Gitlab::Json.parse(described_class.new.call('INFO', now, 'my program', input))
expect(input['args']).to eq([1, 'string'])
expect(output['args']).to eq(['1', '[FILTERED]'])
end
it 'properly flattens arguments to a String' do
hash_input['args'] = [1, "test", 2, { 'test' => 1 }]
context 'job arguments' do
context 'when the arguments are bigger than the maximum allowed' do
it 'keeps args from the front until they exceed the limit' do
half_limit = Gitlab::Utils::LogLimitedArray::MAXIMUM_ARRAY_LENGTH / 2
hash_input['args'] = [1, 2, 'a' * half_limit, 'b' * half_limit, 3]
expected_args = hash_input['args'].take(3).map(&:to_s) + ['...']
expect(subject['args']).to eq(expected_args)
end
end
context 'when the job has non-integer arguments' do
it 'only allows permitted non-integer arguments through' do
hash_input['args'] = [1, 'foo', 'bar']
hash_input['class'] = 'WebHookWorker'
expect(subject['args']).to eq(["1", "test", "2", %({"test"=>1})])
expect(subject['args']).to eq(['1', '[FILTERED]', 'bar'])
end
end
it 'properly flattens arguments to a String' do
hash_input['args'] = [1, "test", 2, { 'test' => 1 }]
expect(subject['args']).to eq(["1", "test", "2", %({"test"=>1})])
end
end
context 'when the job has a non-integer value for retry' do
......
......@@ -40,6 +40,12 @@ describe 'Every Sidekiq worker' do
end
end
it 'has a value for loggable_arguments' do
workers_without_defaults.each do |worker|
expect(worker.klass.loggable_arguments).to be_an(Array)
end
end
describe "feature category declarations" do
let(:feature_categories) do
YAML.load_file(Rails.root.join('config', 'feature_categories.yml')).map(&:to_sym).to_set
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment