From 57a8859a11fe0c2cbd68deb83a2d2857c245fd2f Mon Sep 17 00:00:00 2001
From: Andrew Newdigate <andrew@gitlab.com>
Date: Mon, 7 Jan 2019 12:40:54 +0200
Subject: [PATCH] Conditionally initialize the global opentracing tracer

This change will instantiate an OpenTracing tracer and configure it
as the global tracer when the GITLAB_TRACING environment variable is
configured. GITLAB_TRACING takes a "connection string"-like value,
encapsulating the driver (eg jaeger, etc) and options for the driver.

Since each service, whether it's written in Ruby or Golang, uses the
same connection-string, it should be very easy to configure all
services in a cluster, or even a single development machine to be
setup to use tracing.

Note that this change does not include instrumentation or propagation
changes as this is a way of breaking a previous larger change into
components. The instrumentation and propagation changes will follow
in separate changes.
---
 Gemfile                                       |  6 ++
 Gemfile.lock                                  |  7 ++
 .../unreleased/an-opentracing-factory.yml     |  5 +
 config/initializers/tracing.rb                | 13 +++
 lib/gitlab/tracing.rb                         | 17 ++++
 lib/gitlab/tracing/factory.rb                 | 61 ++++++++++++
 lib/gitlab/tracing/jaeger_factory.rb          | 97 +++++++++++++++++++
 spec/lib/gitlab/tracing/factory_spec.rb       | 43 ++++++++
 .../lib/gitlab/tracing/jaeger_factory_spec.rb | 45 +++++++++
 9 files changed, 294 insertions(+)
 create mode 100644 changelogs/unreleased/an-opentracing-factory.yml
 create mode 100644 config/initializers/tracing.rb
 create mode 100644 lib/gitlab/tracing.rb
 create mode 100644 lib/gitlab/tracing/factory.rb
 create mode 100644 lib/gitlab/tracing/jaeger_factory.rb
 create mode 100644 spec/lib/gitlab/tracing/factory_spec.rb
 create mode 100644 spec/lib/gitlab/tracing/jaeger_factory_spec.rb

diff --git a/Gemfile b/Gemfile
index 515b42dc384..9ef4b791375 100644
--- a/Gemfile
+++ b/Gemfile
@@ -304,6 +304,12 @@ group :metrics do
   gem 'raindrops', '~> 0.18'
 end
 
+group :tracing do
+  # OpenTracing
+  gem 'opentracing', '~> 0.4.3'
+  gem 'jaeger-client', '~> 0.10.0'
+end
+
 group :development do
   gem 'foreman', '~> 0.84.0'
   gem 'brakeman', '~> 4.2', require: false
diff --git a/Gemfile.lock b/Gemfile.lock
index fda6e8ff975..122e22af167 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -392,6 +392,9 @@ GEM
       cause
       json
     ipaddress (0.8.3)
+    jaeger-client (0.10.0)
+      opentracing (~> 0.3)
+      thrift
     jira-ruby (1.4.1)
       activesupport
       multipart-post
@@ -547,6 +550,7 @@ GEM
       activesupport
       nokogiri (>= 1.4.4)
       omniauth (~> 1.0)
+    opentracing (0.4.3)
     org-ruby (0.9.12)
       rubypants (~> 0.2)
     orm_adapter (0.5.0)
@@ -870,6 +874,7 @@ GEM
       rack (>= 1, < 3)
     thor (0.19.4)
     thread_safe (0.3.6)
+    thrift (0.11.0.0)
     tilt (2.0.8)
     timecop (0.8.1)
     timfel-krb5-auth (0.8.3)
@@ -1040,6 +1045,7 @@ DEPENDENCIES
   httparty (~> 0.13.3)
   icalendar
   influxdb (~> 0.2)
+  jaeger-client (~> 0.10.0)
   jira-ruby (~> 1.4)
   jquery-atwho-rails (~> 1.3.2)
   js_regex (~> 2.2.1)
@@ -1080,6 +1086,7 @@ DEPENDENCIES
   omniauth-shibboleth (~> 1.3.0)
   omniauth-twitter (~> 1.4)
   omniauth_crowd (~> 2.2.0)
+  opentracing (~> 0.4.3)
   org-ruby (~> 0.9.12)
   peek (~> 1.0.1)
   peek-gc (~> 0.0.2)
diff --git a/changelogs/unreleased/an-opentracing-factory.yml b/changelogs/unreleased/an-opentracing-factory.yml
new file mode 100644
index 00000000000..c04736f3e63
--- /dev/null
+++ b/changelogs/unreleased/an-opentracing-factory.yml
@@ -0,0 +1,5 @@
+---
+title: Conditionally initialize the global opentracing tracer
+merge_request: 24186
+author:
+type: other
diff --git a/config/initializers/tracing.rb b/config/initializers/tracing.rb
new file mode 100644
index 00000000000..be95f30d075
--- /dev/null
+++ b/config/initializers/tracing.rb
@@ -0,0 +1,13 @@
+# frozen_string_literal: true
+
+if Gitlab::Tracing.enabled?
+  require 'opentracing'
+
+  # In multi-processed clustered architectures (puma, unicorn) don't
+  # start tracing until the worker processes are spawned. This works
+  # around issues when the opentracing implementation spawns threads
+  Gitlab::Cluster::LifecycleEvents.on_worker_start do
+    tracer = Gitlab::Tracing::Factory.create_tracer(Gitlab.process_name, Gitlab::Tracing.connection_string)
+    OpenTracing.global_tracer = tracer if tracer
+  end
+end
diff --git a/lib/gitlab/tracing.rb b/lib/gitlab/tracing.rb
new file mode 100644
index 00000000000..3c4db42ac06
--- /dev/null
+++ b/lib/gitlab/tracing.rb
@@ -0,0 +1,17 @@
+# frozen_string_literal: true
+
+module Gitlab
+  module Tracing
+    # Only enable tracing when the `GITLAB_TRACING` env var is configured. Note that we avoid using ApplicationSettings since
+    # the same environment variable needs to be configured for Workhorse, Gitaly and any other components which
+    # emit tracing. Since other components may start before Rails, and may not have access to ApplicationSettings,
+    # an env var makes more sense.
+    def self.enabled?
+      connection_string.present?
+    end
+
+    def self.connection_string
+      ENV['GITLAB_TRACING']
+    end
+  end
+end
diff --git a/lib/gitlab/tracing/factory.rb b/lib/gitlab/tracing/factory.rb
new file mode 100644
index 00000000000..fc714164353
--- /dev/null
+++ b/lib/gitlab/tracing/factory.rb
@@ -0,0 +1,61 @@
+# frozen_string_literal: true
+
+require "cgi"
+
+module Gitlab
+  module Tracing
+    class Factory
+      OPENTRACING_SCHEME = "opentracing"
+
+      def self.create_tracer(service_name, connection_string)
+        return unless connection_string.present?
+
+        begin
+          opentracing_details = parse_connection_string(connection_string)
+          driver_name = opentracing_details[:driver_name]
+
+          case driver_name
+          when "jaeger"
+            JaegerFactory.create_tracer(service_name, opentracing_details[:options])
+          else
+            raise "Unknown driver: #{driver_name}"
+          end
+        rescue => e
+          # Can't create the tracer? Warn and continue sans tracer
+          warn "Unable to instantiate tracer: #{e}"
+          nil
+        end
+      end
+
+      def self.parse_connection_string(connection_string)
+        parsed = URI.parse(connection_string)
+
+        unless valid_uri?(parsed)
+          raise "Invalid tracing connection string"
+        end
+
+        {
+          driver_name: parsed.host,
+          options: parse_query(parsed.query)
+        }
+      end
+      private_class_method :parse_connection_string
+
+      def self.parse_query(query)
+        return {} unless query
+
+        CGI.parse(query).symbolize_keys.transform_values(&:first)
+      end
+      private_class_method :parse_query
+
+      def self.valid_uri?(uri)
+        return false unless uri
+
+        uri.scheme == OPENTRACING_SCHEME &&
+          uri.host.to_s =~ /^[a-z0-9_]+$/ &&
+          uri.path.empty?
+      end
+      private_class_method :valid_uri?
+    end
+  end
+end
diff --git a/lib/gitlab/tracing/jaeger_factory.rb b/lib/gitlab/tracing/jaeger_factory.rb
new file mode 100644
index 00000000000..0726f6b67f4
--- /dev/null
+++ b/lib/gitlab/tracing/jaeger_factory.rb
@@ -0,0 +1,97 @@
+# frozen_string_literal: true
+
+require 'jaeger/client'
+
+module Gitlab
+  module Tracing
+    class JaegerFactory
+      # When the probabilistic sampler is used, by default 0.1% of requests will be traced
+      DEFAULT_PROBABILISTIC_RATE = 0.001
+
+      # The default port for the Jaeger agent UDP listener
+      DEFAULT_UDP_PORT = 6831
+
+      # Reduce this from default of 10 seconds as the Ruby jaeger
+      # client doesn't have overflow control, leading to very large
+      # messages which fail to send over UDP (max packet = 64k)
+      # Flush more often, with smaller packets
+      FLUSH_INTERVAL = 5
+
+      def self.create_tracer(service_name, options)
+        kwargs = {
+          service_name: service_name,
+          sampler: get_sampler(options[:sampler], options[:sampler_param]),
+          reporter: get_reporter(service_name, options[:http_endpoint], options[:udp_endpoint])
+        }
+
+        extra_params = options.except(:sampler, :sampler_param, :http_endpoint, :udp_endpoint, :strict_parsing, :debug) # rubocop: disable CodeReuse/ActiveRecord
+        if extra_params.present?
+          message = "jaeger tracer: invalid option: #{extra_params.keys.join(", ")}"
+
+          if options[:strict_parsing]
+            raise message
+          else
+            warn message
+          end
+        end
+
+        Jaeger::Client.build(kwargs)
+      end
+
+      def self.get_sampler(sampler_type, sampler_param)
+        case sampler_type
+        when "probabilistic"
+          sampler_rate = sampler_param ? sampler_param.to_f : DEFAULT_PROBABILISTIC_RATE
+          Jaeger::Samplers::Probabilistic.new(rate: sampler_rate)
+        when "const"
+          const_value = sampler_param == "1"
+          Jaeger::Samplers::Const.new(const_value)
+        else
+          nil
+        end
+      end
+      private_class_method :get_sampler
+
+      def self.get_reporter(service_name, http_endpoint, udp_endpoint)
+        encoder = Jaeger::Encoders::ThriftEncoder.new(service_name: service_name)
+
+        if http_endpoint.present?
+          sender = get_http_sender(encoder, http_endpoint)
+        elsif udp_endpoint.present?
+          sender = get_udp_sender(encoder, udp_endpoint)
+        else
+          return nil
+        end
+
+        Jaeger::Reporters::RemoteReporter.new(
+          sender: sender,
+          flush_interval: FLUSH_INTERVAL
+        )
+      end
+      private_class_method :get_reporter
+
+      def self.get_http_sender(encoder, address)
+        Jaeger::HttpSender.new(
+          url: address,
+          encoder: encoder,
+          logger: Logger.new(STDOUT)
+        )
+      end
+      private_class_method :get_http_sender
+
+      def self.get_udp_sender(encoder, address)
+        pair = address.split(":", 2)
+        host = pair[0]
+        port = pair[1] ? pair[1].to_i : DEFAULT_UDP_PORT
+
+        Jaeger::UdpSender.new(
+          host: host,
+          port: port,
+          encoder: encoder,
+          logger: Logger.new(STDOUT)
+        )
+      end
+      private_class_method :get_udp_sender
+    end
+  end
+end
diff --git a/spec/lib/gitlab/tracing/factory_spec.rb b/spec/lib/gitlab/tracing/factory_spec.rb
new file mode 100644
index 00000000000..945490f0988
--- /dev/null
+++ b/spec/lib/gitlab/tracing/factory_spec.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+
+describe Gitlab::Tracing::Factory do
+  describe '.create_tracer' do
+    let(:service_name) { 'rspec' }
+
+    context "when tracing is not configured" do
+      it 'ignores null connection strings' do
+        expect(described_class.create_tracer(service_name, nil)).to be_nil
+      end
+
+      it 'ignores empty connection strings' do
+        expect(described_class.create_tracer(service_name, '')).to be_nil
+      end
+
+      it 'ignores unknown implementations' do
+        expect(described_class.create_tracer(service_name, 'opentracing://invalid_driver')).to be_nil
+      end
+
+      it 'ignores invalid connection strings' do
+        expect(described_class.create_tracer(service_name, 'open?tracing')).to be_nil
+      end
+    end
+
+    context "when tracing is configured with jaeger" do
+      let(:mock_tracer) { double('tracer') }
+
+      it 'processes default connections' do
+        expect(Gitlab::Tracing::JaegerFactory).to receive(:create_tracer).with(service_name, {}).and_return(mock_tracer)
+
+        expect(described_class.create_tracer(service_name, 'opentracing://jaeger')).to be(mock_tracer)
+      end
+
+      it 'processes connections with parameters' do
+        expect(Gitlab::Tracing::JaegerFactory).to receive(:create_tracer).with(service_name, { a: '1', b: '2', c: '3' }).and_return(mock_tracer)
+
+        expect(described_class.create_tracer(service_name, 'opentracing://jaeger?a=1&b=2&c=3')).to be(mock_tracer)
+      end
+    end
+  end
+end
diff --git a/spec/lib/gitlab/tracing/jaeger_factory_spec.rb b/spec/lib/gitlab/tracing/jaeger_factory_spec.rb
new file mode 100644
index 00000000000..3bffeb28830
--- /dev/null
+++ b/spec/lib/gitlab/tracing/jaeger_factory_spec.rb
@@ -0,0 +1,45 @@
+# frozen_string_literal: true
+
+require 'fast_spec_helper'
+
+describe Gitlab::Tracing::JaegerFactory do
+  describe '.create_tracer' do
+    let(:service_name) { 'rspec' }
+
+    it 'processes default connections' do
+      expect(described_class.create_tracer(service_name, {})).to respond_to(:active_span)
+    end
+
+    it 'handles debug options' do
+      expect(described_class.create_tracer(service_name, { debug: "1" })).to respond_to(:active_span)
+    end
+
+    it 'handles const sampler' do
+      expect(described_class.create_tracer(service_name, { sampler: "const", sampler_param: "1" })).to respond_to(:active_span)
+    end
+
+    it 'handles probabilistic sampler' do
+      expect(described_class.create_tracer(service_name, { sampler: "probabilistic", sampler_param: "0.5" })).to respond_to(:active_span)
+    end
+
+    it 'handles http_endpoint configurations' do
+      expect(described_class.create_tracer(service_name, { http_endpoint: "http://localhost:1234" })).to respond_to(:active_span)
+    end
+
+    it 'handles udp_endpoint configurations' do
+      expect(described_class.create_tracer(service_name, { udp_endpoint: "localhost:4321" })).to respond_to(:active_span)
+    end
+
+    it 'ignores invalid parameters' do
+      expect(described_class.create_tracer(service_name, { invalid: "true" })).to respond_to(:active_span)
+    end
+
+    it 'accepts the debug parameter when strict_parser is set' do
+      expect(described_class.create_tracer(service_name, { debug: "1", strict_parsing: "1" })).to respond_to(:active_span)
+    end
+
+    it 'rejects invalid parameters when strict_parser is set' do
+      expect { described_class.create_tracer(service_name, { invalid: "true", strict_parsing: "1" }) }.to raise_error(StandardError)
+    end
+  end
+end
-- 
2.30.9