Merge branch '118613-spam-api-call' into 'master'

Add Spam check endpoint See merge request gitlab-org/gitlab!31449

Merge branch '118613-spam-api-call' into 'master'
Add Spam check endpoint See merge request gitlab-org/gitlab!31449
402d9e56 · Heinrich Lee Yu · d43554a8 · 61f7b9e4 · 402d9e56 · 402d9e56
Commit 402d9e56 authored May 22, 2020 by Heinrich Lee Yu
17 changed files
--- a/app/helpers/application_settings_helper.rb
+++ b/app/helpers/application_settings_helper.rb
@@ -261,6 +261,8 @@ module ApplicationSettingsHelper
      :sourcegraph_enabled,
      :sourcegraph_url,
      :sourcegraph_public_only,
+      :spam_check_endpoint_enabled,
+      :spam_check_endpoint_url,
      :terminal_max_session_time,
      :terms,
      :throttle_authenticated_api_enabled,

--- a/app/models/application_setting.rb
+++ b/app/models/application_setting.rb
@@ -301,6 +301,13 @@ class ApplicationSetting < ApplicationRecord
            numericality: { greater_than: 0, less_than_or_equal_to: 10 },
            if: :external_authorization_service_enabled

+  validates :spam_check_endpoint_url,
+            addressable_url: true, allow_blank: true
+
+  validates :spam_check_endpoint_url,
+            presence: true,
+            if: :spam_check_endpoint_enabled
+
  validates :external_auth_client_key,
            presence: true,
            if: -> (setting) { setting.external_auth_client_cert.present? }

--- a/app/models/application_setting_implementation.rb
+++ b/app/models/application_setting_implementation.rb
@@ -115,6 +115,8 @@ module ApplicationSettingImplementation
        sourcegraph_enabled: false,
        sourcegraph_url: nil,
        sourcegraph_public_only: true,
+        spam_check_endpoint_enabled: false,
+        spam_check_endpoint_url: nil,
        minimum_password_length: DEFAULT_MINIMUM_PASSWORD_LENGTH,
        namespace_storage_size_limit: 0,
        terminal_max_session_time: 0,

--- a/app/services/spam/spam_constants.rb
+++ b/app/services/spam/spam_constants.rb
@@ -2,8 +2,24 @@

 module Spam
  module SpamConstants
-    REQUIRE_RECAPTCHA = :recaptcha
-    DISALLOW = :disallow
-    ALLOW = :allow
+    REQUIRE_RECAPTCHA = "recaptcha"
+    DISALLOW = "disallow"
+    ALLOW = "allow"
+    BLOCK_USER = "block"
+
+    SUPPORTED_VERDICTS = {
+      BLOCK_USER => {
+        priority: 1
+      },
+      DISALLOW => {
+        priority: 2
+      },
+      REQUIRE_RECAPTCHA => {
+        priority: 3
+      },
+      ALLOW => {
+        priority: 4
+      }
+    }.freeze
  end
 end
--- a/app/services/spam/spam_verdict_service.rb
+++ b/app/services/spam/spam_verdict_service.rb
@@ -5,13 +5,31 @@ module Spam
    include AkismetMethods
    include SpamConstants

-    def initialize(target:, request:, options:)
+    def initialize(target:, request:, options:, verdict_params: {})
      @target = target
      @request = request
      @options = options
+      @verdict_params = assemble_verdict_params(verdict_params)
    end

    def execute
+      external_spam_check_result = spam_verdict
+      akismet_result = akismet_verdict
+
+      # filter out anything we don't recognise, including nils.
+      valid_results = [external_spam_check_result, akismet_result].compact.select { |r| SUPPORTED_VERDICTS.key?(r) }
+      # Treat nils - such as service unavailable - as ALLOW
+      return ALLOW unless valid_results.any?
+
+      # Favour the most restrictive result.
+      valid_results.min_by { |v| SUPPORTED_VERDICTS[v][:priority] }
+    end
+
+    private
+
+    attr_reader :target, :request, :options, :verdict_params
+
+    def akismet_verdict
      if akismet.spam?
        Gitlab::Recaptcha.enabled? ? REQUIRE_RECAPTCHA : DISALLOW
      else
@@ -19,8 +37,41 @@ module Spam
      end
    end

-    private
+    def spam_verdict
+      return unless Gitlab::CurrentSettings.spam_check_endpoint_enabled
+      return if endpoint_url.blank?
+
+      result = Gitlab::HTTP.try_get(endpoint_url, verdict_params)
+      return unless result
+
+      begin
+        json_result = Gitlab::Json.parse(result).with_indifferent_access
+        # @TODO metrics/logging
+        # Expecting:
+        # error: (string or nil)
+        # result: (string or nil)
+        verdict = json_result[:verdict]
+        return unless SUPPORTED_VERDICTS.include?(verdict)

-    attr_reader :target, :request, :options
+        # @TODO log if json_result[:error]
+
+        json_result[:verdict]
+      rescue
+        # @TODO log
+        ALLOW
+      end
+    end
+
+    def assemble_verdict_params(params)
+      return {} unless endpoint_url
+
+      params.merge({
+        user_id: target.author_id
+      })
+    end
+
+    def endpoint_url
+      @endpoint_url ||= Gitlab::CurrentSettings.current_application_settings.spam_check_endpoint_url
+    end
  end
 end
--- a/app/views/admin/application_settings/_spam.html.haml
+++ b/app/views/admin/application_settings/_spam.html.haml
@@ -62,4 +62,13 @@
      .form-text.text-muted
        How many seconds an IP will be counted towards the limit

+    .form-group
+      .form-check
+        = f.check_box :spam_check_endpoint_enabled, class: 'form-check-input'
+        = f.label :spam_check_endpoint_enabled, _('Enable Spam Check via external API endpoint'), class: 'form-check-label'
+        .form-text.text-muted= _('Define custom rules for what constitutes spam, independent of Akismet')
+    .form-group
+      = f.label :spam_check_endpoint_url, _('URL of the external Spam Check endpoint'), class: 'label-bold'
+      = f.text_field :spam_check_endpoint_url, class: 'form-control'
+
  = f.submit 'Save changes', class: "btn btn-success"
--- a/changelogs/unreleased/118613-spam-api-call.yml
+++ b/changelogs/unreleased/118613-spam-api-call.yml
+---
+title: SpamVerdictService can call external spam check endpoint
+merge_request: 31449
+author:
+type: added
--- a/db/migrate/20200508050301_add_spam_check_endpoint_to_application_settings.rb
+++ b/db/migrate/20200508050301_add_spam_check_endpoint_to_application_settings.rb
+# frozen_string_literal: true
+
+class AddSpamCheckEndpointToApplicationSettings < ActiveRecord::Migration[6.0]
+  include Gitlab::Database::MigrationHelpers
+
+  DOWNTIME = false
+
+  disable_ddl_transaction!
+
+  def up
+    unless column_exists?(:application_settings, :spam_check_endpoint_url)
+      add_column :application_settings, :spam_check_endpoint_url, :text
+    end
+
+    add_text_limit :application_settings, :spam_check_endpoint_url, 255
+
+    unless column_exists?(:application_settings, :spam_check_endpoint_enabled)
+      add_column :application_settings, :spam_check_endpoint_enabled, :boolean, null: false, default: false
+    end
+  end
+
+  def down
+    remove_column_if_exists :spam_check_endpoint_url
+    remove_column_if_exists :spam_check_endpoint_enabled
+  end
+
+  private
+
+  def remove_column_if_exists(column)
+    return unless column_exists?(:application_settings, column)
+
+    remove_column :application_settings, column
+  end
+end
--- a/db/structure.sql
+++ b/db/structure.sql
@@ -441,7 +441,10 @@ CREATE TABLE public.application_settings (
    container_registry_vendor text DEFAULT ''::text NOT NULL,
    container_registry_version text DEFAULT ''::text NOT NULL,
    container_registry_features text[] DEFAULT '{}'::text[] NOT NULL,
+    spam_check_endpoint_url text,
+    spam_check_endpoint_enabled boolean DEFAULT false NOT NULL,
    CONSTRAINT check_d03919528d CHECK ((char_length(container_registry_vendor) <= 255)),
+    CONSTRAINT check_d820146492 CHECK ((char_length(spam_check_endpoint_url) <= 255)),
    CONSTRAINT check_e5aba18f02 CHECK ((char_length(container_registry_version) <= 255))
 );

@@ -13880,6 +13883,7 @@ COPY "schema_migrations" (version) FROM STDIN;
 20200506125731
 20200506154421
 20200507221434
+20200508050301
 20200508091106
 20200511080113
 20200511083541

--- a/doc/api/settings.md
+++ b/doc/api/settings.md
@@ -335,6 +335,8 @@ are listed in the descriptions of the relevant settings.
 | `sourcegraph_enabled`                    | boolean          | no                                    | Enables Sourcegraph integration. Default is `false`. **If enabled, requires** `sourcegraph_url`. |
 | `sourcegraph_url`                        | string           | required by: `sourcegraph_enabled`    | The Sourcegraph instance URL for integration. |
 | `sourcegraph_public_only`                | boolean          | no                                   | Blocks Sourcegraph from being loaded on private and internal projects. Default is `true`. |
+| `spam_check_endpoint_enabled`            | boolean          | no                                   | Enables Spam Check via external API endpoint. Default is `false`. |
+| `spam_check_endpoint_url`                | string           | no                                   | URL of the external Spam Check service endpoint. |
 | `terminal_max_session_time`              | integer          | no                                   | Maximum time for web terminal websocket connection (in seconds). Set to `0` for unlimited time. |
 | `terms`                                  | text             | required by: `enforce_terms`         | (**Required by:** `enforce_terms`) Markdown content for the ToS. |
 | `throttle_authenticated_api_enabled`     | boolean          | no                                   | (**If enabled, requires:** `throttle_authenticated_api_period_in_seconds` and `throttle_authenticated_api_requests_per_period`) Enable authenticated API request rate limit. Helps reduce request volume (for example, from crawlers or abusive bots). |

--- a/lib/api/settings.rb
+++ b/lib/api/settings.rb
@@ -132,6 +132,10 @@ module API
      given sourcegraph_enabled: ->(val) { val } do
        requires :sourcegraph_url, type: String, desc: 'The configured Sourcegraph instance URL'
      end
+      optional :spam_check_endpoint_enabled, type: Boolean, desc: 'Enable Spam Check via external API endpoint'
+      given spam_check_endpoint_enabled: ->(val) { val } do
+        requires :spam_check_endpoint_url, type: String, desc: 'The URL of the external Spam Check service endpoint'
+      end
      optional :terminal_max_session_time, type: Integer, desc: 'Maximum time for web terminal websocket connection (in seconds). Set to 0 for unlimited time.'
      optional :usage_ping_enabled, type: Boolean, desc: 'Every week GitLab will report license usage back to GitLab, Inc.'
      optional :instance_statistics_visibility_private, type: Boolean, desc: 'When set to `true` Instance statistics will only be available to admins'

--- a/locale/gitlab.pot
+++ b/locale/gitlab.pot
@@ -6933,6 +6933,9 @@ msgstr ""
 msgid "Define a custom pattern with cron syntax"
 msgstr ""

+msgid "Define custom rules for what constitutes spam, independent of Akismet"
+msgstr ""
+
 msgid "Define environments in the deploy stage(s) in <code>.gitlab-ci.yml</code> to track deployments here."
 msgstr ""

@@ -8055,6 +8058,9 @@ msgstr ""
 msgid "Enable Seat Link"
 msgstr ""

+msgid "Enable Spam Check via external API endpoint"
+msgstr ""
+
 msgid "Enable access to Grafana"
 msgstr ""

@@ -23086,6 +23092,9 @@ msgstr ""
 msgid "URL must start with %{codeStart}http://%{codeEnd}, %{codeStart}https://%{codeEnd}, or %{codeStart}ftp://%{codeEnd}"
 msgstr ""

+msgid "URL of the external Spam Check endpoint"
+msgstr ""
+
 msgid "URL of the external storage that will serve the repository static objects (e.g. archives, blobs, ...)."
 msgstr ""


--- a/spec/features/admin/admin_settings_spec.rb
+++ b/spec/features/admin/admin_settings_spec.rb
@@ -282,11 +282,13 @@ describe 'Admin updates settings', :clean_gitlab_redis_shared_state, :do_not_moc
        visit reporting_admin_application_settings_path

        page.within('.as-spam') do
-          check 'Enable reCAPTCHA'
-          check 'Enable reCAPTCHA for login'
          fill_in 'reCAPTCHA Site Key', with: 'key'
          fill_in 'reCAPTCHA Private Key', with: 'key'
+          check 'Enable reCAPTCHA'
+          check 'Enable reCAPTCHA for login'
          fill_in 'IPs per user', with: 15
+          check 'Enable Spam Check via external API endpoint'
+          fill_in 'URL of the external Spam Check endpoint', with: 'https://www.example.com/spamcheck'
          click_button 'Save changes'
        end

@@ -294,6 +296,8 @@ describe 'Admin updates settings', :clean_gitlab_redis_shared_state, :do_not_moc
        expect(current_settings.recaptcha_enabled).to be true
        expect(current_settings.login_recaptcha_protection_enabled).to be true
        expect(current_settings.unique_ips_limit_per_user).to eq(15)
+        expect(current_settings.spam_check_endpoint_enabled).to be true
+        expect(current_settings.spam_check_endpoint_url).to eq 'https://www.example.com/spamcheck'
      end
    end


--- a/spec/models/application_setting_spec.rb
+++ b/spec/models/application_setting_spec.rb
@@ -152,6 +152,30 @@ describe ApplicationSetting do
      end
    end

+    describe 'spam_check_endpoint' do
+      context 'when spam_check_endpoint is enabled' do
+        before do
+          setting.spam_check_endpoint_enabled = true
+        end
+
+        it { is_expected.to allow_value('https://example.org/spam_check').for(:spam_check_endpoint_url) }
+        it { is_expected.not_to allow_value('nonsense').for(:spam_check_endpoint_url) }
+        it { is_expected.not_to allow_value(nil).for(:spam_check_endpoint_url) }
+        it { is_expected.not_to allow_value('').for(:spam_check_endpoint_url) }
+      end
+
+      context 'when spam_check_endpoint is NOT enabled' do
+        before do
+          setting.spam_check_endpoint_enabled = false
+        end
+
+        it { is_expected.to allow_value('https://example.org/spam_check').for(:spam_check_endpoint_url) }
+        it { is_expected.not_to allow_value('nonsense').for(:spam_check_endpoint_url) }
+        it { is_expected.to allow_value(nil).for(:spam_check_endpoint_url) }
+        it { is_expected.to allow_value('').for(:spam_check_endpoint_url) }
+      end
+    end
+
    context 'when snowplow is enabled' do
      before do
        setting.snowplow_enabled = true

--- a/spec/requests/api/settings_spec.rb
+++ b/spec/requests/api/settings_spec.rb
@@ -38,6 +38,8 @@ describe API::Settings, 'Settings' do
      expect(json_response).not_to have_key('performance_bar_allowed_group_path')
      expect(json_response).not_to have_key('performance_bar_enabled')
      expect(json_response['snippet_size_limit']).to eq(50.megabytes)
+      expect(json_response['spam_check_endpoint_enabled']).to be_falsey
+      expect(json_response['spam_check_endpoint_url']).to be_nil
    end
  end

@@ -90,7 +92,9 @@ describe API::Settings, 'Settings' do
            push_event_activities_limit: 2,
            snippet_size_limit: 5,
            issues_create_limit: 300,
-            raw_blob_request_limit: 300
+            raw_blob_request_limit: 300,
+            spam_check_endpoint_enabled: true,
+            spam_check_endpoint_url: 'https://example.com/spam_check'
          }

        expect(response).to have_gitlab_http_status(:ok)
@@ -129,6 +133,8 @@ describe API::Settings, 'Settings' do
        expect(json_response['snippet_size_limit']).to eq(5)
        expect(json_response['issues_create_limit']).to eq(300)
        expect(json_response['raw_blob_request_limit']).to eq(300)
+        expect(json_response['spam_check_endpoint_enabled']).to be_truthy
+        expect(json_response['spam_check_endpoint_url']).to eq('https://example.com/spam_check')
      end
    end

@@ -390,5 +396,14 @@ describe API::Settings, 'Settings' do
        expect(json_response['error']).to eq('sourcegraph_url is missing')
      end
    end
+
+    context "missing spam_check_endpoint_url value when spam_check_endpoint_enabled is true" do
+      it "returns a blank parameter error message" do
+        put api("/application/settings", admin), params: { spam_check_endpoint_enabled: true }
+
+        expect(response).to have_gitlab_http_status(:bad_request)
+        expect(json_response['error']).to eq('spam_check_endpoint_url is missing')
+      end
+    end
  end
 end
--- a/spec/services/spam/spam_verdict_service_spec.rb
+++ b/spec/services/spam/spam_verdict_service_spec.rb
@@ -25,20 +25,105 @@ describe Spam::SpamVerdictService do
    subject { service.execute }

    before do
+      allow(service).to receive(:akismet_verdict).and_return(nil)
+      allow(service).to receive(:spam_verdict_verdict).and_return(nil)
+    end
+
+    context 'if all services return nil' do
+      it 'renders ALLOW verdict' do
+        expect(subject).to eq ALLOW
+      end
+    end
+
+    context 'if only one service returns a verdict' do
+      context 'and it is supported' do
+        before do
+          allow(service).to receive(:akismet_verdict).and_return(DISALLOW)
+        end
+
+        it 'renders that verdict' do
+          expect(subject).to eq DISALLOW
+        end
+      end
+
+      context 'and it is unexpected' do
+        before do
+          allow(service).to receive(:akismet_verdict).and_return("unexpected")
+        end
+
+        it 'allows' do
+          expect(subject).to eq ALLOW
+        end
+      end
+    end
+
+    context 'if more than one service returns a verdict' do
+      context 'and they are supported' do
+        before do
+          allow(service).to receive(:akismet_verdict).and_return(DISALLOW)
+          allow(service).to receive(:spam_verdict).and_return(BLOCK_USER)
+        end
+
+        it 'renders the more restrictive verdict' do
+          expect(subject).to eq BLOCK_USER
+        end
+      end
+
+      context 'and one is supported' do
+        before do
+          allow(service).to receive(:akismet_verdict).and_return('nonsense')
+          allow(service).to receive(:spam_verdict).and_return(BLOCK_USER)
+        end
+
+        it 'renders the more restrictive verdict' do
+          expect(subject).to eq BLOCK_USER
+        end
+      end
+
+      context 'and one is supported' do
+        before do
+          allow(service).to receive(:akismet_verdict).and_return('nonsense')
+          allow(service).to receive(:spam_verdict).and_return(BLOCK_USER)
+        end
+
+        it 'renders the more restrictive verdict' do
+          expect(subject).to eq BLOCK_USER
+        end
+      end
+
+      context 'and none are supported' do
+        before do
+          allow(service).to receive(:akismet_verdict).and_return('nonsense')
+          allow(service).to receive(:spam_verdict).and_return('rubbish')
+        end
+
+        it 'renders the more restrictive verdict' do
+          expect(subject).to eq ALLOW
+        end
+      end
+    end
+  end
+
+  describe '#akismet_verdict' do
+    subject { service.send(:akismet_verdict) }
+
+    context 'if Akismet is enabled' do
+      before do
+        stub_application_setting(akismet_enabled: true)
        allow_next_instance_of(Spam::AkismetService) do |service|
-        allow(service).to receive(:spam?).and_return(spam_verdict)
+          allow(service).to receive(:spam?).and_return(akismet_result)
        end
      end

      context 'if Akismet considers it spam' do
-      let(:spam_verdict) { true }
+        let(:akismet_result) { true }

        context 'if reCAPTCHA is enabled' do
          before do
            stub_application_setting(recaptcha_enabled: true)
          end

-        it 'requires reCAPTCHA' do
+          it 'returns require reCAPTCHA verdict' do
            expect(subject).to eq REQUIRE_RECAPTCHA
          end
        end
@@ -48,18 +133,148 @@ describe Spam::SpamVerdictService do
            stub_application_setting(recaptcha_enabled: false)
          end

-        it 'disallows the change' do
+          it 'renders disallow verdict' do
            expect(subject).to eq DISALLOW
          end
        end
      end

      context 'if Akismet does not consider it spam' do
-      let(:spam_verdict) { false }
+        let(:akismet_result) { false }

-      it 'allows the change' do
+        it 'renders allow verdict' do
          expect(subject).to eq ALLOW
        end
      end
    end
+
+    context 'if Akismet is not enabled' do
+      before do
+        stub_application_setting(akismet_enabled: false)
+      end
+
+      it 'renders allow verdict' do
+        expect(subject).to eq ALLOW
+      end
+    end
+  end
+
+  describe '#spam_verdict' do
+    subject { service.send(:spam_verdict) }
+
+    context 'if a Spam Check endpoint enabled and set to a URL' do
+      let(:spam_check_body) { {} }
+      let(:spam_check_http_status) { nil }
+
+      before do
+        stub_application_setting(spam_check_endpoint_enabled: true)
+        stub_application_setting(spam_check_endpoint_url: "http://www.spamcheckurl.com/spam_check")
+        stub_request(:any, /.*spamcheckurl.com.*/).to_return( body: spam_check_body.to_json, status: spam_check_http_status )
+      end
+
+      context 'if the endpoint is accessible' do
+        let(:spam_check_http_status) { 200 }
+        let(:error) { nil }
+        let(:verdict) { nil }
+        let(:spam_check_body) do
+          { verdict: verdict, error: error }
+        end
+
+        context 'the result is a valid verdict' do
+          let(:verdict) { 'allow' }
+
+          it 'returns the verdict' do
+            expect(subject).to eq ALLOW
+          end
+        end
+
+        context 'the verdict is an unexpected string' do
+          let(:verdict) { 'this is fine' }
+
+          it 'returns nil' do
+            expect(subject).to be_nil
+          end
+        end
+
+        context 'the JSON is malformed' do
+          let(:spam_check_body) { 'this is fine' }
+
+          it 'returns allow' do
+            expect(subject).to eq ALLOW
+          end
+        end
+
+        context 'the verdict is an empty string' do
+          let(:verdict) { '' }
+
+          it 'returns nil' do
+            expect(subject).to be_nil
+          end
+        end
+
+        context 'the verdict is nil' do
+          let(:verdict) { nil }
+
+          it 'returns nil' do
+            expect(subject).to be_nil
+          end
+        end
+
+        context 'there is an error' do
+          let(:error) { "Sorry Dave, I can't do that" }
+
+          it 'returns nil' do
+            expect(subject).to be_nil
+          end
+        end
+
+        context 'the HTTP status is not 200' do
+          let(:spam_check_http_status) { 500 }
+
+          it 'returns nil' do
+            expect(subject).to be_nil
+          end
+        end
+
+        context 'the confused API endpoint returns both an error and a verdict' do
+          let(:verdict) { 'disallow' }
+          let(:error) { 'oh noes!' }
+
+          it 'renders the verdict' do
+            expect(subject).to eq DISALLOW
+          end
+        end
+      end
+
+      context 'if the endpoint times out' do
+        before do
+          stub_request(:any, /.*spamcheckurl.com.*/).to_timeout
+        end
+
+        it 'returns nil' do
+          expect(subject).to be_nil
+        end
+      end
+    end
+
+    context 'if a Spam Check endpoint is not set' do
+      before do
+        stub_application_setting(spam_check_endpoint_url: nil)
+      end
+
+      it 'returns nil' do
+        expect(subject).to be_nil
+      end
+    end
+
+    context 'if Spam Check endpoint is not enabled' do
+      before do
+        stub_application_setting(spam_check_endpoint_enabled: false)
+      end
+
+      it 'returns nil' do
+        expect(subject).to be_nil
+      end
+    end
+  end
 end
--- a/spec/support/shared_contexts/spam_constants.rb
+++ b/spec/support/shared_contexts/spam_constants.rb
@@ -5,5 +5,6 @@ shared_context 'includes Spam constants' do
    stub_const('REQUIRE_RECAPTCHA', Spam::SpamConstants::REQUIRE_RECAPTCHA)
    stub_const('DISALLOW', Spam::SpamConstants::DISALLOW)
    stub_const('ALLOW', Spam::SpamConstants::ALLOW)
+    stub_const('BLOCK_USER', Spam::SpamConstants::BLOCK_USER)
  end
 end