Commit 402d9e56 authored by Heinrich Lee Yu's avatar Heinrich Lee Yu

Merge branch '118613-spam-api-call' into 'master'

Add Spam check endpoint

See merge request gitlab-org/gitlab!31449
parents d43554a8 61f7b9e4
......@@ -261,6 +261,8 @@ module ApplicationSettingsHelper
:sourcegraph_enabled,
:sourcegraph_url,
:sourcegraph_public_only,
:spam_check_endpoint_enabled,
:spam_check_endpoint_url,
:terminal_max_session_time,
:terms,
:throttle_authenticated_api_enabled,
......
......@@ -301,6 +301,13 @@ class ApplicationSetting < ApplicationRecord
numericality: { greater_than: 0, less_than_or_equal_to: 10 },
if: :external_authorization_service_enabled
validates :spam_check_endpoint_url,
addressable_url: true, allow_blank: true
validates :spam_check_endpoint_url,
presence: true,
if: :spam_check_endpoint_enabled
validates :external_auth_client_key,
presence: true,
if: -> (setting) { setting.external_auth_client_cert.present? }
......
......@@ -115,6 +115,8 @@ module ApplicationSettingImplementation
sourcegraph_enabled: false,
sourcegraph_url: nil,
sourcegraph_public_only: true,
spam_check_endpoint_enabled: false,
spam_check_endpoint_url: nil,
minimum_password_length: DEFAULT_MINIMUM_PASSWORD_LENGTH,
namespace_storage_size_limit: 0,
terminal_max_session_time: 0,
......
......@@ -2,8 +2,24 @@
module Spam
module SpamConstants
REQUIRE_RECAPTCHA = :recaptcha
DISALLOW = :disallow
ALLOW = :allow
REQUIRE_RECAPTCHA = "recaptcha"
DISALLOW = "disallow"
ALLOW = "allow"
BLOCK_USER = "block"
SUPPORTED_VERDICTS = {
BLOCK_USER => {
priority: 1
},
DISALLOW => {
priority: 2
},
REQUIRE_RECAPTCHA => {
priority: 3
},
ALLOW => {
priority: 4
}
}.freeze
end
end
......@@ -5,13 +5,31 @@ module Spam
include AkismetMethods
include SpamConstants
def initialize(target:, request:, options:)
def initialize(target:, request:, options:, verdict_params: {})
@target = target
@request = request
@options = options
@verdict_params = assemble_verdict_params(verdict_params)
end
def execute
external_spam_check_result = spam_verdict
akismet_result = akismet_verdict
# filter out anything we don't recognise, including nils.
valid_results = [external_spam_check_result, akismet_result].compact.select { |r| SUPPORTED_VERDICTS.key?(r) }
# Treat nils - such as service unavailable - as ALLOW
return ALLOW unless valid_results.any?
# Favour the most restrictive result.
valid_results.min_by { |v| SUPPORTED_VERDICTS[v][:priority] }
end
private
attr_reader :target, :request, :options, :verdict_params
def akismet_verdict
if akismet.spam?
Gitlab::Recaptcha.enabled? ? REQUIRE_RECAPTCHA : DISALLOW
else
......@@ -19,8 +37,41 @@ module Spam
end
end
private
def spam_verdict
return unless Gitlab::CurrentSettings.spam_check_endpoint_enabled
return if endpoint_url.blank?
result = Gitlab::HTTP.try_get(endpoint_url, verdict_params)
return unless result
begin
json_result = Gitlab::Json.parse(result).with_indifferent_access
# @TODO metrics/logging
# Expecting:
# error: (string or nil)
# result: (string or nil)
verdict = json_result[:verdict]
return unless SUPPORTED_VERDICTS.include?(verdict)
attr_reader :target, :request, :options
# @TODO log if json_result[:error]
json_result[:verdict]
rescue
# @TODO log
ALLOW
end
end
def assemble_verdict_params(params)
return {} unless endpoint_url
params.merge({
user_id: target.author_id
})
end
def endpoint_url
@endpoint_url ||= Gitlab::CurrentSettings.current_application_settings.spam_check_endpoint_url
end
end
end
......@@ -62,4 +62,13 @@
.form-text.text-muted
How many seconds an IP will be counted towards the limit
.form-group
.form-check
= f.check_box :spam_check_endpoint_enabled, class: 'form-check-input'
= f.label :spam_check_endpoint_enabled, _('Enable Spam Check via external API endpoint'), class: 'form-check-label'
.form-text.text-muted= _('Define custom rules for what constitutes spam, independent of Akismet')
.form-group
= f.label :spam_check_endpoint_url, _('URL of the external Spam Check endpoint'), class: 'label-bold'
= f.text_field :spam_check_endpoint_url, class: 'form-control'
= f.submit 'Save changes', class: "btn btn-success"
---
title: SpamVerdictService can call external spam check endpoint
merge_request: 31449
author:
type: added
# frozen_string_literal: true
class AddSpamCheckEndpointToApplicationSettings < ActiveRecord::Migration[6.0]
include Gitlab::Database::MigrationHelpers
DOWNTIME = false
disable_ddl_transaction!
def up
unless column_exists?(:application_settings, :spam_check_endpoint_url)
add_column :application_settings, :spam_check_endpoint_url, :text
end
add_text_limit :application_settings, :spam_check_endpoint_url, 255
unless column_exists?(:application_settings, :spam_check_endpoint_enabled)
add_column :application_settings, :spam_check_endpoint_enabled, :boolean, null: false, default: false
end
end
def down
remove_column_if_exists :spam_check_endpoint_url
remove_column_if_exists :spam_check_endpoint_enabled
end
private
def remove_column_if_exists(column)
return unless column_exists?(:application_settings, column)
remove_column :application_settings, column
end
end
......@@ -441,7 +441,10 @@ CREATE TABLE public.application_settings (
container_registry_vendor text DEFAULT ''::text NOT NULL,
container_registry_version text DEFAULT ''::text NOT NULL,
container_registry_features text[] DEFAULT '{}'::text[] NOT NULL,
spam_check_endpoint_url text,
spam_check_endpoint_enabled boolean DEFAULT false NOT NULL,
CONSTRAINT check_d03919528d CHECK ((char_length(container_registry_vendor) <= 255)),
CONSTRAINT check_d820146492 CHECK ((char_length(spam_check_endpoint_url) <= 255)),
CONSTRAINT check_e5aba18f02 CHECK ((char_length(container_registry_version) <= 255))
);
......@@ -13880,6 +13883,7 @@ COPY "schema_migrations" (version) FROM STDIN;
20200506125731
20200506154421
20200507221434
20200508050301
20200508091106
20200511080113
20200511083541
......
......@@ -335,6 +335,8 @@ are listed in the descriptions of the relevant settings.
| `sourcegraph_enabled` | boolean | no | Enables Sourcegraph integration. Default is `false`. **If enabled, requires** `sourcegraph_url`. |
| `sourcegraph_url` | string | required by: `sourcegraph_enabled` | The Sourcegraph instance URL for integration. |
| `sourcegraph_public_only` | boolean | no | Blocks Sourcegraph from being loaded on private and internal projects. Default is `true`. |
| `spam_check_endpoint_enabled` | boolean | no | Enables Spam Check via external API endpoint. Default is `false`. |
| `spam_check_endpoint_url` | string | no | URL of the external Spam Check service endpoint. |
| `terminal_max_session_time` | integer | no | Maximum time for web terminal websocket connection (in seconds). Set to `0` for unlimited time. |
| `terms` | text | required by: `enforce_terms` | (**Required by:** `enforce_terms`) Markdown content for the ToS. |
| `throttle_authenticated_api_enabled` | boolean | no | (**If enabled, requires:** `throttle_authenticated_api_period_in_seconds` and `throttle_authenticated_api_requests_per_period`) Enable authenticated API request rate limit. Helps reduce request volume (for example, from crawlers or abusive bots). |
......
......@@ -132,6 +132,10 @@ module API
given sourcegraph_enabled: ->(val) { val } do
requires :sourcegraph_url, type: String, desc: 'The configured Sourcegraph instance URL'
end
optional :spam_check_endpoint_enabled, type: Boolean, desc: 'Enable Spam Check via external API endpoint'
given spam_check_endpoint_enabled: ->(val) { val } do
requires :spam_check_endpoint_url, type: String, desc: 'The URL of the external Spam Check service endpoint'
end
optional :terminal_max_session_time, type: Integer, desc: 'Maximum time for web terminal websocket connection (in seconds). Set to 0 for unlimited time.'
optional :usage_ping_enabled, type: Boolean, desc: 'Every week GitLab will report license usage back to GitLab, Inc.'
optional :instance_statistics_visibility_private, type: Boolean, desc: 'When set to `true` Instance statistics will only be available to admins'
......
......@@ -6933,6 +6933,9 @@ msgstr ""
msgid "Define a custom pattern with cron syntax"
msgstr ""
msgid "Define custom rules for what constitutes spam, independent of Akismet"
msgstr ""
msgid "Define environments in the deploy stage(s) in <code>.gitlab-ci.yml</code> to track deployments here."
msgstr ""
......@@ -8055,6 +8058,9 @@ msgstr ""
msgid "Enable Seat Link"
msgstr ""
msgid "Enable Spam Check via external API endpoint"
msgstr ""
msgid "Enable access to Grafana"
msgstr ""
......@@ -23086,6 +23092,9 @@ msgstr ""
msgid "URL must start with %{codeStart}http://%{codeEnd}, %{codeStart}https://%{codeEnd}, or %{codeStart}ftp://%{codeEnd}"
msgstr ""
msgid "URL of the external Spam Check endpoint"
msgstr ""
msgid "URL of the external storage that will serve the repository static objects (e.g. archives, blobs, ...)."
msgstr ""
......
......@@ -282,11 +282,13 @@ describe 'Admin updates settings', :clean_gitlab_redis_shared_state, :do_not_moc
visit reporting_admin_application_settings_path
page.within('.as-spam') do
check 'Enable reCAPTCHA'
check 'Enable reCAPTCHA for login'
fill_in 'reCAPTCHA Site Key', with: 'key'
fill_in 'reCAPTCHA Private Key', with: 'key'
check 'Enable reCAPTCHA'
check 'Enable reCAPTCHA for login'
fill_in 'IPs per user', with: 15
check 'Enable Spam Check via external API endpoint'
fill_in 'URL of the external Spam Check endpoint', with: 'https://www.example.com/spamcheck'
click_button 'Save changes'
end
......@@ -294,6 +296,8 @@ describe 'Admin updates settings', :clean_gitlab_redis_shared_state, :do_not_moc
expect(current_settings.recaptcha_enabled).to be true
expect(current_settings.login_recaptcha_protection_enabled).to be true
expect(current_settings.unique_ips_limit_per_user).to eq(15)
expect(current_settings.spam_check_endpoint_enabled).to be true
expect(current_settings.spam_check_endpoint_url).to eq 'https://www.example.com/spamcheck'
end
end
......
......@@ -152,6 +152,30 @@ describe ApplicationSetting do
end
end
describe 'spam_check_endpoint' do
context 'when spam_check_endpoint is enabled' do
before do
setting.spam_check_endpoint_enabled = true
end
it { is_expected.to allow_value('https://example.org/spam_check').for(:spam_check_endpoint_url) }
it { is_expected.not_to allow_value('nonsense').for(:spam_check_endpoint_url) }
it { is_expected.not_to allow_value(nil).for(:spam_check_endpoint_url) }
it { is_expected.not_to allow_value('').for(:spam_check_endpoint_url) }
end
context 'when spam_check_endpoint is NOT enabled' do
before do
setting.spam_check_endpoint_enabled = false
end
it { is_expected.to allow_value('https://example.org/spam_check').for(:spam_check_endpoint_url) }
it { is_expected.not_to allow_value('nonsense').for(:spam_check_endpoint_url) }
it { is_expected.to allow_value(nil).for(:spam_check_endpoint_url) }
it { is_expected.to allow_value('').for(:spam_check_endpoint_url) }
end
end
context 'when snowplow is enabled' do
before do
setting.snowplow_enabled = true
......
......@@ -38,6 +38,8 @@ describe API::Settings, 'Settings' do
expect(json_response).not_to have_key('performance_bar_allowed_group_path')
expect(json_response).not_to have_key('performance_bar_enabled')
expect(json_response['snippet_size_limit']).to eq(50.megabytes)
expect(json_response['spam_check_endpoint_enabled']).to be_falsey
expect(json_response['spam_check_endpoint_url']).to be_nil
end
end
......@@ -90,7 +92,9 @@ describe API::Settings, 'Settings' do
push_event_activities_limit: 2,
snippet_size_limit: 5,
issues_create_limit: 300,
raw_blob_request_limit: 300
raw_blob_request_limit: 300,
spam_check_endpoint_enabled: true,
spam_check_endpoint_url: 'https://example.com/spam_check'
}
expect(response).to have_gitlab_http_status(:ok)
......@@ -129,6 +133,8 @@ describe API::Settings, 'Settings' do
expect(json_response['snippet_size_limit']).to eq(5)
expect(json_response['issues_create_limit']).to eq(300)
expect(json_response['raw_blob_request_limit']).to eq(300)
expect(json_response['spam_check_endpoint_enabled']).to be_truthy
expect(json_response['spam_check_endpoint_url']).to eq('https://example.com/spam_check')
end
end
......@@ -390,5 +396,14 @@ describe API::Settings, 'Settings' do
expect(json_response['error']).to eq('sourcegraph_url is missing')
end
end
context "missing spam_check_endpoint_url value when spam_check_endpoint_enabled is true" do
it "returns a blank parameter error message" do
put api("/application/settings", admin), params: { spam_check_endpoint_enabled: true }
expect(response).to have_gitlab_http_status(:bad_request)
expect(json_response['error']).to eq('spam_check_endpoint_url is missing')
end
end
end
end
......@@ -25,20 +25,105 @@ describe Spam::SpamVerdictService do
subject { service.execute }
before do
allow(service).to receive(:akismet_verdict).and_return(nil)
allow(service).to receive(:spam_verdict_verdict).and_return(nil)
end
context 'if all services return nil' do
it 'renders ALLOW verdict' do
expect(subject).to eq ALLOW
end
end
context 'if only one service returns a verdict' do
context 'and it is supported' do
before do
allow(service).to receive(:akismet_verdict).and_return(DISALLOW)
end
it 'renders that verdict' do
expect(subject).to eq DISALLOW
end
end
context 'and it is unexpected' do
before do
allow(service).to receive(:akismet_verdict).and_return("unexpected")
end
it 'allows' do
expect(subject).to eq ALLOW
end
end
end
context 'if more than one service returns a verdict' do
context 'and they are supported' do
before do
allow(service).to receive(:akismet_verdict).and_return(DISALLOW)
allow(service).to receive(:spam_verdict).and_return(BLOCK_USER)
end
it 'renders the more restrictive verdict' do
expect(subject).to eq BLOCK_USER
end
end
context 'and one is supported' do
before do
allow(service).to receive(:akismet_verdict).and_return('nonsense')
allow(service).to receive(:spam_verdict).and_return(BLOCK_USER)
end
it 'renders the more restrictive verdict' do
expect(subject).to eq BLOCK_USER
end
end
context 'and one is supported' do
before do
allow(service).to receive(:akismet_verdict).and_return('nonsense')
allow(service).to receive(:spam_verdict).and_return(BLOCK_USER)
end
it 'renders the more restrictive verdict' do
expect(subject).to eq BLOCK_USER
end
end
context 'and none are supported' do
before do
allow(service).to receive(:akismet_verdict).and_return('nonsense')
allow(service).to receive(:spam_verdict).and_return('rubbish')
end
it 'renders the more restrictive verdict' do
expect(subject).to eq ALLOW
end
end
end
end
describe '#akismet_verdict' do
subject { service.send(:akismet_verdict) }
context 'if Akismet is enabled' do
before do
stub_application_setting(akismet_enabled: true)
allow_next_instance_of(Spam::AkismetService) do |service|
allow(service).to receive(:spam?).and_return(spam_verdict)
allow(service).to receive(:spam?).and_return(akismet_result)
end
end
context 'if Akismet considers it spam' do
let(:spam_verdict) { true }
let(:akismet_result) { true }
context 'if reCAPTCHA is enabled' do
before do
stub_application_setting(recaptcha_enabled: true)
end
it 'requires reCAPTCHA' do
it 'returns require reCAPTCHA verdict' do
expect(subject).to eq REQUIRE_RECAPTCHA
end
end
......@@ -48,18 +133,148 @@ describe Spam::SpamVerdictService do
stub_application_setting(recaptcha_enabled: false)
end
it 'disallows the change' do
it 'renders disallow verdict' do
expect(subject).to eq DISALLOW
end
end
end
context 'if Akismet does not consider it spam' do
let(:spam_verdict) { false }
let(:akismet_result) { false }
it 'allows the change' do
it 'renders allow verdict' do
expect(subject).to eq ALLOW
end
end
end
context 'if Akismet is not enabled' do
before do
stub_application_setting(akismet_enabled: false)
end
it 'renders allow verdict' do
expect(subject).to eq ALLOW
end
end
end
describe '#spam_verdict' do
subject { service.send(:spam_verdict) }
context 'if a Spam Check endpoint enabled and set to a URL' do
let(:spam_check_body) { {} }
let(:spam_check_http_status) { nil }
before do
stub_application_setting(spam_check_endpoint_enabled: true)
stub_application_setting(spam_check_endpoint_url: "http://www.spamcheckurl.com/spam_check")
stub_request(:any, /.*spamcheckurl.com.*/).to_return( body: spam_check_body.to_json, status: spam_check_http_status )
end
context 'if the endpoint is accessible' do
let(:spam_check_http_status) { 200 }
let(:error) { nil }
let(:verdict) { nil }
let(:spam_check_body) do
{ verdict: verdict, error: error }
end
context 'the result is a valid verdict' do
let(:verdict) { 'allow' }
it 'returns the verdict' do
expect(subject).to eq ALLOW
end
end
context 'the verdict is an unexpected string' do
let(:verdict) { 'this is fine' }
it 'returns nil' do
expect(subject).to be_nil
end
end
context 'the JSON is malformed' do
let(:spam_check_body) { 'this is fine' }
it 'returns allow' do
expect(subject).to eq ALLOW
end
end
context 'the verdict is an empty string' do
let(:verdict) { '' }
it 'returns nil' do
expect(subject).to be_nil
end
end
context 'the verdict is nil' do
let(:verdict) { nil }
it 'returns nil' do
expect(subject).to be_nil
end
end
context 'there is an error' do
let(:error) { "Sorry Dave, I can't do that" }
it 'returns nil' do
expect(subject).to be_nil
end
end
context 'the HTTP status is not 200' do
let(:spam_check_http_status) { 500 }
it 'returns nil' do
expect(subject).to be_nil
end
end
context 'the confused API endpoint returns both an error and a verdict' do
let(:verdict) { 'disallow' }
let(:error) { 'oh noes!' }
it 'renders the verdict' do
expect(subject).to eq DISALLOW
end
end
end
context 'if the endpoint times out' do
before do
stub_request(:any, /.*spamcheckurl.com.*/).to_timeout
end
it 'returns nil' do
expect(subject).to be_nil
end
end
end
context 'if a Spam Check endpoint is not set' do
before do
stub_application_setting(spam_check_endpoint_url: nil)
end
it 'returns nil' do
expect(subject).to be_nil
end
end
context 'if Spam Check endpoint is not enabled' do
before do
stub_application_setting(spam_check_endpoint_enabled: false)
end
it 'returns nil' do
expect(subject).to be_nil
end
end
end
end
......@@ -5,5 +5,6 @@ shared_context 'includes Spam constants' do
stub_const('REQUIRE_RECAPTCHA', Spam::SpamConstants::REQUIRE_RECAPTCHA)
stub_const('DISALLOW', Spam::SpamConstants::DISALLOW)
stub_const('ALLOW', Spam::SpamConstants::ALLOW)
stub_const('BLOCK_USER', Spam::SpamConstants::BLOCK_USER)
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment