Commit a9122d43 authored by Nick Thomas

Merge branch 'tc-geo-repo-check-metrics' into 'master'

Log repository check and failed count to Prometheus

Closes #5944

See merge request gitlab-org/gitlab-ee!5984
parents c9374dca ab859f88
# Adds (and, on rollback, removes) a partial index on
# projects.last_repository_check_at that only covers rows where the column
# is set.
class AddPartialIndexToProjectsForLastRepositoryCheckAt < ActiveRecord::Migration
  include Gitlab::Database::MigrationHelpers

  # Flip to true only if running this migration requires downtime.
  DOWNTIME = false
  INDEX_NAME = 'index_projects_on_last_repository_check_at'

  # The concurrent index helpers are used below, so the migration opts out
  # of the wrapping DDL transaction.
  disable_ddl_transaction!

  def up
    add_concurrent_index(:projects, :last_repository_check_at, partial_index_options)
  end

  def down
    remove_concurrent_index(:projects, :last_repository_check_at, partial_index_options)
  end

  private

  # Options shared by `up` and `down` so both always describe the same index.
  def partial_index_options
    { where: 'last_repository_check_at IS NOT NULL', name: INDEX_NAME }
  end
end
......@@ -11,7 +11,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema.define(version: 20180626125654) do
ActiveRecord::Schema.define(version: 20180629191052) do
# These are extensions that must be enabled in order to support this database
enable_extension "plpgsql"
......@@ -2140,6 +2140,7 @@ ActiveRecord::Schema.define(version: 20180626125654) do
add_index "projects", ["id"], name: "index_projects_on_id_partial_for_visibility", unique: true, where: "(visibility_level = ANY (ARRAY[10, 20]))", using: :btree
add_index "projects", ["id"], name: "index_projects_on_mirror_and_mirror_trigger_builds_both_true", where: "((mirror IS TRUE) AND (mirror_trigger_builds IS TRUE))", using: :btree
add_index "projects", ["last_activity_at"], name: "index_projects_on_last_activity_at", using: :btree
add_index "projects", ["last_repository_check_at"], name: "index_projects_on_last_repository_check_at", where: "(last_repository_check_at IS NOT NULL)", using: :btree
add_index "projects", ["last_repository_check_failed"], name: "index_projects_on_last_repository_check_failed", using: :btree
add_index "projects", ["last_repository_updated_at"], name: "index_projects_on_last_repository_updated_at", using: :btree
add_index "projects", ["mirror_last_successful_update_at"], name: "index_projects_on_mirror_last_successful_update_at", using: :btree
......
......@@ -86,6 +86,8 @@ the `monitoring.sidekiq_exporter` configuration option in `gitlab.yml`.
| geo_wikis_verified_count | Gauge | 10.7 | Number of wikis verified on secondary | url
| geo_wikis_verification_failed_count | Gauge | 10.7 | Number of wikis failed to verify on secondary | url
| geo_wikis_checksum_mismatch_count | Gauge | 10.7 | Number of wikis with a checksum mismatch on secondary | url
| geo_repositories_checked_count | Gauge | 11.1 | Number of repositories that have been checked via `git fsck` | url
| geo_repositories_checked_failed_count | Gauge | 11.1 | Number of repositories that have a failure from `git fsck` | url
### Ruby metrics
......
......@@ -189,6 +189,9 @@ Example response:
"replication_slots_used_count": 1,
"replication_slots_used_in_percentage": "100.00%",
"replication_slots_max_retained_wal_bytes": 0,
"repositories_checked_count": 20,
"repositories_checked_failed_count": 20,
"repositories_checked_in_percentage": "100.00%",
"repositories_checksummed_count": 20,
"repositories_checksum_failed_count": 5,
"repositories_checksummed_in_percentage": "48.78%",
......@@ -203,6 +206,9 @@ Example response:
"wikis_verification_failed_count": 3,
"wikis_verified_in_percentage": "24.39%",
"wikis_checksum_mismatch_count": 1,
"repositories_checked_count": 7,
"repositories_checked_failed_count": 2,
"repositories_checked_in_percentage": "17.07%",
"last_event_id": 23,
"last_event_timestamp": 1509681166,
"cursor_last_event_id": null,
......@@ -259,6 +265,9 @@ Example response:
"wikis_verification_failed_count": 3,
"wikis_verified_in_percentage": "24.39%",
"wikis_checksum_mismatch_count": 1,
"repositories_checked_count": 5,
"repositories_checked_failed_count": 1,
"repositories_checked_in_percentage": "12.20%",
"last_event_id": 23,
"last_event_timestamp": 1509681166,
"cursor_last_event_id": 23,
......
......@@ -15,7 +15,8 @@ class GeoNodeStatus < ActiveRecord::Base
:repository_deleted_max_id, :repository_renamed_max_id, :repositories_changed_max_id,
:lfs_object_deleted_max_id, :job_artifact_deleted_max_id,
:lfs_objects_registry_count, :job_artifacts_registry_count, :attachments_registry_count,
:hashed_storage_migrated_max_id, :hashed_storage_attachments_max_id
:hashed_storage_migrated_max_id, :hashed_storage_attachments_max_id,
:repositories_checked_count, :repositories_checked_failed_count
# Be sure to keep this consistent with Prometheus naming conventions
PROMETHEUS_METRICS = {
......@@ -70,7 +71,9 @@ class GeoNodeStatus < ActiveRecord::Base
lfs_object_deleted_max_id: 'Highest ID present in LFS objects deleted',
job_artifact_deleted_max_id: 'Highest ID present in job artifacts deleted',
hashed_storage_migrated_max_id: 'Highest ID present in projects migrated to hashed storage',
hashed_storage_attachments_max_id: 'Highest ID present in attachments migrated to hashed storage'
hashed_storage_attachments_max_id: 'Highest ID present in attachments migrated to hashed storage',
repositories_checked_count: 'Number of repositories checked',
repositories_checked_failed_count: 'Number of failed repositories checked'
}.freeze
EXPIRATION_IN_MINUTES = 5
......@@ -187,6 +190,9 @@ class GeoNodeStatus < ActiveRecord::Base
self.wikis_checksummed_count = repository_verification_finder.count_verified_wikis
self.wikis_checksum_failed_count = repository_verification_finder.count_verification_failed_wikis
end
self.repositories_checked_count = Project.where.not(last_repository_check_at: nil).count
self.repositories_checked_failed_count = Project.where(last_repository_check_failed: true).count
end
end
......@@ -213,6 +219,9 @@ class GeoNodeStatus < ActiveRecord::Base
self.attachments_synced_missing_on_primary_count = attachments_finder.count_synced_missing_on_primary
load_verification_data
self.repositories_checked_count = Geo::ProjectRegistry.where.not(last_repository_check_at: nil).count
self.repositories_checked_failed_count = Geo::ProjectRegistry.where(last_repository_check_failed: true).count
end
end
......@@ -301,6 +310,10 @@ class GeoNodeStatus < ActiveRecord::Base
calc_percentage(wikis_count, wikis_verified_count)
end
# Share of repositories that have been through a repository check,
# computed by calc_percentage from the total and the checked count.
def repositories_checked_in_percentage
  total = repositories_count
  checked = repositories_checked_count

  calc_percentage(total, checked)
end
# Share of LFS objects that have been synced, computed by calc_percentage
# from the total and the synced count.
def lfs_objects_synced_in_percentage
  total = lfs_objects_count
  synced = lfs_objects_synced_count

  calc_percentage(total, synced)
end
......
---
title: Log repository check and failed count to Prometheus
merge_request: 5984
author:
type: added
......@@ -332,6 +332,12 @@ module EE
end
expose :replication_slots_max_retained_wal_bytes
expose :repositories_checked_count
expose :repositories_checked_failed_count
expose :repositories_checked_in_percentage do |node|
number_to_percentage(node.repositories_checked_in_percentage, precision: 2)
end
expose :last_event_id
expose :last_event_timestamp
expose :cursor_last_event_id
......
......@@ -281,6 +281,13 @@ namespace :geo do
print "#{current_node_status.attachments_synced_count}/#{current_node_status.attachments_count} "
puts using_percentage(current_node_status.attachments_synced_in_percentage)
if Gitlab::CurrentSettings.repository_checks_enabled
print 'Repositories Checked: '.rjust(COLUMN_WIDTH)
show_failed_value(current_node_status.repositories_checked_failed_count)
print "#{current_node_status.repositories_checked_count}/#{current_node_status.repositories_count} "
puts using_percentage(current_node_status.repositories_checked_in_percentage)
end
print 'Sync Settings: '.rjust(COLUMN_WIDTH)
puts geo_node.namespaces.any? ? 'Selective' : 'Full'
......
......@@ -40,6 +40,9 @@
"wikis_verification_failed_count",
"wikis_verified_in_percentage",
"wikis_checksum_mismatch_count",
"repositories_checked_count",
"repositories_checked_failed_count",
"repositories_checked_in_percentage",
"replication_slots_count",
"replication_slots_used_count",
"replication_slots_used_in_percentage",
......@@ -100,6 +103,9 @@
"wikis_verification_failed_count": { "type": ["integer", "null"] },
"wikis_verified_in_percentage": { "type": "string" },
"wikis_checksum_mismatch_count": { "type": ["integer", "null"] },
"repositories_checked_count": { "type": ["integer", "null"] },
"repositories_checked_failed_count": { "type": ["integer", "null"] },
"repositories_checked_in_percentage": { "type": "string" },
"replication_slots_count": { "type": ["integer", "null"] },
"replication_slots_used_count": { "type": ["integer", "null"] },
"replication_slots_used_in_percentage": { "type": "string" },
......
......@@ -910,4 +910,70 @@ describe GeoNodeStatus, :geo do
expect(result.storage_shards_match?).to be true
end
end
# Exercises status.repositories_checked_count on a primary and on a secondary
# node: only records carrying a last_repository_check_at value should count.
# NOTE(review): `status`, `primary`, `secondary` and `project_1..3` are defined
# outside this group — confirm how `status` is built before reordering anything.
describe '#repositories_checked_count' do
before do
# Every example in this group runs with repository checks enabled.
stub_application_setting(repository_checks_enabled: true)
end
context 'current is a Geo primary' do
before do
stub_current_geo_node(primary)
end
it 'counts the number of repo checked projects' do
# On the primary the check timestamps are written to the projects themselves.
project_1.update!(last_repository_check_at: 2.minutes.ago)
project_2.update!(last_repository_check_at: 7.minutes.ago)
expect(status.repositories_checked_count).to eq(2)
end
end
context 'current is a Geo secondary' do
before do
stub_current_geo_node(secondary)
end
it 'counts the number of repo checked projects' do
# On the secondary the check timestamps are set on project registry records.
create(:geo_project_registry, project: project_1, last_repository_check_at: 2.minutes.ago)
create(:geo_project_registry, project: project_2, last_repository_check_at: 7.minutes.ago)
# Registry created without a check timestamp; expected to be excluded
# (presumably the factory leaves last_repository_check_at nil — verify).
create(:geo_project_registry, project: project_3)
expect(status.repositories_checked_count).to eq(2)
end
end
end
# Exercises status.repositories_checked_failed_count on a primary and on a
# secondary node: only records flagged last_repository_check_failed: true
# should count.
# NOTE(review): `status`, `primary`, `secondary` and `project_1..3` are defined
# outside this group — confirm how `status` is built before reordering anything.
describe '#repositories_checked_failed_count' do
before do
# Every example in this group runs with repository checks enabled.
stub_application_setting(repository_checks_enabled: true)
end
context 'current is a Geo primary' do
before do
stub_current_geo_node(primary)
end
it 'counts the number of repo check failed projects' do
# Both projects were checked, but only project_1 is marked as failed.
project_1.update!(last_repository_check_at: 2.minutes.ago, last_repository_check_failed: true)
project_2.update!(last_repository_check_at: 7.minutes.ago, last_repository_check_failed: false)
expect(status.repositories_checked_failed_count).to eq(1)
end
end
context 'current is a Geo secondary' do
before do
stub_current_geo_node(secondary)
end
it 'counts the number of repo check failed projects' do
# Both registries were checked, but only project_1's is marked as failed.
create(:geo_project_registry, project: project_1, last_repository_check_at: 2.minutes.ago, last_repository_check_failed: true)
create(:geo_project_registry, project: project_2, last_repository_check_at: 7.minutes.ago, last_repository_check_failed: false)
# Registry created with no check attributes; expected to be excluded
# (presumably the factory does not default last_repository_check_failed to true — verify).
create(:geo_project_registry, project: project_3)
expect(status.repositories_checked_failed_count).to eq(1)
end
end
end
end
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment