Commit 8b4a4664 authored by Toon Claes's avatar Toon Claes

Add Geo Prometheus metrics about the various number of events

Add metrics for:
 - Geo event log count
 - Geo event log max ID
 - Repository created max ID
 - Repository updated max ID
 - Repository deleted max ID
 - Repository renamed max ID
 - Repositories changed max ID
 - LFS object deleted max ID
 - Job artifact deleted max ID
 - Hashed storage migrated max ID
 - Hashed storage attachments max ID

It logs maximum IDs because Geo::PruneEventLogWorker might remove old
events, so COUNT does not represent an accurate cumulative value for
the number of events created.

Closes gitlab-org/gitlab-ee#4688.
parent 5281e722
---
title: Add Geo Prometheus metrics about the various number of events
merge_request: 4413
author:
type: added
......@@ -8,6 +8,13 @@ class GeoNodeStatus < ActiveRecord::Base
attr_writer :health_status
attr_accessor :storage_shards
# Prometheus metrics, no need to store them in the database
attr_accessor :event_log_count, :event_log_max_id,
:repository_created_max_id, :repository_updated_max_id,
:repository_deleted_max_id, :repository_renamed_max_id, :repositories_changed_max_id,
:lfs_object_deleted_max_id, :job_artifact_deleted_max_id,
:hashed_storage_migrated_max_id, :hashed_storage_attachments_max_id
# Be sure to keep this consistent with Prometheus naming conventions
PROMETHEUS_METRICS = {
db_replication_lag_seconds: 'Database replication lag (seconds)',
......@@ -34,7 +41,18 @@ class GeoNodeStatus < ActiveRecord::Base
cursor_last_event_id: 'Last database ID of the event log processed by the secondary',
cursor_last_event_timestamp: 'Time of the event log processed by the secondary',
last_successful_status_check_timestamp: 'Time when Geo node status was updated internally',
status_message: 'Summary of health status'
status_message: 'Summary of health status',
event_log_count: 'Number of entries in the Geo event log',
event_log_max_id: 'Highest ID present in the Geo event log',
repository_created_max_id: 'Highest ID present in repositories created',
repository_updated_max_id: 'Highest ID present in repositories updated',
repository_deleted_max_id: 'Highest ID present in repositories deleted',
repository_renamed_max_id: 'Highest ID present in repositories renamed',
repositories_changed_max_id: 'Highest ID present in repositories changed',
lfs_object_deleted_max_id: 'Highest ID present in LFS objects deleted',
job_artifact_deleted_max_id: 'Highest ID present in job artifacts deleted',
hashed_storage_migrated_max_id: 'Highest ID present in projects migrated to hashed storage',
hashed_storage_attachments_max_id: 'Highest ID present in attachments migrated to hashed storage'
}.freeze
def self.current_node_status
......@@ -94,6 +112,19 @@ class GeoNodeStatus < ActiveRecord::Base
self.version = Gitlab::VERSION
self.revision = Gitlab::REVISION
self.event_log_count = Geo::EventLog.count
# Geo::PruneEventLogWorker might remove old events, so log maximum id
self.event_log_max_id = Geo::EventLog.maximum(:id)
self.repository_created_max_id = Geo::RepositoryCreatedEvent.maximum(:id)
self.repository_updated_max_id = Geo::RepositoryUpdatedEvent.maximum(:id)
self.repository_deleted_max_id = Geo::RepositoryDeletedEvent.maximum(:id)
self.repository_renamed_max_id = Geo::RepositoryRenamedEvent.maximum(:id)
self.repositories_changed_max_id = Geo::RepositoriesChangedEvent.maximum(:id)
self.lfs_object_deleted_max_id = Geo::LfsObjectDeletedEvent.maximum(:id)
self.job_artifact_deleted_max_id = Geo::JobArtifactDeletedEvent.maximum(:id)
self.hashed_storage_migrated_max_id = Geo::HashedStorageMigratedEvent.maximum(:id)
self.hashed_storage_attachments_max_id = Geo::HashedStorageAttachmentsEvent.maximum(:id)
load_primary_data
load_secondary_data
......
......@@ -34,7 +34,18 @@ describe Geo::MetricsUpdateService, :geo do
last_event_id: 2,
last_event_date: event_date,
cursor_last_event_id: 1,
cursor_last_event_date: event_date
cursor_last_event_date: event_date,
event_log_count: 55,
event_log_max_id: 555,
repository_created_max_id: 43,
repository_updated_max_id: 132,
repository_deleted_max_id: 23,
repository_renamed_max_id: 11,
repositories_changed_max_id: 109,
lfs_object_deleted_max_id: 84,
job_artifact_deleted_max_id: 78,
hashed_storage_migrated_max_id: 9,
hashed_storage_attachments_max_id: 65
}
end
......@@ -124,6 +135,17 @@ describe Geo::MetricsUpdateService, :geo do
expect(metric_value(:geo_cursor_last_event_id)).to eq(1)
expect(metric_value(:geo_cursor_last_event_timestamp)).to eq(event_date.to_i)
expect(metric_value(:geo_last_successful_status_check_timestamp)).to be_truthy
expect(metric_value(:geo_event_log)).to eq(55)
expect(metric_value(:geo_event_log_max_id)).to eq(555)
expect(metric_value(:geo_repository_created_max_id)).to eq(43)
expect(metric_value(:geo_repository_updated_max_id)).to eq(132)
expect(metric_value(:geo_repository_deleted_max_id)).to eq(23)
expect(metric_value(:geo_repository_renamed_max_id)).to eq(11)
expect(metric_value(:geo_repositories_changed_max_id)).to eq(109)
expect(metric_value(:geo_lfs_object_deleted_max_id)).to eq(84)
expect(metric_value(:geo_job_artifact_deleted_max_id)).to eq(78)
expect(metric_value(:geo_hashed_storage_migrated_max_id)).to eq(9)
expect(metric_value(:geo_hashed_storage_attachments_max_id)).to eq(65)
end
it 'increments a counter when metrics fail to retrieve' do
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment