Commit ec7a853e authored by rpereira2

Use the pod and container labels

- K8s 1.14 introduced the pod and container labels and deprecated the
pod_name and container_name labels. K8s 1.16 removes the pod_name and
container_name labels.

- Metrics from K8s versions before 1.14 contain pod_name and
container_name labels. Metrics from K8s 1.14/1.15 contain pod, pod_name,
container and container_name labels. Metrics from K8s 1.16 onwards
contain pod and container labels.

- According to our docs, we need to support K8s 1.12 to 1.16.

- This commit changes existing queries to use pod instead of pod_name
and container instead of container_name. These changed queries should
work on K8s 1.14 onwards.

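- This commit also adds a second query, joined to the first with `OR`.
The second query uses pod_name instead of pod, and container_name
instead of container. This second query should work on K8s 1.12 to 1.15.
On K8s 1.14/1.15, where both queries return data with the same label
set, `OR` keeps the result of the first query, so values are not
duplicated.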
parent c5dfc913
---
title: 'Fix the default metrics dashboard to work on K8s versions 1.12 to 1.16'
merge_request: 36863
author:
type: fixed
...@@ -10,7 +10,9 @@ panel_groups: ...@@ -10,7 +10,9 @@ panel_groups:
weight: 4 weight: 4
metrics: metrics:
- id: system_metrics_kubernetes_container_memory_total - id: system_metrics_kubernetes_container_memory_total
query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) /1024/1024/1024' # Remove the second metric (after OR) when we drop support for K8s 1.13
# https://gitlab.com/gitlab-org/gitlab/-/issues/229279
query_range: 'avg(sum(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) /1024/1024/1024 OR avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) /1024/1024/1024'
label: Total (GB) label: Total (GB)
unit: GB unit: GB
- title: "Core Usage (Total)" - title: "Core Usage (Total)"
...@@ -19,7 +21,9 @@ panel_groups: ...@@ -19,7 +21,9 @@ panel_groups:
weight: 3 weight: 3
metrics: metrics:
- id: system_metrics_kubernetes_container_cores_total - id: system_metrics_kubernetes_container_cores_total
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job)' # Remove the second metric (after OR) when we drop support for K8s 1.13
# https://gitlab.com/gitlab-org/gitlab/-/issues/229279
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) OR avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job)'
label: Total (cores) label: Total (cores)
unit: "cores" unit: "cores"
- title: "Memory Usage (Pod average)" - title: "Memory Usage (Pod average)"
...@@ -28,7 +32,9 @@ panel_groups: ...@@ -28,7 +32,9 @@ panel_groups:
weight: 2 weight: 2
metrics: metrics:
- id: system_metrics_kubernetes_container_memory_average - id: system_metrics_kubernetes_container_memory_average
query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024' # Remove the second metric (after OR) when we drop support for K8s 1.13
# https://gitlab.com/gitlab-org/gitlab/-/issues/229279
query_range: 'avg(sum(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024 OR avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024'
label: Pod average (MB) label: Pod average (MB)
unit: MB unit: MB
- title: "Canary: Memory Usage (Pod Average)" - title: "Canary: Memory Usage (Pod Average)"
...@@ -37,7 +43,9 @@ panel_groups: ...@@ -37,7 +43,9 @@ panel_groups:
weight: 2 weight: 2
metrics: metrics:
- id: system_metrics_kubernetes_container_memory_average_canary - id: system_metrics_kubernetes_container_memory_average_canary
query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024' # Remove the second metric (after OR) when we drop support for K8s 1.13
# https://gitlab.com/gitlab-org/gitlab/-/issues/229279
query_range: 'avg(sum(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024 OR avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024'
label: Pod average (MB) label: Pod average (MB)
unit: MB unit: MB
track: canary track: canary
...@@ -47,7 +55,9 @@ panel_groups: ...@@ -47,7 +55,9 @@ panel_groups:
weight: 1 weight: 1
metrics: metrics:
- id: system_metrics_kubernetes_container_core_usage - id: system_metrics_kubernetes_container_core_usage
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod_name))' # Remove the second metric (after OR) when we drop support for K8s 1.13
# https://gitlab.com/gitlab-org/gitlab/-/issues/229279
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod)) OR avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod_name))'
label: Pod average (cores) label: Pod average (cores)
unit: "cores" unit: "cores"
- title: "Canary: Core Usage (Pod Average)" - title: "Canary: Core Usage (Pod Average)"
...@@ -56,7 +66,9 @@ panel_groups: ...@@ -56,7 +66,9 @@ panel_groups:
weight: 1 weight: 1
metrics: metrics:
- id: system_metrics_kubernetes_container_core_usage_canary - id: system_metrics_kubernetes_container_core_usage_canary
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod_name))' # Remove the second metric (after OR) when we drop support for K8s 1.13
# https://gitlab.com/gitlab-org/gitlab/-/issues/229279
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod)) OR avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod_name))'
label: Pod average (cores) label: Pod average (cores)
unit: "cores" unit: "cores"
track: canary track: canary
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment