Commit ec7a853e authored by rpereira2's avatar rpereira2

Use the pod and container labels

- K8s 1.14 introduced the pod and container labels and deprecated the
pod_name and container_name labels. K8s 1.16 removes the pod_name and
container_name labels.

- Metrics from K8s versions before 1.14 contain pod_name and
container_name labels. Metrics from K8s 1.14/1.15 contain pod, pod_name,
container and container_name labels. Metrics from K8s 1.16 onwards
contain pod and container labels.

- According to our docs, we need to support K8s 1.12 to 1.16.

- This commit changes existing queries to use pod instead of pod_name
and container instead of container_name. These changed queries should
work on K8s 1.14 onwards.

- This commit also adds a second query using `OR`. The second query
uses pod_name instead of pod, and container_name instead of container.
This second query should work on K8s 1.12 to 1.15.
parent c5dfc913
---
title: 'Fix the default metrics dashboard to work on K8s versions 1.12 to 1.16'
merge_request: 36863
author:
type: fixed
......@@ -10,7 +10,9 @@ panel_groups:
weight: 4
metrics:
- id: system_metrics_kubernetes_container_memory_total
query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) /1024/1024/1024'
# Remove the second metric (after OR) when we drop support for K8s 1.13
# https://gitlab.com/gitlab-org/gitlab/-/issues/229279
query_range: 'avg(sum(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) /1024/1024/1024 OR avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) /1024/1024/1024'
label: Total (GB)
unit: GB
- title: "Core Usage (Total)"
......@@ -19,7 +21,9 @@ panel_groups:
weight: 3
metrics:
- id: system_metrics_kubernetes_container_cores_total
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job)'
# Remove the second metric (after OR) when we drop support for K8s 1.13
# https://gitlab.com/gitlab-org/gitlab/-/issues/229279
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) OR avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job)'
label: Total (cores)
unit: "cores"
- title: "Memory Usage (Pod average)"
......@@ -28,7 +32,9 @@ panel_groups:
weight: 2
metrics:
- id: system_metrics_kubernetes_container_memory_average
query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024'
# Remove the second metric (after OR) when we drop support for K8s 1.13
# https://gitlab.com/gitlab-org/gitlab/-/issues/229279
query_range: 'avg(sum(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024 OR avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024'
label: Pod average (MB)
unit: MB
- title: "Canary: Memory Usage (Pod Average)"
......@@ -37,7 +43,9 @@ panel_groups:
weight: 2
metrics:
- id: system_metrics_kubernetes_container_memory_average_canary
query_range: 'avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024'
# Remove the second metric (after OR) when we drop support for K8s 1.13
# https://gitlab.com/gitlab-org/gitlab/-/issues/229279
query_range: 'avg(sum(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container!="POD",pod=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024 OR avg(sum(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) by (job)) without (job) / count(avg(container_memory_usage_bytes{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}) without (job)) /1024/1024'
label: Pod average (MB)
unit: MB
track: canary
......@@ -47,7 +55,9 @@ panel_groups:
weight: 1
metrics:
- id: system_metrics_kubernetes_container_core_usage
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod_name))'
# Remove the second metric (after OR) when we drop support for K8s 1.13
# https://gitlab.com/gitlab-org/gitlab/-/issues/229279
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod)) OR avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-([^c].*|c([^a]|a([^n]|n([^a]|a([^r]|r[^y])))).*|)-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod_name))'
label: Pod average (cores)
unit: "cores"
- title: "Canary: Core Usage (Pod Average)"
......@@ -56,7 +66,9 @@ panel_groups:
weight: 1
metrics:
- id: system_metrics_kubernetes_container_core_usage_canary
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod_name))'
# Remove the second metric (after OR) when we drop support for K8s 1.13
# https://gitlab.com/gitlab-org/gitlab/-/issues/229279
query_range: 'avg(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container!="POD",pod=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod)) OR avg(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (job)) without (job) / count(sum(rate(container_cpu_usage_seconds_total{container_name!="POD",pod_name=~"^{{ci_environment_slug}}-canary-(.*)",namespace="{{kube_namespace}}"}[15m])) by (pod_name))'
label: Pod average (cores)
unit: "cores"
track: canary
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment