From 7cbfa0f9969f98f77ef207a36b7da61695840f3f Mon Sep 17 00:00:00 2001 From: Ultradesu Date: Mon, 6 Apr 2026 15:38:18 +0100 Subject: [PATCH] Added telemt dashboard --- k8s/apps/mtproxy/telemt-servicemonitor.yaml | 3 + k8s/core/prom-stack/telemt-dashboard-cm.yaml | 456 ++++++------------- 2 files changed, 153 insertions(+), 306 deletions(-) diff --git a/k8s/apps/mtproxy/telemt-servicemonitor.yaml b/k8s/apps/mtproxy/telemt-servicemonitor.yaml index d953339..3fa9cb1 100644 --- a/k8s/apps/mtproxy/telemt-servicemonitor.yaml +++ b/k8s/apps/mtproxy/telemt-servicemonitor.yaml @@ -16,6 +16,9 @@ spec: interval: 30s scrapeTimeout: 10s honorLabels: true + relabelings: + - sourceLabels: [__meta_kubernetes_pod_node_name] + targetLabel: node namespaceSelector: matchNames: - mtproxy diff --git a/k8s/core/prom-stack/telemt-dashboard-cm.yaml b/k8s/core/prom-stack/telemt-dashboard-cm.yaml index 3453b73..4da0bb3 100644 --- a/k8s/core/prom-stack/telemt-dashboard-cm.yaml +++ b/k8s/core/prom-stack/telemt-dashboard-cm.yaml @@ -7,9 +7,7 @@ metadata: data: telemt.json: |- { - "annotations": { - "list": [] - }, + "annotations": { "list": [] }, "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 1, @@ -18,83 +16,17 @@ data: "liveNow": false, "panels": [ { - "title": "Uptime", - "type": "stat", - "gridPos": { "h": 4, "w": 4, "x": 0, "y": 0 }, + "title": "Nodes Overview", + "type": "table", + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 0 }, "id": 1, - "options": { - "colorMode": "value", - "graphMode": "none", - "reduceOptions": { "calc": "lastNotNull", "fields": "", "values": false }, - "textMode": "auto" - }, - "fieldConfig": { - "defaults": { - "unit": "s", - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "red", "value": null }, - { "color": "green", "value": 60 } - ] - } - }, - "overrides": [] - }, - "targets": [ - { - "expr": "telemt_uptime_seconds{instance=~\"$instance\"}", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "datasource": { "type": "prometheus", "uid": "${datasource}" } - }, - { - "title": "Active ME Writers", - "type": "stat", - "gridPos": { "h": 4, "w": 4, "x": 4, "y": 0 }, - "id": 2, - "options": { - "colorMode": "value", - "graphMode": "area", - "reduceOptions": { "calc": "lastNotNull", "fields": "", "values": false }, - "textMode": "auto" - }, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "red", "value": null }, - { "color": "green", "value": 1 } - ] - } - }, - "overrides": [] - }, - "targets": [ - { - "expr": "telemt_me_writers_active_current{instance=~\"$instance\"}", - "legendFormat": "{{instance}}", - "refId": "A" - } - ], - "datasource": { "type": "prometheus", "uid": "${datasource}" } - }, - { - "title": "Buffer Pool", - "type": "stat", - "gridPos": { "h": 4, "w": 4, "x": 8, "y": 0 }, - "id": 3, - "options": { - "colorMode": "value", - "graphMode": "none", - "reduceOptions": { "calc": "lastNotNull", "fields": "", "values": false }, - "textMode": "auto" - }, "fieldConfig": { "defaults": { + "custom": { + "align": "auto", + "cellOptions": { "type": "auto" }, + "inspect": false + }, "thresholds": { "mode": "absolute", "steps": [ @@ -102,49 +34,107 @@ data: ] } }, - "overrides": [] - }, - "targets": [ - { - "expr": "telemt_buffer_pool_buffers_total{instance=~\"$instance\", kind=\"in_use\"}", - "legendFormat": "{{instance}} in_use", - "refId": "A" - } - ], - "datasource": { "type": "prometheus", "uid": "${datasource}" } - }, - { - "title": "Connections (total / bad)", - "type": "stat", - "gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 }, - "id": 4, - "options": { - "colorMode": "value", - "graphMode": "area", - "reduceOptions": { "calc": "lastNotNull", "fields": "", "values": false }, - "textMode": "auto" - }, - "fieldConfig": { - "defaults": { - "thresholds": { - "mode": "absolute", - "steps": [ - { "color": "green", "value": null } + "overrides": [ + { + "matcher": { "id": "byName", "options": "Uptime" }, + "properties": [ + { "id": "unit", "value": "dtdurations" }, + { "id": "custom.width", "value": 140 } + ] + }, + { + "matcher": { "id": "byName", "options": "Bad Conn" }, + "properties": [ + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 10 }, { "color": "red", "value": 100 }] } }, + { "id": "custom.cellOptions", "value": { "type": "color-background", "mode": "basic" } } + ] + }, + { + "matcher": { "id": "byName", "options": "Writers" }, + "properties": [ + { "id": "thresholds", "value": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } }, + { "id": "custom.cellOptions", "value": { "type": "color-background", "mode": "basic" } } ] } - }, - "overrides": [] + ] }, + "options": { + "showHeader": true, + "sortBy": [{ "displayName": "Node", "desc": false }], + "frameIndex": 0, + "footer": { "show": false } + }, + "transformations": [ + { + "id": "joinByField", + "options": { "byField": "node", "mode": "outer" } + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "Time 1": true, + "Time 2": true, + "Time 3": true, + "Time 4": true, + "Time 5": true, + "Time 6": true + }, + "renameByName": { + "node": "Node", + "Value #uptime": "Uptime", + "Value #writers": "Writers", + "Value #buffers": "Buffers In Use", + "Value #connections": "Connections", + "Value #bad": "Bad Conn", + "Value #hs_timeout": "HS Timeouts" + } + } + } + ], "targets": [ { - "expr": "telemt_connections_total{instance=~\"$instance\"}", - "legendFormat": "{{instance}} total", - "refId": "A" + "expr": "telemt_uptime_seconds{node=~\"$node\"}", + "legendFormat": "", + "refId": "uptime", + "format": "table", + "instant": true }, { - "expr": "telemt_connections_bad_total{instance=~\"$instance\"}", - "legendFormat": "{{instance}} bad", - "refId": "B" + "expr": "telemt_me_writers_active_current{node=~\"$node\"}", + "legendFormat": "", + "refId": "writers", + "format": "table", + "instant": true + }, + { + "expr": "telemt_buffer_pool_buffers_total{node=~\"$node\", kind=\"in_use\"}", + "legendFormat": "", + "refId": "buffers", + "format": "table", + "instant": true + }, + { + "expr": "telemt_connections_total{node=~\"$node\"}", + "legendFormat": "", + "refId": "connections", + "format": "table", + "instant": true + }, + { + "expr": "telemt_connections_bad_total{node=~\"$node\"}", + "legendFormat": "", + "refId": "bad", + "format": "table", + "instant": true + }, + { + "expr": "telemt_handshake_timeouts_total{node=~\"$node\"}", + "legendFormat": "", + "refId": "hs_timeout", + "format": "table", + "instant": true } ], "datasource": { "type": "prometheus", "uid": "${datasource}" } @@ -152,17 +142,11 @@ data: { "title": "Connections Rate", "type": "timeseries", - "gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, "id": 10, "fieldConfig": { "defaults": { - "custom": { - "drawStyle": "line", - "lineInterpolation": "smooth", - "fillOpacity": 15, - "pointSize": 5, - "showPoints": "auto" - }, + "custom": { "drawStyle": "line", "lineInterpolation": "smooth", "fillOpacity": 15, "pointSize": 5, "showPoints": "auto" }, "unit": "cps", "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } }, @@ -173,38 +157,20 @@ data: "legend": { "displayMode": "list", "placement": "bottom" } }, "targets": [ - { - "expr": "rate(telemt_connections_total{instance=~\"$instance\"}[5m])", - "legendFormat": "{{instance}} accepted", - "refId": "A" - }, - { - "expr": "rate(telemt_connections_bad_total{instance=~\"$instance\"}[5m])", - "legendFormat": "{{instance}} bad", - "refId": "B" - }, - { - "expr": "rate(telemt_handshake_timeouts_total{instance=~\"$instance\"}[5m])", - "legendFormat": "{{instance}} hs timeout", - "refId": "C" - } + { "expr": "rate(telemt_connections_total{node=~\"$node\"}[5m])", "legendFormat": "{{node}} accepted", "refId": "A" }, + { "expr": "rate(telemt_connections_bad_total{node=~\"$node\"}[5m])", "legendFormat": "{{node}} bad", "refId": "B" }, + { "expr": "rate(telemt_handshake_timeouts_total{node=~\"$node\"}[5m])", "legendFormat": "{{node}} hs timeout", "refId": "C" } ], "datasource": { "type": "prometheus", "uid": "${datasource}" } }, { "title": "Upstream Connect", "type": "timeseries", - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, "id": 11, "fieldConfig": { "defaults": { - "custom": { - "drawStyle": "line", - "lineInterpolation": "smooth", - "fillOpacity": 15, - "pointSize": 5, - "showPoints": "auto" - }, + "custom": { "drawStyle": "line", "lineInterpolation": "smooth", "fillOpacity": 15, "pointSize": 5, "showPoints": "auto" }, "unit": "cps", "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } }, @@ -215,31 +181,19 @@ data: "legend": { "displayMode": "list", "placement": "bottom" } }, "targets": [ - { - "expr": "rate(telemt_upstream_connect_success_total{instance=~\"$instance\"}[5m])", - "legendFormat": "{{instance}} success", - "refId": "A" - }, - { - "expr": "rate(telemt_upstream_connect_fail_total{instance=~\"$instance\"}[5m])", - "legendFormat": "{{instance}} fail", - "refId": "B" - } + { "expr": "rate(telemt_upstream_connect_success_total{node=~\"$node\"}[5m])", "legendFormat": "{{node}} success", "refId": "A" }, + { "expr": "rate(telemt_upstream_connect_fail_total{node=~\"$node\"}[5m])", "legendFormat": "{{node}} fail", "refId": "B" } ], "datasource": { "type": "prometheus", "uid": "${datasource}" } }, { "title": "Upstream Connect Duration (success)", "type": "timeseries", - "gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 }, "id": 12, "fieldConfig": { "defaults": { - "custom": { - "drawStyle": "bars", - "fillOpacity": 50, - "stacking": { "mode": "normal" } - }, + "custom": { "drawStyle": "bars", "fillOpacity": 50, "stacking": { "mode": "normal" } }, "unit": "short", "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } }, @@ -250,28 +204,18 @@ data: "legend": { "displayMode": "list", "placement": "bottom" } }, "targets": [ - { - "expr": "increase(telemt_upstream_connect_duration_success_total{instance=~\"$instance\"}[5m])", - "legendFormat": "{{instance}} {{bucket}}", - "refId": "A" - } + { "expr": "increase(telemt_upstream_connect_duration_success_total{node=~\"$node\"}[5m])", "legendFormat": "{{node}} {{bucket}}", "refId": "A" } ], "datasource": { "type": "prometheus", "uid": "${datasource}" } }, { "title": "ME Writers & Pool", "type": "timeseries", - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 }, "id": 13, "fieldConfig": { "defaults": { - "custom": { - "drawStyle": "line", - "lineInterpolation": "smooth", - "fillOpacity": 15, - "pointSize": 5, - "showPoints": "auto" - }, + "custom": { "drawStyle": "line", "lineInterpolation": "smooth", "fillOpacity": 15, "pointSize": 5, "showPoints": "auto" }, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } }, "overrides": [] @@ -281,38 +225,20 @@ data: "legend": { "displayMode": "list", "placement": "bottom" } }, "targets": [ - { - "expr": "telemt_me_writers_active_current{instance=~\"$instance\"}", - "legendFormat": "{{instance}} active", - "refId": "A" - }, - { - "expr": "telemt_me_writers_warm_current{instance=~\"$instance\"}", - "legendFormat": "{{instance}} warm", - "refId": "B" - }, - { - "expr": "telemt_pool_drain_active{instance=~\"$instance\"}", - "legendFormat": "{{instance}} draining", - "refId": "C" - } + { "expr": "telemt_me_writers_active_current{node=~\"$node\"}", "legendFormat": "{{node}} active", "refId": "A" }, + { "expr": "telemt_me_writers_warm_current{node=~\"$node\"}", "legendFormat": "{{node}} warm", "refId": "B" }, + { "expr": "telemt_pool_drain_active{node=~\"$node\"}", "legendFormat": "{{node}} draining", "refId": "C" } ], "datasource": { "type": "prometheus", "uid": "${datasource}" } }, { "title": "Per-User Active Connections", "type": "timeseries", - "gridPos": { "h": 8, "w": 12, "x": 0, "y": 20 }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 }, "id": 20, "fieldConfig": { "defaults": { - "custom": { - "drawStyle": "line", - "lineInterpolation": "smooth", - "fillOpacity": 15, - "pointSize": 5, - "showPoints": "auto" - }, + "custom": { "drawStyle": "line", "lineInterpolation": "smooth", "fillOpacity": 15, "pointSize": 5, "showPoints": "auto" }, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } }, "overrides": [] @@ -322,28 +248,18 @@ data: "legend": { "displayMode": "list", "placement": "bottom" } }, "targets": [ - { - "expr": "telemt_user_connections_current{instance=~\"$instance\"}", - "legendFormat": "{{instance}} {{user}}", - "refId": "A" - } + { "expr": "telemt_user_connections_current{node=~\"$node\"}", "legendFormat": "{{node}} {{user}}", "refId": "A" } ], "datasource": { "type": "prometheus", "uid": "${datasource}" } }, { "title": "Per-User Traffic", "type": "timeseries", - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 20 }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 }, "id": 21, "fieldConfig": { "defaults": { - "custom": { - "drawStyle": "line", - "lineInterpolation": "smooth", - "fillOpacity": 15, - "pointSize": 5, - "showPoints": "auto" - }, + "custom": { "drawStyle": "line", "lineInterpolation": "smooth", "fillOpacity": 15, "pointSize": 5, "showPoints": "auto" }, "unit": "Bps", "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } }, @@ -354,33 +270,19 @@ data: "legend": { "displayMode": "list", "placement": "bottom" } }, "targets": [ - { - "expr": "rate(telemt_user_octets_from_client{instance=~\"$instance\"}[5m])", - "legendFormat": "{{instance}} {{user}} rx", - "refId": "A" - }, - { - "expr": "rate(telemt_user_octets_to_client{instance=~\"$instance\"}[5m])", - "legendFormat": "{{instance}} {{user}} tx", - "refId": "B" - } + { "expr": "rate(telemt_user_octets_from_client{node=~\"$node\"}[5m])", "legendFormat": "{{node}} {{user}} rx", "refId": "A" }, + { "expr": "rate(telemt_user_octets_to_client{node=~\"$node\"}[5m])", "legendFormat": "{{node}} {{user}} tx", "refId": "B" } ], "datasource": { "type": "prometheus", "uid": "${datasource}" } }, { - "title": "DC→Client Batching", + "title": "DC->Client Payload", "type": "timeseries", - "gridPos": { "h": 8, "w": 12, "x": 0, "y": 28 }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 32 }, "id": 30, "fieldConfig": { "defaults": { - "custom": { - "drawStyle": "line", - "lineInterpolation": "smooth", - "fillOpacity": 15, - "pointSize": 5, - "showPoints": "auto" - }, + "custom": { "drawStyle": "line", "lineInterpolation": "smooth", "fillOpacity": 15, "pointSize": 5, "showPoints": "auto" }, "unit": "Bps", "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } }, @@ -391,28 +293,18 @@ data: "legend": { "displayMode": "list", "placement": "bottom" } }, "targets": [ - { - "expr": "rate(telemt_me_d2c_payload_bytes_total{instance=~\"$instance\"}[5m])", - "legendFormat": "{{instance}} payload", - "refId": "A" - } + { "expr": "rate(telemt_me_d2c_payload_bytes_total{node=~\"$node\"}[5m])", "legendFormat": "{{node}} payload", "refId": "A" } ], "datasource": { "type": "prometheus", "uid": "${datasource}" } }, { "title": "ME Errors & Anomalies", "type": "timeseries", - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 28 }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 32 }, "id": 31, "fieldConfig": { "defaults": { - "custom": { - "drawStyle": "line", - "lineInterpolation": "smooth", - "fillOpacity": 15, - "pointSize": 5, - "showPoints": "auto" - }, + "custom": { "drawStyle": "line", "lineInterpolation": "smooth", "fillOpacity": 15, "pointSize": 5, "showPoints": "auto" }, "unit": "cps", "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } }, @@ -423,43 +315,21 @@ data: "legend": { "displayMode": "list", "placement": "bottom" } }, "targets": [ - { - "expr": "rate(telemt_me_reconnect_attempts_total{instance=~\"$instance\"}[5m])", - "legendFormat": "{{instance}} reconnect", - "refId": "A" - }, - { - "expr": "rate(telemt_me_handshake_reject_total{instance=~\"$instance\"}[5m])", - "legendFormat": "{{instance}} hs reject", - "refId": "B" - }, - { - "expr": "rate(telemt_me_crc_mismatch_total{instance=~\"$instance\"}[5m])", - "legendFormat": "{{instance}} crc mismatch", - "refId": "C" - }, - { - "expr": "rate(telemt_desync_total{instance=~\"$instance\"}[5m])", - "legendFormat": "{{instance}} desync", - "refId": "D" - } + { "expr": "rate(telemt_me_reconnect_attempts_total{node=~\"$node\"}[5m])", "legendFormat": "{{node}} reconnect", "refId": "A" }, + { "expr": "rate(telemt_me_handshake_reject_total{node=~\"$node\"}[5m])", "legendFormat": "{{node}} hs reject", "refId": "B" }, + { "expr": "rate(telemt_me_crc_mismatch_total{node=~\"$node\"}[5m])", "legendFormat": "{{node}} crc mismatch", "refId": "C" }, + { "expr": "rate(telemt_desync_total{node=~\"$node\"}[5m])", "legendFormat": "{{node}} desync", "refId": "D" } ], "datasource": { "type": "prometheus", "uid": "${datasource}" } }, { "title": "Per-User Unique IPs", "type": "timeseries", - "gridPos": { "h": 8, "w": 12, "x": 0, "y": 36 }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 40 }, "id": 40, "fieldConfig": { "defaults": { - "custom": { - "drawStyle": "line", - "lineInterpolation": "smooth", - "fillOpacity": 15, - "pointSize": 5, - "showPoints": "auto" - }, + "custom": { "drawStyle": "line", "lineInterpolation": "smooth", "fillOpacity": 15, "pointSize": 5, "showPoints": "auto" }, "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } }, "overrides": [] @@ -469,33 +339,19 @@ data: "legend": { "displayMode": "list", "placement": "bottom" } }, "targets": [ - { - "expr": "telemt_user_unique_ips_current{instance=~\"$instance\"}", - "legendFormat": "{{instance}} {{user}} active", - "refId": "A" - }, - { - "expr": "telemt_user_unique_ips_recent_window{instance=~\"$instance\"}", - "legendFormat": "{{instance}} {{user}} recent", - "refId": "B" - } + { "expr": "telemt_user_unique_ips_current{node=~\"$node\"}", "legendFormat": "{{node}} {{user}} active", "refId": "A" }, + { "expr": "telemt_user_unique_ips_recent_window{node=~\"$node\"}", "legendFormat": "{{node}} {{user}} recent", "refId": "B" } ], "datasource": { "type": "prometheus", "uid": "${datasource}" } }, { "title": "Conntrack", "type": "timeseries", - "gridPos": { "h": 8, "w": 12, "x": 12, "y": 36 }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 40 }, "id": 41, "fieldConfig": { "defaults": { - "custom": { - "drawStyle": "line", - "lineInterpolation": "smooth", - "fillOpacity": 15, - "pointSize": 5, - "showPoints": "auto" - }, + "custom": { "drawStyle": "line", "lineInterpolation": "smooth", "fillOpacity": 15, "pointSize": 5, "showPoints": "auto" }, "unit": "cps", "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }] } }, @@ -506,21 +362,9 @@ data: "legend": { "displayMode": "list", "placement": "bottom" } }, "targets": [ - { - "expr": "rate(telemt_conntrack_delete_total{instance=~\"$instance\", result=\"attempt\"}[5m])", - "legendFormat": "{{instance}} delete attempt", - "refId": "A" - }, - { - "expr": "rate(telemt_conntrack_delete_total{instance=~\"$instance\", result=\"error\"}[5m])", - "legendFormat": "{{instance}} delete error", - "refId": "B" - }, - { - "expr": "telemt_conntrack_event_queue_depth{instance=~\"$instance\"}", - "legendFormat": "{{instance}} queue depth", - "refId": "C" - } + { "expr": "rate(telemt_conntrack_delete_total{node=~\"$node\", result=\"attempt\"}[5m])", "legendFormat": "{{node}} delete attempt", "refId": "A" }, + { "expr": "rate(telemt_conntrack_delete_total{node=~\"$node\", result=\"error\"}[5m])", "legendFormat": "{{node}} delete error", "refId": "B" }, + { "expr": "telemt_conntrack_event_queue_depth{node=~\"$node\"}", "legendFormat": "{{node}} queue depth", "refId": "C" } ], "datasource": { "type": "prometheus", "uid": "${datasource}" } } @@ -547,13 +391,13 @@ data: { "current": {}, "datasource": { "type": "prometheus", "uid": "${datasource}" }, - "definition": "label_values(telemt_uptime_seconds, instance)", + "definition": "label_values(telemt_uptime_seconds, node)", "hide": 0, "includeAll": true, - "label": "Instance", + "label": "Node", "multi": true, - "name": "instance", - "query": "label_values(telemt_uptime_seconds, instance)", + "name": "node", + "query": "label_values(telemt_uptime_seconds, node)", "refresh": 2, "regex": "", "skipUrlSync": false,