# File: homelab/k8s/core/prom-stack/grafana-alerting-configmap.yaml

---
# ConfigMap holding Grafana unified-alerting provisioning files.
# Each key under `data` is a standalone Grafana provisioning document:
#   rules.yaml         - alert rule groups
#   contactpoints.yaml - notification receivers
#   policies.yaml      - notification routing policy
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-alerting
  namespace: prometheus
data:
  # Alert rules. One group, evaluated every minute, stored in the
  # "Kubernetes" folder.
  rules.yaml: |
    apiVersion: 1
    groups:
      - orgId: 1
        name: pasarguard_alerts
        folder: Kubernetes
        interval: 1m
        rules:
          - uid: pasarguard_cpu_throttling
            title: VPN CPU Throttle
            # The query with refId A is itself the alert condition.
            condition: A
            data:
              - refId: A
                relativeTimeRange:
                  from: 600
                  to: 0
                datasourceUid: P76F38748CEC837F0
                model:
                  # Per-second rate of throttled CFS periods for the
                  # pasarguard-node container; only series above 0.1 are
                  # returned by the query.
                  expr: 'rate(container_cpu_cfs_throttled_periods_total{container="pasarguard-node"}[5m]) > 0.1'
                  # The query is used directly as the condition (no reduce
                  # expression), so it must return one value per series
                  # rather than a time series.
                  instant: true
                  refId: A
            # NOTE(review): the PromQL comparison drops series that are not
            # above the threshold, so a healthy container yields an empty
            # result and the rule enters NoData. Confirm that is intended.
            noDataState: NoData
            execErrState: Alerting
            for: 5m
            annotations:
              description: 'Throttling rate: {{ printf "%.2f" $values.A.Value }}'
              summary: 'VPN node throttling CPU on {{ $labels.node }}'
            labels:
              severity: warning
  # Contact points. A single Telegram receiver; the bot token is taken from
  # the TELEGRAM_BOT_TOKEN environment variable instead of being stored here.
  contactpoints.yaml: |
    apiVersion: 1
    contactPoints:
      - orgId: 1
        name: telegram
        receivers:
          - uid: telegram_default
            type: telegram
            disableResolveMessage: false
            settings:
              bottoken: $TELEGRAM_BOT_TOKEN
              # Quoted so the all-digit chat id stays a string.
              chatid: "124317807"
              message: |
                {{ if eq .Status "firing" }}🔥 FIRING{{ else }}✅ RESOLVED{{ end }}
                {{ range .Alerts }}
                📊 <b>{{ .Labels.alertname }}</b>
                {{ if .Annotations.summary }}{{ .Annotations.summary }}{{ end }}
                🎯 <b>Details:</b>
                • Pod: <code>{{ .Labels.pod }}</code>
                • Node: <code>{{ .Labels.node }}</code>
                • Namespace: <code>{{ .Labels.namespace }}</code>
                {{ if .Annotations.description }}• {{ .Annotations.description }}{{ end }}
                🔗 <a href="{{ .GeneratorURL }}">View in Grafana</a>
                {{ end }}
              # The message above uses HTML tags (<b>, <code>, <a>).
              parse_mode: HTML
  # Notification policy. Everything is routed to the telegram contact point,
  # grouped by folder and alert name.
  policies.yaml: |
    apiVersion: 1
    policies:
      - orgId: 1
        receiver: telegram
        group_by:
          - grafana_folder
          - alertname
        group_wait: 10s
        group_interval: 5m
        repeat_interval: 4h