---
# Grafana unified-alerting provisioning bundle, shipped as a ConfigMap.
# Each key under `data` is one provisioning file:
#   rules.yaml         - alert rule groups
#   contactpoints.yaml - notification receivers
#   policies.yaml      - notification routing tree
apiVersion: v1
kind: ConfigMap
metadata:
  name: grafana-alerting
  namespace: prometheus
data:
  rules.yaml: |
    apiVersion: 1
    groups:
      # Alerts for the pasarguard-node container, evaluated every minute.
      - orgId: 1
        name: pasarguard_alerts
        folder: Kubernetes
        interval: 1m
        rules:
          - uid: pasarguard_cpu_throttling
            title: VPN CPU Throttle
            # The alert condition must be the threshold step (C). A reduce
            # step only collapses a series to one number and ignores any
            # `conditions` evaluator, so using it as the condition would
            # fire on every non-zero throttling rate.
            condition: C
            data:
              # A: per-second rate of CFS-throttled periods over 5 minutes.
              - refId: A
                relativeTimeRange:
                  from: 600
                  to: 0
                datasourceUid: P76F38748CEC837F0
                model:
                  expr: 'rate(container_cpu_cfs_throttled_periods_total{container="pasarguard-node"}[5m])'
                  refId: A
                  intervalMs: 1000
                  maxDataPoints: 43200
              # B: most recent sample of each series returned by A.
              - refId: B
                relativeTimeRange:
                  from: 600
                  to: 0
                datasourceUid: __expr__
                model:
                  datasource:
                    type: __expr__
                    uid: __expr__
                  expression: A
                  reducer: last
                  refId: B
                  type: reduce
              # C: fire when the reduced rate exceeds 0.1.
              - refId: C
                relativeTimeRange:
                  from: 600
                  to: 0
                datasourceUid: __expr__
                model:
                  conditions:
                    - evaluator:
                        params:
                          - 0.1
                        type: gt
                  datasource:
                    type: __expr__
                    uid: __expr__
                  expression: B
                  refId: C
                  type: threshold
            noDataState: NoData
            execErrState: Alerting
            for: 5m
            annotations:
              pod: '{{ $labels.pod }}'
              node: '{{ $labels.node }}'
              namespace: '{{ $labels.namespace }}'
              # Read the reduced value (B); `.Value` yields the float that
              # the "%.2f" verb needs.
              throttle_rate: '{{ printf "%.2f" $values.B.Value }}'
              summary: 'VPN node throttling CPU'
            labels:
              severity: warning
      # Cluster-level alerts, evaluated every 30 seconds.
      - orgId: 1
        name: kubernetes_alerts
        folder: Kubernetes
        interval: 30s
        rules:
          - uid: node_not_ready
            title: Kubernetes Node Not Ready
            condition: C
            data:
              # A: Ready condition per node (1 = Ready, 0 = not Ready).
              # The series is queried unfiltered: filtering with `== 0`
              # only ever returns zero-valued samples, which evaluate as
              # "normal", and returns nothing while all nodes are healthy.
              - refId: A
                relativeTimeRange:
                  from: 300
                  to: 0
                datasourceUid: P76F38748CEC837F0
                model:
                  expr: 'kube_node_status_condition{condition="Ready",status="true"}'
                  refId: A
                  intervalMs: 1000
                  maxDataPoints: 43200
              # B: most recent sample of each node's series.
              - refId: B
                relativeTimeRange:
                  from: 300
                  to: 0
                datasourceUid: __expr__
                model:
                  datasource:
                    type: __expr__
                    uid: __expr__
                  expression: A
                  reducer: last
                  refId: B
                  type: reduce
              # C: fire for every node whose Ready value is below 1.
              - refId: C
                relativeTimeRange:
                  from: 300
                  to: 0
                datasourceUid: __expr__
                model:
                  conditions:
                    - evaluator:
                        params:
                          - 1
                        type: lt
                  datasource:
                    type: __expr__
                    uid: __expr__
                  expression: B
                  refId: C
                  type: threshold
            # With the unfiltered query, "no data" now means the metric
            # itself is missing; it is still treated as alerting.
            noDataState: Alerting
            execErrState: Alerting
            for: 0s
            annotations:
              node: '{{ $labels.node }}'
              condition: '{{ $labels.condition }}'
              summary: 'Kubernetes node is not ready'
            labels:
              severity: critical
  contactpoints.yaml: |
    apiVersion: 1
    contactPoints:
      - orgId: 1
        name: telegram
        receivers:
          - uid: telegram_default
            type: telegram
            disableResolveMessage: false
            settings:
              # The bot token is supplied through the environment rather
              # than stored in this manifest.
              bottoken: $TELEGRAM_BOT_TOKEN
              chatid: "124317807"
              message: |
                {{ if eq .Status "firing" }}🔥 FIRING{{ else }}✅ RESOLVED{{ end }}
                {{ range .Alerts }}
                📊 {{ .Labels.alertname }}
                {{ .Annotations.summary }}
                {{ if .Annotations.node }}🖥 Node: {{ .Annotations.node }}{{ end }}
                {{ if .Annotations.pod }}📦 Pod: {{ .Annotations.pod }}{{ end }}
                {{ if .Annotations.namespace }}📁 Namespace: {{ .Annotations.namespace }}{{ end }}
                {{ if .Annotations.throttle_rate }}⚠️ Throttling rate: {{ .Annotations.throttle_rate }}{{ end }}
                {{ if .GeneratorURL }}🔗 <a href="{{ .GeneratorURL }}">View in Grafana</a>{{ end }}
                {{ end }}
              parse_mode: HTML
  policies.yaml: |
    apiVersion: 1
    policies:
      # Root policy: every alert goes to the telegram contact point,
      # grouped per folder and alert name.
      - orgId: 1
        receiver: telegram
        group_by:
          - grafana_folder
          - alertname
        group_wait: 10s
        group_interval: 5m
        repeat_interval: 4h