# Review notes (preamble text ahead of the `diff --git` header; not part of any hunk).
#
# Patch to k8s/core/prom-stack/grafana-alerting-configmap.yaml, three hunks:
#   @@ -260 @@  query refId A: expr switches from node_load5 to node_load15; it is
#               still divided by the per-instance count of
#               node_cpu_seconds_total{mode="idle"} series.
#   @@ -273 @@  condition evaluator (type: gt): threshold param raised from 0.8 to 2.
#   @@ -283 @@  reduce expression refId B (expression: A): reducer max -> last;
#               `for` 5m -> 15m; summary wording gains "critically".
# Unchanged context visible in the hunks: noDataState: NoData,
# execErrState: Alerting, labels.severity: warning.
#
# NOTE(review): the summary now reads "critically high" while the severity label
#   stays `warning` -- confirm whether the label (and any routing keyed on it)
#   should change too, or whether the wording should stay neutral.
# NOTE(review): load_average formats `$values.A` with printf "%.2f"; presumably
#   `$values.<refId>` is a value object rather than a bare float and A is the raw
#   query, not the reduced series B -- verify the annotation renders a number
#   (e.g. compare with `$values.B.Value`) against the Grafana version in use.
# NOTE(review): the third hunk header declares 16 lines per side but only 15
#   entries are visible here -- possibly a blank or truncated trailing context
#   line; confirm against the repository.
# NOTE(review): the whole patch sits on one physical line, so line breaks and the
#   YAML indentation of the hunk lines cannot be recovered from this text;
#   presumably it was flattened during extraction. Regenerate the diff from the
#   repository before attempting to apply it.
diff --git a/k8s/core/prom-stack/grafana-alerting-configmap.yaml b/k8s/core/prom-stack/grafana-alerting-configmap.yaml index fc49d45..548cd15 100644 --- a/k8s/core/prom-stack/grafana-alerting-configmap.yaml +++ b/k8s/core/prom-stack/grafana-alerting-configmap.yaml @@ -260,7 +260,7 @@ data: to: 0 datasourceUid: P76F38748CEC837F0 model: - expr: 'node_load5 / on(instance) group_left count by(instance)(node_cpu_seconds_total{mode="idle"})' + expr: 'node_load15 / on(instance) group_left count by(instance)(node_cpu_seconds_total{mode="idle"})' refId: A intervalMs: 1000 maxDataPoints: 43200 @@ -273,7 +273,7 @@ data: conditions: - evaluator: params: - - 0.8 + - 2 type: gt operator: type: and @@ -283,16 +283,16 @@ data: type: __expr__ uid: __expr__ expression: A - reducer: max + reducer: last refId: B type: reduce noDataState: NoData execErrState: Alerting - for: 5m + for: 15m annotations: node: '{{ $labels.instance }}' load_average: '{{ printf "%.2f" $values.A }}' - summary: 'Node load average is high relative to CPU count' + summary: 'Node load average is critically high relative to CPU count' labels: severity: warning