Compare commits
1 Commits
main
...
auto-updat
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b9ca31b9a3 |
@@ -18,7 +18,6 @@ ArgoCD homelab project
|
||||
| **external-secrets** | [](https://ag.hexor.cy/applications/argocd/external-secrets) |
|
||||
| **kube-system-custom** | [](https://ag.hexor.cy/applications/argocd/kube-system-custom) |
|
||||
| **kubernetes-dashboard** | [](https://ag.hexor.cy/applications/argocd/kubernetes-dashboard) |
|
||||
| **longhorn** | [](https://ag.hexor.cy/applications/argocd/longhorn) |
|
||||
| **postgresql** | [](https://ag.hexor.cy/applications/argocd/postgresql) |
|
||||
| **prom-stack** | [](https://ag.hexor.cy/applications/argocd/prom-stack) |
|
||||
| **system-upgrade** | [](https://ag.hexor.cy/applications/argocd/system-upgrade) |
|
||||
|
||||
@@ -1,153 +0,0 @@
|
||||
---
# n8n "main" instance: listens on container port 5678 and runs in queue mode
# (EXECUTIONS_MODE=queue) against the n8n-redis host. State is kept on the
# n8n-data PVC; a kubectl binary is provided through the n8n-tools PVC, which
# is mounted at /opt/tools and put first on PATH.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: n8n-main
  labels:
    app: n8n
    component: main
spec:
  replicas: 1
  selector:
    matchLabels:
      app: n8n
      component: main
  template:
    metadata:
      labels:
        app: n8n
        component: main
    spec:
      serviceAccountName: n8n
      initContainers:
        # Downloads kubectl onto the "tools" volume. When an executable
        # /tools/kubectl is already present the download is skipped.
        - name: install-tools
          image: alpine:3.22
          command:
            - /bin/sh
            - -c
            - |
              set -e
              if [ -x /tools/kubectl ]; then
                echo "kubectl already exists, skipping download"
                /tools/kubectl version --client
                exit 0
              fi
              echo "Downloading kubectl..."
              ARCH=$(uname -m)
              case $ARCH in
                x86_64) ARCH="amd64" ;;
                aarch64) ARCH="arm64" ;;
              esac
              wget -O /tools/kubectl "https://dl.k8s.io/release/$(wget -qO- https://dl.k8s.io/release/stable.txt)/bin/linux/${ARCH}/kubectl"
              chmod +x /tools/kubectl
              /tools/kubectl version --client
          volumeMounts:
            - name: tools
              mountPath: /tools
          securityContext:
            runAsUser: 1000
            runAsGroup: 1000
            runAsNonRoot: true
      containers:
        - name: n8n
          # NOTE(review): floating ":latest" tag; consider pinning a version
          # so rollouts are reproducible.
          image: docker.n8n.io/n8nio/n8n:latest
          ports:
            - containerPort: 5678
              name: http
          env:
            # /opt/tools comes first so the kubectl from the tools volume wins.
            - name: PATH
              value: "/opt/tools:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
            - name: HOME
              value: "/home/node"
            - name: N8N_ENFORCE_SETTINGS_FILE_PERMISSIONS
              value: "true"
            - name: NODES_EXCLUDE
              value: "[]"
            - name: N8N_HOST
              value: "n8n.hexor.cy"
            - name: N8N_PORT
              value: "5678"
            - name: N8N_PROTOCOL
              value: "https"
            - name: N8N_RUNNERS_ENABLED
              value: "true"
            - name: N8N_RUNNERS_MODE
              value: "external"
            - name: EXECUTIONS_MODE
              value: "queue"
            - name: QUEUE_BULL_REDIS_HOST
              value: "n8n-redis"
            - name: NODE_ENV
              value: "production"
            - name: WEBHOOK_URL
              value: "https://n8n.hexor.cy/"
            - name: GENERIC_TIMEZONE
              value: "Europe/Moscow"
            - name: TZ
              value: "Europe/Moscow"
            - name: DB_TYPE
              value: "postgresdb"
            - name: DB_POSTGRESDB_HOST
              value: "psql.psql.svc"
            - name: DB_POSTGRESDB_DATABASE
              value: "n8n"
            # Everything below is read from the "credentials" Secret.
            - name: DB_POSTGRESDB_USER
              valueFrom:
                secretKeyRef:
                  name: credentials
                  key: username
            - name: DB_POSTGRESDB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: credentials
                  key: password
            - name: N8N_ENCRYPTION_KEY
              valueFrom:
                secretKeyRef:
                  name: credentials
                  key: encryptionkey
            - name: N8N_RUNNERS_AUTH_TOKEN
              valueFrom:
                secretKeyRef:
                  name: credentials
                  key: runnertoken
          volumeMounts:
            - name: n8n-data
              mountPath: /home/node/.n8n
            - name: tools
              mountPath: /opt/tools
          resources:
            requests:
              cpu: 2000m
              memory: 512Mi
            limits:
              cpu: 4000m
              # Was 2048Gi (2 TiB), which is effectively no limit next to a
              # 512Mi request; 2048Mi is the evident intent.
              memory: 2048Mi
          livenessProbe:
            httpGet:
              path: /healthz
              port: http
            initialDelaySeconds: 120
            periodSeconds: 30
            timeoutSeconds: 10
            failureThreshold: 6
          readinessProbe:
            httpGet:
              path: /healthz/readiness
              port: http
            initialDelaySeconds: 60
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 10
      volumes:
        - name: n8n-data
          persistentVolumeClaim:
            claimName: n8n-data
        - name: tools
          persistentVolumeClaim:
            claimName: n8n-tools
      # Pod-level context: run as UID/GID 1000 and apply fsGroup 1000 to the
      # mounted volumes.
      securityContext:
        runAsUser: 1000
        runAsGroup: 1000
        runAsNonRoot: true
        fsGroup: 1000
|
||||
@@ -1,112 +0,0 @@
|
||||
---
# n8n queue workers: two replicas started with "n8n worker", using the
# n8n-redis host for the queue and sharing the n8n-data and n8n-tools PVCs.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: n8n-worker
  labels:
    app: n8n
    component: worker
spec:
  replicas: 2
  selector:
    matchLabels:
      app: n8n
      component: worker
  template:
    metadata:
      labels:
        app: n8n
        component: worker
    spec:
      serviceAccountName: n8n
      containers:
        - name: n8n-worker
          # NOTE(review): floating ":latest" tag; consider pinning a version
          # so rollouts are reproducible.
          image: docker.n8n.io/n8nio/n8n:latest
          command: ["n8n", "worker"]
          env:
            # /opt/tools comes first so the kubectl from the tools volume wins.
            - name: PATH
              value: "/opt/tools:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
            - name: HOME
              value: "/home/node"
            - name: NODES_EXCLUDE
              value: "[]"
            - name: N8N_ENFORCE_SETTINGS_FILE_PERMISSIONS
              value: "true"
            - name: N8N_RUNNERS_ENABLED
              value: "true"
            - name: N8N_RUNNERS_MODE
              value: "external"
            - name: N8N_PORT
              value: "80"
            - name: EXECUTIONS_MODE
              value: "queue"
            - name: QUEUE_BULL_REDIS_HOST
              value: "n8n-redis"
            - name: N8N_RUNNERS_TASK_BROKER_URI
              value: "http://n8n:80"
            - name: NODE_ENV
              value: "production"
            - name: GENERIC_TIMEZONE
              value: "Europe/Moscow"
            - name: TZ
              value: "Europe/Moscow"
            - name: DB_TYPE
              value: "postgresdb"
            - name: DB_POSTGRESDB_HOST
              value: "psql.psql.svc"
            - name: DB_POSTGRESDB_DATABASE
              value: "n8n"
            # Everything below is read from the "credentials" Secret.
            - name: DB_POSTGRESDB_USER
              valueFrom:
                secretKeyRef:
                  name: credentials
                  key: username
            - name: DB_POSTGRESDB_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: credentials
                  key: password
            - name: N8N_ENCRYPTION_KEY
              valueFrom:
                secretKeyRef:
                  name: credentials
                  key: encryptionkey
            - name: N8N_RUNNERS_AUTH_TOKEN
              valueFrom:
                secretKeyRef:
                  name: credentials
                  key: runnertoken
          volumeMounts:
            - name: n8n-data
              mountPath: /home/node/.n8n
            - name: tools
              mountPath: /opt/tools
          resources:
            requests:
              cpu: 2000m
              memory: 512Mi
            limits:
              cpu: 4000m
              # Was 2048Gi (2 TiB), which is effectively no limit next to a
              # 512Mi request; 2048Mi is the evident intent.
              memory: 2048Mi
          # Process-based liveness check. The "[n]" bracket keeps grep from
          # matching its own command line in the ps output.
          livenessProbe:
            exec:
              command:
                - /bin/sh
                - -c
                - "ps aux | grep '[n]8n worker' || exit 1"
            initialDelaySeconds: 30
            periodSeconds: 30
            timeoutSeconds: 5
            failureThreshold: 3
      volumes:
        - name: n8n-data
          persistentVolumeClaim:
            claimName: n8n-data
        - name: tools
          persistentVolumeClaim:
            claimName: n8n-tools
      # Pod-level context: run as UID/GID 1000 and apply fsGroup 1000 to the
      # mounted volumes.
      securityContext:
        runAsUser: 1000
        runAsGroup: 1000
        runAsNonRoot: true
        fsGroup: 1000
|
||||
@@ -10,10 +10,8 @@ spec:
|
||||
template:
|
||||
type: Opaque
|
||||
data:
|
||||
password: "{{ .psql | trim }}"
|
||||
username: "n8n"
|
||||
encryptionkey: "{{ .enc_pass | trim }}"
|
||||
runnertoken: "{{ .runner_token | trim }}"
|
||||
postgres-password: "{{ .psql | trim }}"
|
||||
N8N_ENCRYPTION_KEY: "{{ .enc_pass | trim }}"
|
||||
data:
|
||||
- secretKey: psql
|
||||
sourceRef:
|
||||
@@ -37,14 +35,3 @@ spec:
|
||||
metadataPolicy: None
|
||||
key: 18c92d73-9637-4419-8642-7f7b308460cb
|
||||
property: fields[0].value
|
||||
- secretKey: runner_token
|
||||
sourceRef:
|
||||
storeRef:
|
||||
name: vaultwarden-login
|
||||
kind: ClusterSecretStore
|
||||
remoteRef:
|
||||
conversionStrategy: Default
|
||||
decodingStrategy: None
|
||||
metadataPolicy: None
|
||||
key: 18c92d73-9637-4419-8642-7f7b308460cb
|
||||
property: fields[1].value
|
||||
|
||||
@@ -1,28 +0,0 @@
|
||||
---
# Traefik Ingress for n8n.hexor.cy: routes "/" to Service "n8n" port 80 and
# references the n8n-tls secret; cert-manager issuer "letsencrypt" is named in
# the annotations.
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  name: n8n
  annotations:
    cert-manager.io/cluster-issuer: letsencrypt
    traefik.ingress.kubernetes.io/router.middlewares: kube-system-https-redirect@kubernetescrd
    # Annotation values must be strings, hence the quotes.
    traefik.ingress.kubernetes.io/router.tls: 'true'
  labels:
    app: n8n
spec:
  ingressClassName: traefik
  rules:
    - host: n8n.hexor.cy
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                name: n8n
                port:
                  number: 80
  tls:
    - secretName: n8n-tls
      hosts:
        - n8n.hexor.cy
|
||||
@@ -1,18 +1,19 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
# Updated: Fixed n8n volume permissions issue
|
||||
|
||||
resources:
|
||||
- external-secrets.yaml
|
||||
- storage.yaml
|
||||
- rbac.yaml
|
||||
- redis-deployment.yaml
|
||||
- redis-service.yaml
|
||||
- deployment-main.yaml
|
||||
- deployment-worker.yaml
|
||||
- service.yaml
|
||||
- ingress.yaml
|
||||
|
||||
helmCharts:
|
||||
- name: n8n
|
||||
repo: https://community-charts.github.io/helm-charts
|
||||
version: 1.16.28
|
||||
releaseName: n8n
|
||||
namespace: n8n
|
||||
valuesFile: values-n8n.yaml
|
||||
includeCRDs: true
|
||||
- name: yacy
|
||||
repo: https://gt.hexor.cy/api/packages/ab/helm
|
||||
version: 0.1.2
|
||||
@@ -20,6 +21,3 @@ helmCharts:
|
||||
namespace: n8n
|
||||
valuesFile: values-yacy.yaml
|
||||
includeCRDs: true
|
||||
|
||||
commonLabels:
|
||||
app.kubernetes.io/name: n8n
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
---
# ServiceAccount named "n8n". No namespace is set in this manifest.
kind: ServiceAccount
apiVersion: v1
metadata:
  name: n8n
|
||||
---
# Cluster-wide read-only role (get/list/watch) across the API groups listed
# below.
# NOTE(review): resources "*" in the core group includes Secrets; confirm that
# cluster-wide Secret read access is intended for this role.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: n8n-clusterrole
rules:
  # Core API group ("")
  - apiGroups:
      - ""
    resources:
      - "*"
    verbs:
      - get
      - list
      - watch

  # Common built-in API groups
  - apiGroups:
      - apps
      - batch
      - autoscaling
      - extensions
      - policy
    resources:
      - "*"
    verbs:
      - get
      - list
      - watch

  - apiGroups:
      - networking.k8s.io
      - rbac.authorization.k8s.io
      - apiextensions.k8s.io
    resources:
      - "*"
    verbs:
      - get
      - list
      - watch

  - apiGroups:
      - coordination.k8s.io
      - discovery.k8s.io
      - events.k8s.io
    resources:
      - "*"
    verbs:
      - get
      - list
      - watch

  - apiGroups:
      - storage.k8s.io
      - admissionregistration.k8s.io
      - authentication.k8s.io
      - authorization.k8s.io
    resources:
      - "*"
    verbs:
      - get
      - list
      - watch
|
||||
---
# Binds ClusterRole "n8n-clusterrole" to ServiceAccount "n8n" in namespace
# "n8n".
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: n8n-clusterrolebinding
subjects:
  - kind: ServiceAccount
    namespace: n8n
    name: n8n
roleRef:
  kind: ClusterRole
  name: n8n-clusterrole
  apiGroup: rbac.authorization.k8s.io
|
||||
@@ -1,57 +0,0 @@
|
||||
---
# Single-replica Redis for n8n, started with append-only persistence enabled
# and an RDB save rule of "900 1".
apiVersion: apps/v1
kind: Deployment
metadata:
  name: n8n-redis
  labels:
    component: n8n
    app: redis
spec:
  replicas: 1
  selector:
    matchLabels:
      component: n8n
      app: redis
  template:
    metadata:
      labels:
        component: n8n
        app: redis
    spec:
      containers:
        - name: redis
          image: redis:7-alpine
          # Quoted so "yes" stays a string and "900 1" stays one argument.
          command:
            - redis-server
            - --appendonly
            - 'yes'
            - --save
            - '900 1'
          ports:
            - name: redis
              containerPort: 6379
          resources:
            limits:
              cpu: 200m
              memory: 256Mi
            requests:
              cpu: 50m
              memory: 64Mi
          volumeMounts:
            - mountPath: /data
              name: redis-data
          livenessProbe:
            tcpSocket:
              port: 6379
            periodSeconds: 10
            initialDelaySeconds: 30
          readinessProbe:
            exec:
              command:
                - redis-cli
                - ping
            periodSeconds: 5
            initialDelaySeconds: 5
      volumes:
        # emptyDir: the contents of /data are removed when the pod is deleted.
        - name: redis-data
          emptyDir: {}
|
||||
@@ -1,18 +0,0 @@
|
||||
---
# ClusterIP Service "n8n-redis": port 6379 to pods labelled app=redis,
# component=n8n.
apiVersion: v1
kind: Service
metadata:
  name: n8n-redis
  labels:
    component: n8n
    app: redis
spec:
  type: ClusterIP
  selector:
    component: n8n
    app: redis
  ports:
    - name: redis
      protocol: TCP
      targetPort: 6379
      port: 6379
|
||||
@@ -1,17 +0,0 @@
|
||||
---
# ClusterIP Service "n8n": port 80 forwards to port 5678 on pods labelled
# app=n8n, component=main.
apiVersion: v1
kind: Service
metadata:
  name: n8n
  labels:
    app: n8n
spec:
  type: ClusterIP
  selector:
    component: main
    app: n8n
  ports:
    - name: http
      protocol: TCP
      targetPort: 5678
      port: 80
|
||||
@@ -2,23 +2,11 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: n8n-data
|
||||
name: n8n-home
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
storageClassName: longhorn
|
||||
storageClassName: nfs-csi
|
||||
resources:
|
||||
requests:
|
||||
storage: 10Gi
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: n8n-tools
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteMany
|
||||
storageClassName: longhorn
|
||||
resources:
|
||||
requests:
|
||||
storage: 20Gi
|
||||
|
||||
79
k8s/apps/n8n/values-n8n.yaml
Normal file
79
k8s/apps/n8n/values-n8n.yaml
Normal file
@@ -0,0 +1,79 @@
|
||||
# Helm values for the n8n chart (file k8s/apps/n8n/values-n8n.yaml).
# NOTE(review): indentation is not visible in this view, so which of the keys
# below are nested under `main:` cannot be determined here - confirm against
# the chart's values schema before editing.
nodeSelector:
|
||||
kubernetes.io/hostname: master.tail2fe2d.ts.net
|
||||
|
||||
db:
|
||||
type: postgresdb
|
||||
|
||||
main:
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
limits:
|
||||
cpu: 512m
|
||||
memory: 512Mi
|
||||
# Reuses an existing claim named n8n-home, mounted at the n8n data directory.
persistence:
|
||||
enabled: true
|
||||
existingClaim: n8n-home
|
||||
mountPath: /home/node/.n8n
|
||||
|
||||
podSecurityContext:
|
||||
fsGroup: 1000
|
||||
fsGroupChangePolicy: "OnRootMismatch"
|
||||
|
||||
# Fix NFS permission issues - required for NFS volumes
|
||||
# The init container below creates /home/node/.n8n if it is missing, then
# recursively sets owner 1000:1000 and mode 775 on it.
initContainers:
|
||||
- name: fix-permissions
|
||||
image: busybox:1.35
|
||||
command:
|
||||
- sh
|
||||
|
||||
- -c
|
||||
- |
|
||||
echo "Fixing permissions for NFS volume..."
|
||||
if [ ! -d "/home/node/.n8n" ]; then
|
||||
mkdir -p /home/node/.n8n
|
||||
fi
|
||||
chown -R 1000:1000 /home/node/.n8n
|
||||
chmod -R 775 /home/node/.n8n
|
||||
echo "Permissions fixed: $(ls -ld /home/node/.n8n)"
|
||||
volumeMounts:
|
||||
# NOTE(review): the mounted volume is named "node-modules" while persistence
# above uses existingClaim n8n-home - confirm this is the volume that backs
# /home/node/.n8n in the main container.
- name: node-modules
|
||||
mountPath: /home/node/.n8n
|
||||
# UID/GID 0: the init container runs as root, unlike fsGroup 1000 above.
securityContext:
|
||||
runAsUser: 0
|
||||
runAsGroup: 0
|
||||
|
||||
worker:
|
||||
mode: regular
|
||||
|
||||
webhook:
|
||||
url: https://n8n.hexor.cy
|
||||
|
||||
redis:
|
||||
enabled: true
|
||||
|
||||
# The encryption key and the PostgreSQL settings below both reference the
# Secret named "credentials".
existingEncryptionKeySecret: credentials
|
||||
|
||||
externalPostgresql:
|
||||
existingSecret: credentials
|
||||
host: "psql.psql.svc"
|
||||
username: "n8n"
|
||||
database: "n8n"
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
className: traefik
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt
|
||||
traefik.ingress.kubernetes.io/router.middlewares: kube-system-https-redirect@kubernetescrd
|
||||
|
||||
hosts:
|
||||
- host: n8n.hexor.cy
|
||||
paths:
|
||||
- path: /
|
||||
pathType: Prefix
|
||||
# NOTE(review): the TLS entry lists the wildcard '*.hexor.cy' while the rule
# host is n8n.hexor.cy - confirm the issuer can issue a wildcard certificate.
tls:
|
||||
- secretName: n8n-tls
|
||||
hosts:
|
||||
- '*.hexor.cy'
|
||||
|
||||
@@ -1,21 +0,0 @@
|
||||
# Argo CD Application "longhorn": syncs path k8s/core/longhorn at HEAD of the
# homelab repository into namespace "longhorn", with automated prune and
# self-heal.
apiVersion: argoproj.io/v1alpha1
kind: Application
metadata:
  namespace: argocd
  name: longhorn
spec:
  project: core
  source:
    repoURL: ssh://git@gt.hexor.cy:30022/ab/homelab.git
    path: k8s/core/longhorn
    targetRevision: HEAD
  destination:
    server: https://kubernetes.default.svc
    namespace: longhorn
  syncPolicy:
    syncOptions:
      - CreateNamespace=true
    automated:
      prune: true
      selfHeal: true
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
# Kustomization that renders the longhorn Helm chart (CRDs included) with
# values.yaml. The resources list is left commented out.
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1

#resources:
#  - app.yaml

helmCharts:
  - name: longhorn
    releaseName: longhorn
    namespace: longhorn
    repo: https://charts.longhorn.io
    version: '1.11.0'
    includeCRDs: true
    valuesFile: values.yaml
|
||||
|
||||
@@ -1,4 +0,0 @@
|
||||
# Longhorn chart overrides: one UI replica, reclaim policy "Retain".
persistence:
  reclaimPolicy: 'Retain'
longhornUI:
  replicas: 1
|
||||
@@ -1,46 +0,0 @@
|
||||
# AlertmanagerConfig sending alerts to the "telegram" receiver. Alerts named
# Watchdog are routed to the empty 'null' receiver instead.
apiVersion: monitoring.coreos.com/v1alpha1
kind: AlertmanagerConfig
metadata:
  namespace: prometheus
  name: telegram-notifications
  labels:
    release: prometheus
    app: kube-prometheus-stack-alertmanager
spec:
  route:
    receiver: telegram
    groupBy:
      - alertname
      - cluster
      - service
    groupWait: 10s
    groupInterval: 5m
    repeatInterval: 12h
    routes:
      # 'null' is quoted so it stays a receiver name, not a YAML null.
      - receiver: 'null'
        matchers:
          - name: alertname
            matchType: '='
            value: Watchdog
  receivers:
    - name: telegram
      telegramConfigs:
        - chatID: 124317807
          # Bot token is read from key TELEGRAM_BOT_TOKEN of this Secret.
          botToken:
            name: alertmanager-telegram-secret
            key: TELEGRAM_BOT_TOKEN
          parseMode: HTML
          disableNotifications: false
          sendResolved: true
          message: |
            {{ if eq .Status "firing" }}🔥 FIRING{{ else }}✅ RESOLVED{{ end }}

            {{ range .Alerts }}
            📊 <b>{{ .Labels.alertname }}</b>
            {{ .Annotations.summary }}

            {{ if .Annotations.node }}🖥 <b>Node:</b> <code>{{ .Annotations.node }}</code>{{ end }}
            {{ if .Annotations.pod }}📦 <b>Pod:</b> <code>{{ .Annotations.pod }}</code>{{ end }}
            {{ if .Annotations.namespace }}📁 <b>Namespace:</b> <code>{{ .Annotations.namespace }}</code>{{ end }}
            {{ if .Annotations.throttle_rate }}⚠️ <b>Throttling rate:</b> {{ .Annotations.throttle_rate }}{{ end }}

            🔗 <a href="{{ .GeneratorURL }}">View in Grafana</a>
            {{ end }}
    - name: 'null'
|
||||
@@ -45,7 +45,7 @@ data:
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: A
|
||||
reducer: min
|
||||
reducer: last
|
||||
refId: B
|
||||
type: reduce
|
||||
noDataState: NoData
|
||||
@@ -63,7 +63,7 @@ data:
|
||||
- orgId: 1
|
||||
name: kubernetes_alerts
|
||||
folder: Kubernetes
|
||||
interval: 2m
|
||||
interval: 30s
|
||||
rules:
|
||||
- uid: node_not_ready
|
||||
title: Kubernetes Node Not Ready
|
||||
@@ -71,17 +71,17 @@ data:
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
from: 300
|
||||
to: 0
|
||||
datasourceUid: P76F38748CEC837F0
|
||||
model:
|
||||
expr: 'kube_node_status_condition{condition="Ready",status="false"}'
|
||||
expr: 'kube_node_status_condition{condition="Ready",status="true"} == 0'
|
||||
refId: A
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
- refId: B
|
||||
relativeTimeRange:
|
||||
from: 600
|
||||
from: 300
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
@@ -98,12 +98,12 @@ data:
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: A
|
||||
reducer: min
|
||||
reducer: last
|
||||
refId: B
|
||||
type: reduce
|
||||
noDataState: NoData
|
||||
noDataState: Alerting
|
||||
execErrState: Alerting
|
||||
for: 10m
|
||||
for: 0s
|
||||
annotations:
|
||||
node: '{{ $labels.node }}'
|
||||
condition: '{{ $labels.condition }}'
|
||||
@@ -111,236 +111,6 @@ data:
|
||||
labels:
|
||||
severity: critical
|
||||
|
||||
- uid: node_high_memory_usage
|
||||
title: High Node Memory Usage
|
||||
condition: B
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange:
|
||||
from: 300
|
||||
to: 0
|
||||
datasourceUid: P76F38748CEC837F0
|
||||
model:
|
||||
expr: '(1 - (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes)) * 100'
|
||||
refId: A
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
- refId: B
|
||||
relativeTimeRange:
|
||||
from: 300
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 80
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
datasource:
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: A
|
||||
reducer: max
|
||||
refId: B
|
||||
type: reduce
|
||||
noDataState: NoData
|
||||
execErrState: Alerting
|
||||
for: 5m
|
||||
annotations:
|
||||
node: '{{ $labels.instance }}'
|
||||
memory_usage: '{{ printf "%.1f%%" $values.A }}'
|
||||
summary: 'Node memory usage is critically high'
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
- uid: node_high_cpu_usage
|
||||
title: High Node CPU Usage
|
||||
condition: B
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange:
|
||||
from: 300
|
||||
to: 0
|
||||
datasourceUid: P76F38748CEC837F0
|
||||
model:
|
||||
expr: '100 - (avg by(instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)'
|
||||
refId: A
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
- refId: B
|
||||
relativeTimeRange:
|
||||
from: 300
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 80
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
datasource:
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: A
|
||||
reducer: max
|
||||
refId: B
|
||||
type: reduce
|
||||
noDataState: NoData
|
||||
execErrState: Alerting
|
||||
for: 10m
|
||||
annotations:
|
||||
node: '{{ $labels.instance }}'
|
||||
cpu_usage: '{{ printf "%.1f%%" $values.A }}'
|
||||
summary: 'Node CPU usage is critically high'
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
- uid: node_high_disk_usage
|
||||
title: High Node Disk Usage
|
||||
condition: B
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange:
|
||||
from: 300
|
||||
to: 0
|
||||
datasourceUid: P76F38748CEC837F0
|
||||
model:
|
||||
expr: '(1 - (node_filesystem_avail_bytes{fstype=~"ext[234]|xfs|zfs|btrfs"} / node_filesystem_size_bytes)) * 100'
|
||||
refId: A
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
- refId: B
|
||||
relativeTimeRange:
|
||||
from: 300
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 85
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
datasource:
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: A
|
||||
reducer: max
|
||||
refId: B
|
||||
type: reduce
|
||||
noDataState: NoData
|
||||
execErrState: Alerting
|
||||
for: 5m
|
||||
annotations:
|
||||
node: '{{ $labels.instance }}'
|
||||
filesystem: '{{ $labels.mountpoint }}'
|
||||
disk_usage: '{{ printf "%.1f%%" $values.A }}'
|
||||
summary: 'Node disk usage is critically high'
|
||||
labels:
|
||||
severity: critical
|
||||
|
||||
- uid: node_load_average_high
|
||||
title: High Node Load Average
|
||||
condition: B
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange:
|
||||
from: 300
|
||||
to: 0
|
||||
datasourceUid: P76F38748CEC837F0
|
||||
model:
|
||||
expr: 'node_load5 / on(instance) group_left count by(instance)(node_cpu_seconds_total{mode="idle"})'
|
||||
refId: A
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
- refId: B
|
||||
relativeTimeRange:
|
||||
from: 300
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 0.8
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
datasource:
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: A
|
||||
reducer: max
|
||||
refId: B
|
||||
type: reduce
|
||||
noDataState: NoData
|
||||
execErrState: Alerting
|
||||
for: 5m
|
||||
annotations:
|
||||
node: '{{ $labels.instance }}'
|
||||
load_average: '{{ printf "%.2f" $values.A }}'
|
||||
summary: 'Node load average is high relative to CPU count'
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
- uid: node_exporter_down
|
||||
title: Node Exporter Down
|
||||
condition: B
|
||||
data:
|
||||
- refId: A
|
||||
relativeTimeRange:
|
||||
from: 300
|
||||
to: 0
|
||||
datasourceUid: P76F38748CEC837F0
|
||||
model:
|
||||
expr: 'up{job="node-exporter"}'
|
||||
refId: A
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
- refId: B
|
||||
relativeTimeRange:
|
||||
from: 300
|
||||
to: 0
|
||||
datasourceUid: __expr__
|
||||
model:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 1
|
||||
type: lt
|
||||
operator:
|
||||
type: and
|
||||
query:
|
||||
params: []
|
||||
datasource:
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: A
|
||||
reducer: min
|
||||
refId: B
|
||||
type: reduce
|
||||
noDataState: NoData
|
||||
execErrState: Alerting
|
||||
for: 2m
|
||||
annotations:
|
||||
node: '{{ $labels.instance }}'
|
||||
summary: 'Node exporter is down - unable to collect metrics'
|
||||
labels:
|
||||
severity: critical
|
||||
|
||||
contactpoints.yaml: |
|
||||
apiVersion: 1
|
||||
contactPoints:
|
||||
@@ -379,4 +149,4 @@ data:
|
||||
- alertname
|
||||
group_wait: 10s
|
||||
group_interval: 5m
|
||||
repeat_interval: 12h
|
||||
repeat_interval: 4h
|
||||
|
||||
@@ -5,7 +5,6 @@ resources:
|
||||
- persistentVolume.yaml
|
||||
- external-secrets.yaml
|
||||
- grafana-alerting-configmap.yaml
|
||||
- alertmanager-config.yaml
|
||||
|
||||
helmCharts:
|
||||
- name: kube-prometheus-stack
|
||||
|
||||
@@ -26,41 +26,11 @@ alertmanager:
|
||||
{{ if .Annotations.description }}<b>Description:</b> {{ .Annotations.description }}{{ end }}
|
||||
{{ end }}
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: traefik
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt
|
||||
traefik.ingress.kubernetes.io/router.middlewares: kube-system-https-redirect@kubernetescrd
|
||||
hosts:
|
||||
- prom.hexor.cy
|
||||
paths:
|
||||
- /alertmanager
|
||||
tls:
|
||||
- secretName: alertmanager-tls
|
||||
hosts:
|
||||
- prom.hexor.cy
|
||||
alertmanagerSpec:
|
||||
secrets:
|
||||
- alertmanager-telegram-secret
|
||||
externalUrl: https://prom.hexor.cy/alertmanager
|
||||
routePrefix: /alertmanager
|
||||
|
||||
prometheus:
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: traefik
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt
|
||||
traefik.ingress.kubernetes.io/router.middlewares: kube-system-https-redirect@kubernetescrd
|
||||
hosts:
|
||||
- prom.hexor.cy
|
||||
paths:
|
||||
- /
|
||||
tls:
|
||||
- secretName: prometheus-tls
|
||||
hosts:
|
||||
- prom.hexor.cy
|
||||
prometheusSpec:
|
||||
enableRemoteWriteReceiver: true
|
||||
additionalScrapeConfigs:
|
||||
|
||||
Reference in New Issue
Block a user