Compare commits
35 Commits
auto-updat
...
auto-updat
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1a3d85e6b4 | ||
|
|
d3407914fd | ||
|
|
18c9203151 | ||
|
|
f0719a481b | ||
|
|
a1ff168ee1 | ||
|
|
999004cf1b | ||
| b9f8a7ad2c | |||
| 8a03bdfdf5 | |||
| 6deb288496 | |||
| ff7dccfe76 | |||
| 4995d482c4 | |||
| 0704ac33c7 | |||
| a499210fa2 | |||
| ca15b9a569 | |||
| abae3690f9 | |||
| a3540b1706 | |||
| f11c561f3a | |||
| 0c2ff6cc3b | |||
| 79d991da1e | |||
| 49b7e0a8c2 | |||
| 67b094ae70 | |||
| c9d29d91d2 | |||
| 8b12988aaf | |||
| f9943203fc | |||
| bbf2ea3911 | |||
| f62a196d1f | |||
| 23e5148814 | |||
| e1bd487ec9 | |||
| a88a08c972 | |||
| c83d69446b | |||
| 934860111f | |||
| 38f3fe57dc | |||
| 6b43f79fe2 | |||
| 84cca7223b | |||
|
|
47734f3447 |
@@ -16,6 +16,7 @@ ArgoCD homelab project
|
||||
| **authentik** | [](https://ag.hexor.cy/applications/argocd/authentik) |
|
||||
| **cert-manager** | [](https://ag.hexor.cy/applications/argocd/cert-manager) |
|
||||
| **external-secrets** | [](https://ag.hexor.cy/applications/argocd/external-secrets) |
|
||||
| **gpu** | [](https://ag.hexor.cy/applications/argocd/gpu) |
|
||||
| **kube-system-custom** | [](https://ag.hexor.cy/applications/argocd/kube-system-custom) |
|
||||
| **kubernetes-dashboard** | [](https://ag.hexor.cy/applications/argocd/kubernetes-dashboard) |
|
||||
| **longhorn** | [](https://ag.hexor.cy/applications/argocd/longhorn) |
|
||||
@@ -37,6 +38,7 @@ ArgoCD homelab project
|
||||
|
||||
| Application | Status |
|
||||
| :--- | :---: |
|
||||
| **comfyui** | [](https://ag.hexor.cy/applications/argocd/comfyui) |
|
||||
| **gitea** | [](https://ag.hexor.cy/applications/argocd/gitea) |
|
||||
| **greece-notifier** | [](https://ag.hexor.cy/applications/argocd/greece-notifier) |
|
||||
| **hexound** | [](https://ag.hexor.cy/applications/argocd/hexound) |
|
||||
@@ -45,6 +47,8 @@ ArgoCD homelab project
|
||||
| **jellyfin** | [](https://ag.hexor.cy/applications/argocd/jellyfin) |
|
||||
| **k8s-secrets** | [](https://ag.hexor.cy/applications/argocd/k8s-secrets) |
|
||||
| **khm** | [](https://ag.hexor.cy/applications/argocd/khm) |
|
||||
| **lidarr** | [](https://ag.hexor.cy/applications/argocd/lidarr) |
|
||||
| **mtproxy** | [](https://ag.hexor.cy/applications/argocd/mtproxy) |
|
||||
| **n8n** | [](https://ag.hexor.cy/applications/argocd/n8n) |
|
||||
| **ollama** | [](https://ag.hexor.cy/applications/argocd/ollama) |
|
||||
| **paperless** | [](https://ag.hexor.cy/applications/argocd/paperless) |
|
||||
|
||||
20
k8s/apps/comfyui/app.yaml
Normal file
20
k8s/apps/comfyui/app.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: comfyui
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: apps
|
||||
destination:
|
||||
namespace: comfyui
|
||||
server: https://kubernetes.default.svc
|
||||
source:
|
||||
repoURL: ssh://git@gt.hexor.cy:30022/ab/homelab.git
|
||||
targetRevision: HEAD
|
||||
path: k8s/apps/comfyui
|
||||
syncPolicy:
|
||||
automated:
|
||||
selfHeal: true
|
||||
prune: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
57
k8s/apps/comfyui/deployment.yaml
Normal file
57
k8s/apps/comfyui/deployment.yaml
Normal file
@@ -0,0 +1,57 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: comfyui
|
||||
namespace: comfyui
|
||||
labels:
|
||||
app: comfyui
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: comfyui
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: comfyui
|
||||
spec:
|
||||
runtimeClassName: nvidia
|
||||
tolerations:
|
||||
- key: workload
|
||||
operator: Equal
|
||||
value: desktop
|
||||
effect: NoSchedule
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: uk-desktop.tail2fe2d.ts.net
|
||||
# Fix permissions mismatch usually happening when mapping host paths
|
||||
securityContext:
|
||||
runAsUser: 0
|
||||
initContainers:
|
||||
- name: create-data-dir
|
||||
image: busybox
|
||||
command: ["sh", "-c", "mkdir -p /host.data && chown -R 1000:1000 /host.data"]
|
||||
volumeMounts:
|
||||
- name: data
|
||||
mountPath: /host.data
|
||||
containers:
|
||||
- name: comfyui
|
||||
image: runpod/comfyui:latest-5090
|
||||
imagePullPolicy: IfNotPresent
|
||||
env:
|
||||
- name: COMFYUI_PORT
|
||||
value: "8188"
|
||||
ports:
|
||||
- containerPort: 8188
|
||||
name: http
|
||||
protocol: TCP
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 1
|
||||
volumeMounts:
|
||||
- name: data
|
||||
# For ai-dock images, /workspace is the persistent user directory
|
||||
mountPath: /workspace
|
||||
volumes:
|
||||
- name: data
|
||||
persistentVolumeClaim:
|
||||
claimName: comfyui-data-pvc
|
||||
9
k8s/apps/comfyui/kustomization.yaml
Normal file
9
k8s/apps/comfyui/kustomization.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
resources:
|
||||
- namespace.yaml
|
||||
- local-pv.yaml
|
||||
- pvc.yaml
|
||||
- deployment.yaml
|
||||
- service.yaml
|
||||
22
k8s/apps/comfyui/local-pv.yaml
Normal file
22
k8s/apps/comfyui/local-pv.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: comfyui-data-pv
|
||||
spec:
|
||||
capacity:
|
||||
storage: 200Gi
|
||||
volumeMode: Filesystem
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
storageClassName: local-path
|
||||
local:
|
||||
path: /data/comfyui
|
||||
nodeAffinity:
|
||||
required:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: In
|
||||
values:
|
||||
- uk-desktop.tail2fe2d.ts.net
|
||||
4
k8s/apps/comfyui/namespace.yaml
Normal file
4
k8s/apps/comfyui/namespace.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
apiVersion: v1
|
||||
kind: Namespace
|
||||
metadata:
|
||||
name: comfyui
|
||||
12
k8s/apps/comfyui/pvc.yaml
Normal file
12
k8s/apps/comfyui/pvc.yaml
Normal file
@@ -0,0 +1,12 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolumeClaim
|
||||
metadata:
|
||||
name: comfyui-data-pvc
|
||||
namespace: comfyui
|
||||
spec:
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
storageClassName: local-path
|
||||
resources:
|
||||
requests:
|
||||
storage: 200Gi
|
||||
15
k8s/apps/comfyui/service.yaml
Normal file
15
k8s/apps/comfyui/service.yaml
Normal file
@@ -0,0 +1,15 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: comfyui
|
||||
namespace: comfyui
|
||||
labels:
|
||||
app: comfyui
|
||||
spec:
|
||||
ports:
|
||||
- name: http
|
||||
port: 8188
|
||||
targetPort: 8188
|
||||
protocol: TCP
|
||||
selector:
|
||||
app: comfyui
|
||||
20
k8s/apps/lidarr/app.yaml
Normal file
20
k8s/apps/lidarr/app.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: lidarr
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: apps
|
||||
destination:
|
||||
namespace: lidarr
|
||||
server: https://kubernetes.default.svc
|
||||
source:
|
||||
repoURL: ssh://git@gt.hexor.cy:30022/ab/homelab.git
|
||||
targetRevision: HEAD
|
||||
path: k8s/apps/lidarr
|
||||
syncPolicy:
|
||||
automated:
|
||||
selfHeal: true
|
||||
prune: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
14
k8s/apps/lidarr/kustomization.yaml
Normal file
14
k8s/apps/lidarr/kustomization.yaml
Normal file
@@ -0,0 +1,14 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
resources:
|
||||
- app.yaml
|
||||
|
||||
helmCharts:
|
||||
- name: lidarr
|
||||
repo: https://k8s-home-lab.github.io/helm-charts/
|
||||
version: 15.3.0
|
||||
releaseName: lidarr
|
||||
namespace: lidarr
|
||||
valuesFile: lidarr-values.yaml
|
||||
includeCRDs: true
|
||||
27
k8s/apps/lidarr/lidarr-values.yaml
Normal file
27
k8s/apps/lidarr/lidarr-values.yaml
Normal file
@@ -0,0 +1,27 @@
|
||||
env:
|
||||
TZ: Asia/Nicosia
|
||||
|
||||
resources:
|
||||
requests:
|
||||
memory: "512Mi"
|
||||
cpu: "200m"
|
||||
limits:
|
||||
memory: "2Gi"
|
||||
cpu: "1500m"
|
||||
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: master.tail2fe2d.ts.net
|
||||
|
||||
persistence:
|
||||
config:
|
||||
enabled: true
|
||||
type: hostPath
|
||||
hostPath: /k8s/lidarr
|
||||
mountPath: /config
|
||||
|
||||
downloads:
|
||||
enabled: true
|
||||
type: hostPath
|
||||
hostPath: /k8s/media/downloads
|
||||
mountPath: /downloads
|
||||
accessMode: ReadWriteOnce
|
||||
@@ -50,7 +50,7 @@ spec:
|
||||
runAsNonRoot: true
|
||||
containers:
|
||||
- name: n8n
|
||||
image: docker.n8n.io/n8nio/n8n:latest
|
||||
image: n8nio/n8n:latest
|
||||
ports:
|
||||
- containerPort: 5678
|
||||
name: http
|
||||
@@ -77,16 +77,22 @@ spec:
|
||||
value: "external"
|
||||
- name: N8N_RUNNERS_BROKER_LISTEN_ADDRESS
|
||||
value: "0.0.0.0"
|
||||
- name: N8N_LISTEN_ADDRESS
|
||||
value: "0.0.0.0"
|
||||
- name: N8N_RUNNERS_BROKER_PORT
|
||||
value: "5679"
|
||||
- name: EXECUTIONS_MODE
|
||||
value: "queue"
|
||||
- name: QUEUE_BULL_REDIS_HOST
|
||||
value: "n8n-redis"
|
||||
- name: QUEUE_BULL_REDIS_PORT
|
||||
value: "6379"
|
||||
- name: NODE_ENV
|
||||
value: "production"
|
||||
- name: WEBHOOK_URL
|
||||
value: "https://n8n.hexor.cy/"
|
||||
- name: N8N_PROXY_HOPS
|
||||
value: "1"
|
||||
- name: GENERIC_TIMEZONE
|
||||
value: "Europe/Moscow"
|
||||
- name: TZ
|
||||
@@ -128,7 +134,7 @@ spec:
|
||||
memory: 512Mi
|
||||
limits:
|
||||
cpu: 4000m
|
||||
memory: 2048Gi
|
||||
memory: 2048Mi
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /healthz
|
||||
|
||||
@@ -21,7 +21,7 @@ spec:
|
||||
serviceAccountName: n8n
|
||||
containers:
|
||||
- name: n8n-runner
|
||||
image: docker.n8n.io/n8nio/runners:latest
|
||||
image: n8nio/runners:latest
|
||||
ports:
|
||||
- containerPort: 5680
|
||||
name: health
|
||||
|
||||
84
k8s/apps/n8n/deployment-worker.yaml
Normal file
84
k8s/apps/n8n/deployment-worker.yaml
Normal file
@@ -0,0 +1,84 @@
|
||||
---
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: n8n-worker
|
||||
labels:
|
||||
app: n8n
|
||||
component: worker
|
||||
spec:
|
||||
replicas: 2
|
||||
selector:
|
||||
matchLabels:
|
||||
app: n8n
|
||||
component: worker
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: n8n
|
||||
component: worker
|
||||
spec:
|
||||
serviceAccountName: n8n
|
||||
containers:
|
||||
- name: n8n-worker
|
||||
image: n8nio/n8n:latest
|
||||
command:
|
||||
- n8n
|
||||
- worker
|
||||
env:
|
||||
- name: HOME
|
||||
value: "/home/node"
|
||||
- name: N8N_ENFORCE_SETTINGS_FILE_PERMISSIONS
|
||||
value: "true"
|
||||
- name: EXECUTIONS_MODE
|
||||
value: "queue"
|
||||
- name: QUEUE_BULL_REDIS_HOST
|
||||
value: "n8n-redis"
|
||||
- name: QUEUE_BULL_REDIS_PORT
|
||||
value: "6379"
|
||||
- name: NODE_ENV
|
||||
value: "production"
|
||||
- name: GENERIC_TIMEZONE
|
||||
value: "Europe/Moscow"
|
||||
- name: TZ
|
||||
value: "Europe/Moscow"
|
||||
- name: DB_TYPE
|
||||
value: "postgresdb"
|
||||
- name: DB_POSTGRESDB_HOST
|
||||
value: "psql.psql.svc"
|
||||
- name: DB_POSTGRESDB_DATABASE
|
||||
value: "n8n"
|
||||
- name: DB_POSTGRESDB_USER
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: credentials
|
||||
key: username
|
||||
- name: DB_POSTGRESDB_PASSWORD
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: credentials
|
||||
key: password
|
||||
- name: N8N_ENCRYPTION_KEY
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: credentials
|
||||
key: encryptionkey
|
||||
volumeMounts:
|
||||
- name: n8n-data
|
||||
mountPath: /home/node/.n8n
|
||||
resources:
|
||||
requests:
|
||||
cpu: 500m
|
||||
memory: 512Mi
|
||||
limits:
|
||||
cpu: 2000m
|
||||
memory: 2048Mi
|
||||
volumes:
|
||||
- name: n8n-data
|
||||
persistentVolumeClaim:
|
||||
claimName: n8n-data
|
||||
securityContext:
|
||||
runAsUser: 1000
|
||||
runAsGroup: 1000
|
||||
runAsNonRoot: true
|
||||
fsGroup: 1000
|
||||
@@ -10,7 +10,7 @@ resources:
|
||||
- paddleocr-deployment.yaml
|
||||
- paddleocr-service.yaml
|
||||
- deployment-main.yaml
|
||||
# - deployment-worker.yaml
|
||||
- deployment-worker.yaml
|
||||
- deployment-runner.yaml
|
||||
- service.yaml
|
||||
- ingress.yaml
|
||||
|
||||
@@ -3,19 +3,23 @@ kind: Kustomization
|
||||
|
||||
resources:
|
||||
- external-secrets.yaml
|
||||
- local-pv.yaml
|
||||
|
||||
helmCharts:
|
||||
- name: ollama
|
||||
repo: https://otwld.github.io/ollama-helm/
|
||||
version: 0.4.0
|
||||
version: 1.49.0
|
||||
releaseName: ollama
|
||||
namespace: ollama
|
||||
valuesFile: ollama-values.yaml
|
||||
includeCRDs: true
|
||||
- name: open-webui
|
||||
repo: https://helm.openwebui.com/
|
||||
version: 8.14.0
|
||||
version: 12.8.1
|
||||
releaseName: openweb-ui
|
||||
namespace: ollama
|
||||
valuesFile: openweb-ui-values.yaml
|
||||
includeCRDs: true
|
||||
includeCRDs: true
|
||||
|
||||
patches:
|
||||
- path: patch-runtimeclass.yaml
|
||||
22
k8s/apps/ollama/local-pv.yaml
Normal file
22
k8s/apps/ollama/local-pv.yaml
Normal file
@@ -0,0 +1,22 @@
|
||||
apiVersion: v1
|
||||
kind: PersistentVolume
|
||||
metadata:
|
||||
name: ollama-local-pv
|
||||
spec:
|
||||
capacity:
|
||||
storage: 100Gi
|
||||
volumeMode: Filesystem
|
||||
accessModes:
|
||||
- ReadWriteOnce
|
||||
persistentVolumeReclaimPolicy: Retain
|
||||
storageClassName: local-path
|
||||
local:
|
||||
path: /var/lib/ollama
|
||||
nodeAffinity:
|
||||
required:
|
||||
nodeSelectorTerms:
|
||||
- matchExpressions:
|
||||
- key: kubernetes.io/hostname
|
||||
operator: In
|
||||
values:
|
||||
- uk-desktop.tail2fe2d.ts.net
|
||||
@@ -3,6 +3,20 @@ image:
|
||||
pullPolicy: Always
|
||||
tag: "latest"
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: master.tail2fe2d.ts.net
|
||||
kubernetes.io/hostname: uk-desktop.tail2fe2d.ts.net
|
||||
tolerations:
|
||||
- key: workload
|
||||
operator: Equal
|
||||
value: desktop
|
||||
effect: NoSchedule
|
||||
ingress:
|
||||
enabled: false
|
||||
ollama:
|
||||
gpu:
|
||||
enabled: true
|
||||
type: 'nvidia'
|
||||
number: 1
|
||||
persistentVolume:
|
||||
enabled: true
|
||||
size: 100Gi
|
||||
storageClass: "local-path"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
clusterDomain: ai.hexor.cy
|
||||
clusterDomain: cluster.local
|
||||
|
||||
extraEnvVars:
|
||||
GLOBAL_LOG_LEVEL: debug
|
||||
@@ -32,12 +32,22 @@ ollama:
|
||||
|
||||
pipelines:
|
||||
enabled: true
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: master.tail2fe2d.ts.net
|
||||
|
||||
tika:
|
||||
enabled: true
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: master.tail2fe2d.ts.net
|
||||
|
||||
websocket:
|
||||
enabled: true
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: master.tail2fe2d.ts.net
|
||||
redis:
|
||||
master:
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: master.tail2fe2d.ts.net
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
@@ -46,7 +56,5 @@ ingress:
|
||||
cert-manager.io/cluster-issuer: letsencrypt
|
||||
traefik.ingress.kubernetes.io/router.middlewares: kube-system-https-redirect@kubernetescrd
|
||||
host: "ai.hexor.cy"
|
||||
tls:
|
||||
- hosts:
|
||||
- '*.hexor.cy'
|
||||
secretName: ollama-tls
|
||||
tls: true
|
||||
existingSecret: ollama-tls
|
||||
9
k8s/apps/ollama/patch-runtimeclass.yaml
Normal file
9
k8s/apps/ollama/patch-runtimeclass.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: ollama
|
||||
namespace: ollama
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
runtimeClassName: nvidia
|
||||
@@ -1,5 +1,5 @@
|
||||
image:
|
||||
tag: 2.20.3
|
||||
tag: latest
|
||||
resources:
|
||||
requests:
|
||||
memory: "1Gi"
|
||||
@@ -9,7 +9,7 @@ resources:
|
||||
cpu: "3000m"
|
||||
initContainers:
|
||||
install-tesseract-langs:
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:2.18.2
|
||||
image: ghcr.io/paperless-ngx/paperless-ngx:latest
|
||||
resources:
|
||||
requests:
|
||||
memory: "256Mi"
|
||||
|
||||
@@ -19,6 +19,14 @@ spec:
|
||||
{{ .password }}
|
||||
AUTHENTIK_SECRET_KEY: |-
|
||||
{{ .secret_key }}
|
||||
POSTGRES_PASSWORD: |-
|
||||
{{ .password }}
|
||||
POSTGRES_USER: |-
|
||||
{{ .username }}
|
||||
username: |-
|
||||
{{ .password }}
|
||||
password: |-
|
||||
{{ .username }}
|
||||
data:
|
||||
- secretKey: password
|
||||
sourceRef:
|
||||
|
||||
@@ -5,12 +5,12 @@ resources:
|
||||
- app.yaml
|
||||
- external-secrets.yaml
|
||||
- https-middleware.yaml
|
||||
- worker-restart.yaml
|
||||
# - worker-restart.yaml
|
||||
|
||||
helmCharts:
|
||||
- name: authentik
|
||||
repo: https://charts.goauthentik.io
|
||||
version: 2025.10.1
|
||||
version: 2026.2.0
|
||||
releaseName: authentik
|
||||
namespace: authentik
|
||||
valuesFile: values.yaml
|
||||
|
||||
20
k8s/core/gpu/app.yaml
Normal file
20
k8s/core/gpu/app.yaml
Normal file
@@ -0,0 +1,20 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: gpu-system
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: core
|
||||
destination:
|
||||
namespace: gpu-system
|
||||
server: https://kubernetes.default.svc
|
||||
source:
|
||||
repoURL: ssh://git@gt.hexor.cy:30022/ab/homelab.git
|
||||
targetRevision: HEAD
|
||||
path: k8s/core/gpu
|
||||
syncPolicy:
|
||||
automated:
|
||||
selfHeal: true
|
||||
prune: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
15
k8s/core/gpu/kustomization.yaml
Normal file
15
k8s/core/gpu/kustomization.yaml
Normal file
@@ -0,0 +1,15 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
resources:
|
||||
- app.yaml
|
||||
- runtime-class.yaml
|
||||
|
||||
helmCharts:
|
||||
- name: nvidia-device-plugin
|
||||
repo: https://nvidia.github.io/k8s-device-plugin
|
||||
version: 0.17.0
|
||||
releaseName: nvidia-device-plugin
|
||||
namespace: gpu-system
|
||||
valuesFile: values.yaml
|
||||
includeCRDs: true
|
||||
5
k8s/core/gpu/runtime-class.yaml
Normal file
5
k8s/core/gpu/runtime-class.yaml
Normal file
@@ -0,0 +1,5 @@
|
||||
apiVersion: node.k8s.io/v1
|
||||
kind: RuntimeClass
|
||||
metadata:
|
||||
name: nvidia
|
||||
handler: nvidia
|
||||
23
k8s/core/gpu/values.yaml
Normal file
23
k8s/core/gpu/values.yaml
Normal file
@@ -0,0 +1,23 @@
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: uk-desktop.tail2fe2d.ts.net
|
||||
|
||||
tolerations:
|
||||
- key: workload
|
||||
operator: Equal
|
||||
value: desktop
|
||||
effect: NoSchedule
|
||||
|
||||
runtimeClassName: nvidia
|
||||
|
||||
setAsDefault: false
|
||||
|
||||
config:
|
||||
default: any
|
||||
map:
|
||||
any: |-
|
||||
version: v1
|
||||
sharing:
|
||||
timeSlicing:
|
||||
resources:
|
||||
- name: nvidia.com/gpu
|
||||
replicas: 4
|
||||
@@ -260,7 +260,7 @@ data:
|
||||
to: 0
|
||||
datasourceUid: P76F38748CEC837F0
|
||||
model:
|
||||
expr: 'node_load5 / on(instance) group_left count by(instance)(node_cpu_seconds_total{mode="idle"})'
|
||||
expr: 'node_load15 / on(instance) group_left count by(instance)(node_cpu_seconds_total{mode="idle"})'
|
||||
refId: A
|
||||
intervalMs: 1000
|
||||
maxDataPoints: 43200
|
||||
@@ -273,7 +273,7 @@ data:
|
||||
conditions:
|
||||
- evaluator:
|
||||
params:
|
||||
- 0.8
|
||||
- 2
|
||||
type: gt
|
||||
operator:
|
||||
type: and
|
||||
@@ -283,16 +283,16 @@ data:
|
||||
type: __expr__
|
||||
uid: __expr__
|
||||
expression: A
|
||||
reducer: max
|
||||
reducer: last
|
||||
refId: B
|
||||
type: reduce
|
||||
noDataState: NoData
|
||||
execErrState: Alerting
|
||||
for: 5m
|
||||
for: 15m
|
||||
annotations:
|
||||
node: '{{ $labels.instance }}'
|
||||
load_average: '{{ printf "%.2f" $values.A }}'
|
||||
summary: 'Node load average is high relative to CPU count'
|
||||
summary: 'Node load average is critically high relative to CPU count'
|
||||
labels:
|
||||
severity: warning
|
||||
|
||||
|
||||
@@ -1,85 +0,0 @@
|
||||
envFromSecret: grafana-admin
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: master.tail2fe2d.ts.net
|
||||
|
||||
admin:
|
||||
existingSecret: grafana-admin
|
||||
userKey: username
|
||||
passwordKey: password
|
||||
|
||||
grafana.ini:
|
||||
auth:
|
||||
signout_redirect_url: https://idm.hexor.cy/application/o/grafana/end-session/
|
||||
# oauth_auto_login: true
|
||||
auth.generic_oauth:
|
||||
name: authentik
|
||||
enabled: true
|
||||
scopes: "openid profile email"
|
||||
auth_url: https://idm.hexor.cy/application/o/authorize/
|
||||
token_url: https://idm.hexor.cy/application/o/token/
|
||||
api_url: https://idm.hexor.cy/application/o/userinfo/
|
||||
role_attribute_path: >-
|
||||
contains(groups, 'Grafana Admin') && 'Admin' ||
|
||||
contains(groups, 'Grafana Editors') && 'Editor' ||
|
||||
contains(groups, 'Grafana Viewer') && 'Viewer'
|
||||
database:
|
||||
type: postgres
|
||||
host: psql.psql.svc:5432
|
||||
name: grafana
|
||||
user: grafana
|
||||
ssl_mode: disable
|
||||
|
||||
datasources:
|
||||
datasources.yaml:
|
||||
apiVersion: 1
|
||||
datasources:
|
||||
- name: Prometheus Local
|
||||
type: prometheus
|
||||
url: http://prometheus-kube-prometheus-prometheus.prometheus.svc:9090
|
||||
access: proxy
|
||||
isDefault: true
|
||||
- name: Loki
|
||||
type: loki
|
||||
url: http://loki-gateway.prometheus.svc:80
|
||||
access: proxy
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: traefik
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt
|
||||
traefik.ingress.kubernetes.io/router.middlewares: kube-system-https-redirect@kubernetescrd
|
||||
hosts:
|
||||
- gf.hexor.cy
|
||||
tls:
|
||||
- secretName: grafana-tls
|
||||
hosts:
|
||||
- '*.hexor.cy'
|
||||
|
||||
extraConfigmapMounts:
|
||||
- name: grafana-alerting-rules
|
||||
mountPath: /etc/grafana/provisioning/alerting/rules.yaml
|
||||
configMap: grafana-alerting
|
||||
subPath: rules.yaml
|
||||
readOnly: true
|
||||
- name: grafana-alerting-contactpoints
|
||||
mountPath: /etc/grafana/provisioning/alerting/contactpoints.yaml
|
||||
configMap: grafana-alerting
|
||||
subPath: contactpoints.yaml
|
||||
readOnly: true
|
||||
- name: grafana-alerting-policies
|
||||
mountPath: /etc/grafana/provisioning/alerting/policies.yaml
|
||||
configMap: grafana-alerting
|
||||
subPath: policies.yaml
|
||||
readOnly: true
|
||||
|
||||
envValueFrom:
|
||||
TELEGRAM_BOT_TOKEN:
|
||||
secretKeyRef:
|
||||
name: grafana-telegram
|
||||
key: bot-token
|
||||
TELEGRAM_CHAT_ID:
|
||||
secretKeyRef:
|
||||
name: grafana-telegram
|
||||
key: chat-id
|
||||
|
||||
@@ -16,14 +16,6 @@ helmCharts:
|
||||
valuesFile: prom-values.yaml
|
||||
includeCRDs: true
|
||||
|
||||
- name: grafana
|
||||
repo: https://grafana.github.io/helm-charts
|
||||
version: 10.2.0
|
||||
releaseName: grafana
|
||||
namespace: prometheus
|
||||
valuesFile: grafana-values.yaml
|
||||
includeCRDs: true
|
||||
|
||||
- name: loki
|
||||
repo: https://grafana.github.io/helm-charts
|
||||
version: 6.29.0
|
||||
|
||||
@@ -1,5 +1,4 @@
|
||||
grafana:
|
||||
enabled: false
|
||||
|
||||
|
||||
alertmanager:
|
||||
config:
|
||||
@@ -92,3 +91,88 @@ prometheus:
|
||||
requests:
|
||||
storage: 400Gi
|
||||
|
||||
grafana:
|
||||
enabled: true
|
||||
|
||||
serviceAccount:
|
||||
create: true
|
||||
name: "prom-grafana-sa"
|
||||
|
||||
envFromSecret: grafana-admin
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: master.tail2fe2d.ts.net
|
||||
|
||||
admin:
|
||||
existingSecret: grafana-admin
|
||||
userKey: username
|
||||
passwordKey: password
|
||||
|
||||
grafana.ini:
|
||||
auth:
|
||||
signout_redirect_url: https://idm.hexor.cy/application/o/grafana/end-session/
|
||||
auth.generic_oauth:
|
||||
name: authentik
|
||||
enabled: true
|
||||
scopes: "openid profile email"
|
||||
auth_url: https://idm.hexor.cy/application/o/authorize/
|
||||
token_url: https://idm.hexor.cy/application/o/token/
|
||||
api_url: https://idm.hexor.cy/application/o/userinfo/
|
||||
role_attribute_path: >-
|
||||
contains(groups, 'Grafana Admin') && 'Admin' ||
|
||||
contains(groups, 'Grafana Editors') && 'Editor' ||
|
||||
contains(groups, 'Grafana Viewer') && 'Viewer'
|
||||
database:
|
||||
type: postgres
|
||||
host: psql.psql.svc:5432
|
||||
name: grafana
|
||||
user: grafana
|
||||
ssl_mode: disable
|
||||
|
||||
# The Loki datasource config needs to be preserved,
|
||||
# but instead of "datasources.datasources.yaml", we define it like this for the prometheus-stack chart:
|
||||
additionalDataSources:
|
||||
- name: Loki
|
||||
type: loki
|
||||
url: http://loki-gateway.prometheus.svc:80
|
||||
access: proxy
|
||||
orgId: 1
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: traefik
|
||||
annotations:
|
||||
cert-manager.io/cluster-issuer: letsencrypt
|
||||
traefik.ingress.kubernetes.io/router.middlewares: kube-system-https-redirect@kubernetescrd
|
||||
hosts:
|
||||
- gf.hexor.cy
|
||||
tls:
|
||||
- secretName: grafana-tls
|
||||
hosts:
|
||||
- '*.hexor.cy'
|
||||
|
||||
extraConfigmapMounts:
|
||||
- name: grafana-alerting-rules
|
||||
mountPath: /etc/grafana/provisioning/alerting/rules.yaml
|
||||
configMap: grafana-alerting
|
||||
subPath: rules.yaml
|
||||
readOnly: true
|
||||
- name: grafana-alerting-contactpoints
|
||||
mountPath: /etc/grafana/provisioning/alerting/contactpoints.yaml
|
||||
configMap: grafana-alerting
|
||||
subPath: contactpoints.yaml
|
||||
readOnly: true
|
||||
- name: grafana-alerting-policies
|
||||
mountPath: /etc/grafana/provisioning/alerting/policies.yaml
|
||||
configMap: grafana-alerting
|
||||
subPath: policies.yaml
|
||||
readOnly: true
|
||||
|
||||
envValueFrom:
|
||||
TELEGRAM_BOT_TOKEN:
|
||||
secretKeyRef:
|
||||
name: grafana-telegram
|
||||
key: bot-token
|
||||
TELEGRAM_CHAT_ID:
|
||||
secretKeyRef:
|
||||
name: grafana-telegram
|
||||
key: chat-id
|
||||
|
||||
Reference in New Issue
Block a user