From 82dbe84075745ddedf95a13615ffcb5a1eddf883 Mon Sep 17 00:00:00 2001
From: Ultradesu <ultradesu@hexor.cy>
Date: Thu, 4 Jun 2026 16:59:58 +0300
Subject: [PATCH] Added llama.cpp on ai

---
 k8s/apps/llamacpp/app.yaml           | 20 ++++++++
 k8s/apps/llamacpp/configmap.yaml     | 12 +++++
 k8s/apps/llamacpp/deployment.yaml    | 70 ++++++++++++++++++++++++++++
 k8s/apps/llamacpp/kustomization.yaml |  8 ++++
 k8s/apps/llamacpp/service.yaml       | 15 ++++++
 5 files changed, 125 insertions(+)
 create mode 100644 k8s/apps/llamacpp/app.yaml
 create mode 100644 k8s/apps/llamacpp/configmap.yaml
 create mode 100644 k8s/apps/llamacpp/deployment.yaml
 create mode 100644 k8s/apps/llamacpp/kustomization.yaml
 create mode 100644 k8s/apps/llamacpp/service.yaml

diff --git a/k8s/apps/llamacpp/app.yaml b/k8s/apps/llamacpp/app.yaml
new file mode 100644
index 0000000..2576b72
--- /dev/null
+++ b/k8s/apps/llamacpp/app.yaml
@@ -0,0 +1,20 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application  # Argo CD app that deploys the llama.cpp manifests
+metadata:
+  name: llamacpp
+  namespace: argocd
+spec:
+  project: apps
+  source:  # manifests are read from this directory of the homelab repo
+    repoURL: ssh://git@gt.hexor.cy:30022/ab/homelab.git
+    path: k8s/apps/llamacpp
+    targetRevision: HEAD
+  destination:  # in-cluster API server, dedicated namespace
+    server: https://kubernetes.default.svc
+    namespace: llamacpp
+  syncPolicy:
+    syncOptions:
+      - CreateNamespace=true  # create the destination namespace if missing
+    automated:
+      prune: true  # delete cluster resources that were removed from git
+      selfHeal: true  # revert manual drift back to the git state
diff --git a/k8s/apps/llamacpp/configmap.yaml b/k8s/apps/llamacpp/configmap.yaml
new file mode 100644
index 0000000..def4c36
--- /dev/null
+++ b/k8s/apps/llamacpp/configmap.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: ConfigMap  # injected into the llamacpp container through envFrom
+metadata:
+  name: llamacpp-config
+data:  # every value is quoted: ConfigMap data entries must be strings
+  LLAMA_ARG_HOST: "0.0.0.0"
+  LLAMA_ARG_PORT: "8080"  # same port the Deployment and Service expose
+  LLAMA_ARG_HF_REPO: "unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q6_K"
+  LLAMA_CACHE: "/models"  # matches the container's /models volume mount
+  LLAMA_ARG_CTX_SIZE: "32768"
+  LLAMA_ARG_FLASH_ATTN: "auto"
+  LLAMA_ARG_FIT: "on"  # keep quoted; a bare `on` is a YAML 1.1 boolean
diff --git a/k8s/apps/llamacpp/deployment.yaml b/k8s/apps/llamacpp/deployment.yaml
new file mode 100644
index 0000000..991d075
--- /dev/null
+++ b/k8s/apps/llamacpp/deployment.yaml
@@ -0,0 +1,70 @@
+apiVersion: apps/v1
+kind: Deployment  # single-replica llama.cpp server on the AMD GPU node
+metadata:
+  name: llamacpp
+  labels:
+    app: llamacpp
+  annotations:
+    reloader.stakater.com/auto: "true"
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llamacpp
+  strategy:
+    type: Recreate  # the old pod is stopped before its replacement starts
+  template:
+    metadata:
+      labels:
+        app: llamacpp
+    spec:
+      tolerations:
+        - key: workload
+          value: ai
+          operator: Equal
+          effect: NoSchedule
+      nodeSelector:  # pinned to one node; the hostPath volume lives there
+        kubernetes.io/hostname: ai.tail2fe2d.ts.net
+      volumes:
+        - name: models
+          hostPath:
+            path: /k8s/llamacpp/models
+            type: DirectoryOrCreate
+      containers:
+        - name: llamacpp
+          image: ghcr.io/ggml-org/llama.cpp:server-rocm
+          imagePullPolicy: Always
+          ports:
+            - name: http
+              containerPort: 8080
+              protocol: TCP
+          envFrom:
+            - configMapRef:
+                name: llamacpp-config
+          env:
+            - name: HF_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  name: llamacpp-hf-token
+                  key: token
+                  optional: true  # pod still starts when the Secret is absent
+          resources:
+            limits:
+              amd.com/gpu: 1
+          volumeMounts:
+            - name: models
+              mountPath: /models
+          startupProbe:  # 180 x 10 s = up to 30 min of failed checks allowed
+            httpGet:
+              path: /health
+              port: http
+            periodSeconds: 10
+            timeoutSeconds: 5
+            failureThreshold: 180
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: http
+            periodSeconds: 10
+            timeoutSeconds: 5
+            failureThreshold: 3
diff --git a/k8s/apps/llamacpp/kustomization.yaml b/k8s/apps/llamacpp/kustomization.yaml
new file mode 100644
index 0000000..15d9dc2
--- /dev/null
+++ b/k8s/apps/llamacpp/kustomization.yaml
@@ -0,0 +1,8 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization  # bundles every manifest of the llamacpp app
+
+resources:
+  - app.yaml  # Argo CD Application; its source path is this same directory
+  - configmap.yaml
+  - deployment.yaml
+  - service.yaml
diff --git a/k8s/apps/llamacpp/service.yaml b/k8s/apps/llamacpp/service.yaml
new file mode 100644
index 0000000..a9a3fd5
--- /dev/null
+++ b/k8s/apps/llamacpp/service.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service  # cluster-internal endpoint for the llama.cpp HTTP server
+metadata:
+  name: llamacpp
+  labels:
+    app: llamacpp
+spec:
+  selector:
+    app: llamacpp  # same label the Deployment puts on its pods
+  type: ClusterIP
+  ports:
+    - name: http
+      protocol: TCP
+      port: 8080
+      targetPort: http  # resolved by name to the container's `http` port