Added llama.cpp on ai

2026-06-04 16:59:58 +03:00
parent 6b717f5219
commit 82dbe84075
5 changed files with 125 additions and 0 deletions
@@ -0,0 +1,20 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application  # Argo CD Application that syncs the llamacpp manifests from git
+metadata:
+  name: llamacpp
+  namespace: argocd  # the Application object itself is created in argocd
+spec:
+  project: apps
+  destination:
+    namespace: llamacpp  # target namespace for the synced resources
+    server: https://kubernetes.default.svc  # in-cluster API server address
+  source:
+    repoURL: ssh://git@gt.hexor.cy:30022/ab/homelab.git
+    targetRevision: HEAD  # tracks the repo's HEAD rather than a pinned revision
+    path: k8s/apps/llamacpp
+  syncPolicy:
+    automated:
+      selfHeal: true  # re-sync when live resources drift from git
+      prune: true  # delete live resources that were removed from git
+    syncOptions:
+      - CreateNamespace=true  # create the destination namespace if missing
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: ConfigMap  # environment settings; the llamacpp Deployment loads it via envFrom
+metadata:
+  name: llamacpp-config
+data:
+  LLAMA_CACHE: /models  # same path as the Deployment's "models" volumeMount
+  LLAMA_ARG_HOST: 0.0.0.0  # bind address; 0.0.0.0 means all interfaces
+  LLAMA_ARG_PORT: "8080"  # quoted to stay a string; equals the containerPort
+  LLAMA_ARG_HF_REPO: unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q6_K  # presumably <repo>:<quant> - confirm
+  LLAMA_ARG_CTX_SIZE: "32768"  # quoted: ConfigMap data values must be strings
+  LLAMA_ARG_FLASH_ATTN: auto
+  LLAMA_ARG_FIT: "on"  # quoted so YAML 1.1 parsers do not read it as a boolean
@@ -0,0 +1,70 @@
+apiVersion: apps/v1
+kind: Deployment  # single-replica llama.cpp server pinned to one GPU node
+metadata:
+  name: llamacpp
+  annotations:
+    reloader.stakater.com/auto: "true"  # presumably Stakater Reloader restarts pods on config change - confirm it is installed
+  labels:
+    app: llamacpp
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate  # old pod is terminated before the new one is created
+  selector:
+    matchLabels:
+      app: llamacpp
+  template:
+    metadata:
+      labels:
+        app: llamacpp  # must stay equal to selector.matchLabels above
+    spec:
+      nodeSelector:
+        kubernetes.io/hostname: ai.tail2fe2d.ts.net  # schedule only on this node
+      tolerations:
+        - key: workload  # tolerates a workload=ai:NoSchedule taint
+          operator: Equal
+          value: ai
+          effect: NoSchedule
+      containers:
+        - name: llamacpp
+          image: ghcr.io/ggml-org/llama.cpp:server-rocm  # NOTE(review): mutable tag, no pinned version or digest
+          imagePullPolicy: Always  # image is pulled again on every container start
+          envFrom:
+            - configMapRef:
+                name: llamacpp-config  # each ConfigMap key becomes an env var
+          env:
+            - name: HF_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  name: llamacpp-hf-token
+                  key: token
+                  optional: true  # pod still starts when the Secret or key is absent
+          ports:
+            - name: http
+              containerPort: 8080  # same value as LLAMA_ARG_PORT in llamacpp-config
+              protocol: TCP
+          resources:
+            limits:
+              amd.com/gpu: 1  # NOTE(review): no CPU/memory requests or limits are set
+          startupProbe:  # allows up to 180 x 10 s = 30 min before the container is restarted
+            httpGet:
+              path: /health
+              port: http
+            failureThreshold: 180
+            periodSeconds: 10
+            timeoutSeconds: 5
+          readinessProbe:  # NOTE(review): no livenessProbe is defined for after startup
+            httpGet:
+              path: /health
+              port: http
+            failureThreshold: 3  # marked unready after 3 consecutive failed checks
+            periodSeconds: 10
+            timeoutSeconds: 5
+          volumeMounts:
+            - name: models
+              mountPath: /models  # equals LLAMA_CACHE in llamacpp-config
+      volumes:
+        - name: models
+          hostPath:  # node-local directory; contents live on the selected node
+            path: /k8s/llamacpp/models
+            type: DirectoryOrCreate  # directory is created on the node if absent
@@ -0,0 +1,8 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization  # lists the manifests that make up the llamacpp app
+
+resources:
+  - app.yaml  # NOTE(review): presumably the Argo CD Application manifest - confirm
+  - configmap.yaml
+  - deployment.yaml
+  - service.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service  # stable in-cluster endpoint in front of the llamacpp pods
+metadata:
+  name: llamacpp
+  labels:
+    app: llamacpp
+spec:
+  type: ClusterIP  # cluster-internal virtual IP; no NodePort or LoadBalancer
+  selector:
+    app: llamacpp  # selects pods carrying the Deployment's template label
+  ports:
+    - name: http
+      port: 8080
+      targetPort: http  # resolves to the container port named "http"
+      protocol: TCP