Added llama.cpp on ai

2026-06-04 16:59:58 +03:00
parent 6b717f5219
commit 82dbe84075
5 changed files with 125 additions and 0 deletions
@@ -0,0 +1,70 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llamacpp
+  labels:
+    app: llamacpp
+  annotations:
+    reloader.stakater.com/auto: 'true'  # annotation values are strings, so keep it quoted
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llamacpp
+  strategy:
+    type: Recreate  # old pod is terminated before its replacement is created
+  template:
+    metadata:
+      labels:
+        app: llamacpp  # has to match spec.selector.matchLabels
+    spec:
+      nodeSelector:
+        kubernetes.io/hostname: ai.tail2fe2d.ts.net  # pin to this one node
+      tolerations:
+        - effect: NoSchedule
+          key: workload
+          operator: Equal
+          value: ai
+      volumes:
+        - name: models
+          hostPath:
+            type: DirectoryOrCreate  # directory is created on the node if missing
+            path: /k8s/llamacpp/models
+      containers:
+        - name: llamacpp
+          image: ghcr.io/ggml-org/llama.cpp:server-rocm
+          imagePullPolicy: Always  # image is re-pulled on every container start
+          resources:
+            limits:
+              amd.com/gpu: 1  # extended resource: one unit of amd.com/gpu
+          ports:
+            - name: http  # referenced by name from both probes below
+              protocol: TCP
+              containerPort: 8080
+          envFrom:
+            - configMapRef:
+                name: llamacpp-config
+          env:
+            - name: HF_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  optional: true  # container still starts when the Secret or key is absent
+                  name: llamacpp-hf-token
+                  key: token
+          volumeMounts:
+            - name: models
+              mountPath: /models
+          startupProbe:
+            periodSeconds: 10
+            timeoutSeconds: 5
+            failureThreshold: 180  # 180 x 10 s: up to 30 min allowed for startup
+            httpGet:
+              port: http
+              path: /health
+          readinessProbe:
+            periodSeconds: 10
+            timeoutSeconds: 5
+            failureThreshold: 3  # 3 consecutive failures mark the pod unready
+            httpGet:
+              port: http
+              path: /health