From 82dbe84075745ddedf95a13615ffcb5a1eddf883 Mon Sep 17 00:00:00 2001
From: Ultradesu
Date: Thu, 4 Jun 2026 16:59:58 +0300
Subject: [PATCH] Added llama.cpp on ai

---
Notes (free-form text placed after the three-dash line, outside the commit message):
  Adds five manifests under k8s/apps/llamacpp:
    app.yaml            Argo CD Application "llamacpp" (project "apps",
                        automated sync with selfHeal and prune).
    configmap.yaml      LLAMA_* settings injected into the container via envFrom.
    deployment.yaml     Single-replica Deployment (Recreate strategy) selected
                        onto node ai.tail2fe2d.ts.net, limited to one
                        amd.com/gpu, with /models backed by the hostPath
                        /k8s/llamacpp/models.
    kustomization.yaml  Lists the four manifests above as resources.
    service.yaml        ClusterIP Service exposing the named port "http" on 8080.

 k8s/apps/llamacpp/app.yaml           | 20 ++++++++
 k8s/apps/llamacpp/configmap.yaml     | 12 +++++
 k8s/apps/llamacpp/deployment.yaml    | 70 ++++++++++++++++++++++++++++
 k8s/apps/llamacpp/kustomization.yaml |  8 ++++
 k8s/apps/llamacpp/service.yaml       | 15 ++++++
 5 files changed, 125 insertions(+)
 create mode 100644 k8s/apps/llamacpp/app.yaml
 create mode 100644 k8s/apps/llamacpp/configmap.yaml
 create mode 100644 k8s/apps/llamacpp/deployment.yaml
 create mode 100644 k8s/apps/llamacpp/kustomization.yaml
 create mode 100644 k8s/apps/llamacpp/service.yaml

diff --git a/k8s/apps/llamacpp/app.yaml b/k8s/apps/llamacpp/app.yaml
new file mode 100644
index 0000000..2576b72
--- /dev/null
+++ b/k8s/apps/llamacpp/app.yaml
@@ -0,0 +1,20 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: llamacpp
+  namespace: argocd
+spec:
+  project: apps
+  destination:
+    namespace: llamacpp
+    server: https://kubernetes.default.svc
+  source:
+    repoURL: ssh://git@gt.hexor.cy:30022/ab/homelab.git
+    targetRevision: HEAD
+    path: k8s/apps/llamacpp
+  syncPolicy:
+    automated:
+      selfHeal: true
+      prune: true
+    syncOptions:
+      - CreateNamespace=true
diff --git a/k8s/apps/llamacpp/configmap.yaml b/k8s/apps/llamacpp/configmap.yaml
new file mode 100644
index 0000000..def4c36
--- /dev/null
+++ b/k8s/apps/llamacpp/configmap.yaml
@@ -0,0 +1,12 @@
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: llamacpp-config
+data:
+  LLAMA_CACHE: /models
+  LLAMA_ARG_HOST: 0.0.0.0
+  LLAMA_ARG_PORT: "8080"
+  LLAMA_ARG_HF_REPO: unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q6_K
+  LLAMA_ARG_CTX_SIZE: "32768"
+  LLAMA_ARG_FLASH_ATTN: auto
+  LLAMA_ARG_FIT: "on"
diff --git a/k8s/apps/llamacpp/deployment.yaml b/k8s/apps/llamacpp/deployment.yaml
new file mode 100644
index 0000000..991d075
--- /dev/null
+++ b/k8s/apps/llamacpp/deployment.yaml
@@ -0,0 +1,70 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llamacpp
+  annotations:
+    reloader.stakater.com/auto: "true"
+  labels:
+    app: llamacpp
+spec:
+  replicas: 1
+  strategy:
+    type: Recreate
+  selector:
+    matchLabels:
+      app: llamacpp
+  template:
+    metadata:
+      labels:
+        app: llamacpp
+    spec:
+      nodeSelector:
+        kubernetes.io/hostname: ai.tail2fe2d.ts.net
+      tolerations:
+        - key: workload
+          operator: Equal
+          value: ai
+          effect: NoSchedule
+      containers:
+        - name: llamacpp
+          image: ghcr.io/ggml-org/llama.cpp:server-rocm
+          imagePullPolicy: Always
+          envFrom:
+            - configMapRef:
+                name: llamacpp-config
+          env:
+            - name: HF_TOKEN
+              valueFrom:
+                secretKeyRef:
+                  name: llamacpp-hf-token
+                  key: token
+                  optional: true
+          ports:
+            - name: http
+              containerPort: 8080
+              protocol: TCP
+          resources:
+            limits:
+              amd.com/gpu: 1
+          startupProbe:
+            httpGet:
+              path: /health
+              port: http
+            failureThreshold: 180
+            periodSeconds: 10
+            timeoutSeconds: 5
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: http
+            failureThreshold: 3
+            periodSeconds: 10
+            timeoutSeconds: 5
+          volumeMounts:
+            - name: models
+              mountPath: /models
+      volumes:
+        - name: models
+          hostPath:
+            path: /k8s/llamacpp/models
+            type: DirectoryOrCreate
diff --git a/k8s/apps/llamacpp/kustomization.yaml b/k8s/apps/llamacpp/kustomization.yaml
new file mode 100644
index 0000000..15d9dc2
--- /dev/null
+++ b/k8s/apps/llamacpp/kustomization.yaml
@@ -0,0 +1,8 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - app.yaml
+  - configmap.yaml
+  - deployment.yaml
+  - service.yaml
diff --git a/k8s/apps/llamacpp/service.yaml b/k8s/apps/llamacpp/service.yaml
new file mode 100644
index 0000000..a9a3fd5
--- /dev/null
+++ b/k8s/apps/llamacpp/service.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: llamacpp
+  labels:
+    app: llamacpp
+spec:
+  type: ClusterIP
+  selector:
+    app: llamacpp
+  ports:
+    - name: http
+      port: 8080
+      targetPort: http
+      protocol: TCP