Added llama.cpp on ai

2026-06-04 16:59:58 +03:00
parent 6b717f5219
commit 82dbe84075
5 changed files with 125 additions and 0 deletions
@@ -0,0 +1,20 @@
 # Argo CD Application: syncs the manifests under k8s/apps/llamacpp from the
 # homelab repository into the llamacpp namespace of the in-cluster API server.
 apiVersion: argoproj.io/v1alpha1
 kind: Application
 metadata:
  namespace: argocd
  name: llamacpp
 spec:
  project: apps
  # Where the manifests are read from.
  source:
    repoURL: 'ssh://git@gt.hexor.cy:30022/ab/homelab.git'
    path: k8s/apps/llamacpp
    targetRevision: HEAD
  # Where the rendered resources are applied.
  destination:
    server: 'https://kubernetes.default.svc'
    namespace: llamacpp
  syncPolicy:
    syncOptions:
      # Create the destination namespace during sync if it does not exist.
      - CreateNamespace=true
    # Sync automatically: prune deletes resources removed from Git,
    # selfHeal reverts manual changes made in the cluster.
    automated:
      prune: true
      selfHeal: true
@@ -0,0 +1,12 @@
 # Settings for the llamacpp container. The llamacpp Deployment loads every
 # key of this ConfigMap as an environment variable (envFrom).
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: llamacpp-config
 data:
  # Listen address and port; the port matches the Deployment's containerPort.
  LLAMA_ARG_HOST: '0.0.0.0'
  LLAMA_ARG_PORT: '8080'
  # Model reference and cache directory; /models is the path the Deployment
  # mounts from the node.
  LLAMA_ARG_HF_REPO: 'unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q6_K'
  LLAMA_CACHE: '/models'
  # Runtime options. Values are quoted so they always stay strings
  # (a bare `on` would be read as a boolean by YAML 1.1 parsers).
  LLAMA_ARG_CTX_SIZE: '32768'
  LLAMA_ARG_FLASH_ATTN: 'auto'
  LLAMA_ARG_FIT: 'on'
@@ -0,0 +1,70 @@
 # Single-replica llama.cpp server pinned to one node, requesting one
 # amd.com/gpu resource and storing its data in a node-local directory.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: llamacpp
  labels:
    app: llamacpp
  annotations:
    reloader.stakater.com/auto: 'true'
 spec:
  replicas: 1
  selector:
    matchLabels:
      app: llamacpp
  # Recreate: the existing pod is terminated before its replacement starts.
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: llamacpp
    spec:
      # Scheduling: only this host, tolerating a workload=ai:NoSchedule taint.
      nodeSelector:
        kubernetes.io/hostname: ai.tail2fe2d.ts.net
      tolerations:
        - key: workload
          operator: Equal
          value: ai
          effect: NoSchedule
      volumes:
        # Directory on the node; created if it does not exist yet.
        - name: models
          hostPath:
            type: DirectoryOrCreate
            path: /k8s/llamacpp/models
      containers:
        - name: llamacpp
          image: ghcr.io/ggml-org/llama.cpp:server-rocm
          imagePullPolicy: Always
          ports:
            - name: http
              protocol: TCP
              containerPort: 8080
          # Every key of llamacpp-config becomes an environment variable.
          envFrom:
            - configMapRef:
                name: llamacpp-config
          env:
            # optional: the container still starts when the secret is absent.
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  optional: true
                  name: llamacpp-hf-token
                  key: token
          resources:
            limits:
              amd.com/gpu: 1
          volumeMounts:
            - name: models
              mountPath: /models
          # Startup budget: 180 failures x 10 s period = up to 30 minutes
          # before the container is restarted.
          startupProbe:
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 180
            httpGet:
              path: /health
              port: http
          # After startup: unready after 3 consecutive failed checks.
          readinessProbe:
            periodSeconds: 10
            timeoutSeconds: 5
            failureThreshold: 3
            httpGet:
              path: /health
              port: http
@@ -0,0 +1,8 @@
 # Kustomize entry point: lists the manifest files (paths relative to this
 # file) that make up the llamacpp application.
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
  # Argo CD Application definition.
  - app.yaml
  # Environment settings consumed by the Deployment.
  - configmap.yaml
  # The llamacpp server workload.
  - deployment.yaml
  # ClusterIP Service in front of the workload.
  - service.yaml
@@ -0,0 +1,15 @@
 # Cluster-internal Service that exposes pods labelled app=llamacpp on
 # TCP port 8080.
 apiVersion: v1
 kind: Service
 metadata:
  labels:
    app: llamacpp
  name: llamacpp
 spec:
  type: ClusterIP
  ports:
    # targetPort refers to the container port named "http" on the pod.
    - name: http
      protocol: TCP
      targetPort: http
      port: 8080
  selector:
    app: llamacpp