llama.cpp fixed DNS

llama.cpp fixed model name
pinned llama.cpp to server-rocm-b9501
2026-06-04 18:47:10 +03:00 · 2026-06-04 18:37:20 +03:00 · 2026-06-04 17:24:50 +03:00 · 2026-06-04 17:19:19 +03:00 · 2026-06-04 16:59:58 +03:00 · 2026-06-04 16:46:49 +03:00
7 changed files with 178 additions and 0 deletions
@@ -0,0 +1,20 @@
 # Argo CD Application that syncs the llama.cpp manifests from the homelab repo.
 apiVersion: argoproj.io/v1alpha1
 kind: Application
 metadata:
  namespace: argocd
  name: llamacpp
 spec:
  project: apps
  # Manifest source: the k8s/apps/llamacpp directory at the repo's HEAD.
  source:
    repoURL: 'ssh://git@gt.hexor.cy:30022/ab/homelab.git'
    path: k8s/apps/llamacpp
    targetRevision: HEAD
  # Sync target: the llamacpp namespace behind kubernetes.default.svc.
  destination:
    server: 'https://kubernetes.default.svc'
    namespace: llamacpp
  syncPolicy:
    # Let Argo CD create the destination namespace when it does not exist yet.
    syncOptions:
      - CreateNamespace=true
    # Automated sync with pruning and self-healing both switched on.
    automated:
      prune: true
      selfHeal: true
@@ -0,0 +1,12 @@
 # Environment variables for the llama.cpp server; every value is a string.
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: llamacpp-config
 data:
  # Listen address and port.
  LLAMA_ARG_HOST: "0.0.0.0"
  LLAMA_ARG_PORT: "8080"
  # Model reference (repo:quantisation tag) and the cache directory.
  LLAMA_ARG_HF_REPO: "unsloth/Qwen3.6-35B-A3B-MTP-GGUF:UD-Q6_K"
  LLAMA_CACHE: "/models"
  # Runtime tuning knobs.
  LLAMA_ARG_CTX_SIZE: "32768"
  LLAMA_ARG_FLASH_ATTN: "auto"
  LLAMA_ARG_FIT: "on"
@@ -0,0 +1,71 @@
 # Single-replica llama.cpp server scheduled onto the AMD GPU node.
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: llamacpp
  annotations:
    # Stakater Reloader annotation; presumably restarts the pod when the
    # referenced ConfigMap/Secret changes - confirm Reloader is installed.
    reloader.stakater.com/auto: "true"
  labels:
    app: llamacpp
 spec:
  replicas: 1
  # Recreate terminates the existing pod before a new one is created;
  # presumably chosen because the pod claims the GPU (amd.com/gpu: 1) - confirm.
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llamacpp
  template:
    metadata:
      labels:
        app: llamacpp
    spec:
      # Default = inherit name resolution from the node instead of cluster DNS.
      dnsPolicy: Default
      # Pin the pod to the AI host and tolerate its workload=ai:NoSchedule taint.
      nodeSelector:
        kubernetes.io/hostname: ai.tail2fe2d.ts.net
      tolerations:
        - key: workload
          operator: Equal
          value: ai
          effect: NoSchedule
      containers:
        - name: llamacpp
          # Pinned to a fixed ROCm server build tag rather than a floating tag.
          image: ghcr.io/ggml-org/llama.cpp:server-rocm-b9501
          imagePullPolicy: IfNotPresent
          # Server settings are injected from the llamacpp-config ConfigMap.
          envFrom:
            - configMapRef:
                name: llamacpp-config
          env:
            # optional: true lets the container start without this Secret/key.
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: llamacpp-hf-token
                  key: token
                  optional: true
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          resources:
            limits:
              amd.com/gpu: 1
          # Allows up to 180 x 10 s = 30 min for /health to succeed at startup;
          # presumably sized for the initial model download - confirm.
          startupProbe:
            httpGet:
              path: /health
              port: http
            failureThreshold: 180
            periodSeconds: 10
            timeoutSeconds: 5
          # After startup, three consecutive failures mark the pod not ready.
          readinessProbe:
            httpGet:
              path: /health
              port: http
            failureThreshold: 3
            periodSeconds: 10
            timeoutSeconds: 5
          volumeMounts:
            - name: models
              mountPath: /models
      volumes:
        # Node-local directory, created on the host if it does not exist.
        - name: models
          hostPath:
            path: /k8s/llamacpp/models
            type: DirectoryOrCreate
@@ -0,0 +1,8 @@
 # Kustomize entry point for the llama.cpp app: lists the manifests to build.
 apiVersion: kustomize.config.k8s.io/v1beta1
 kind: Kustomization
 resources:
  # Argo CD Application wrapper.
  - app.yaml
  # Workload: settings, pod spec and in-cluster endpoint.
  - configmap.yaml
  - deployment.yaml
  - service.yaml
@@ -0,0 +1,15 @@
 # ClusterIP Service in front of the pods labelled app=llamacpp.
 apiVersion: v1
 kind: Service
 metadata:
  labels:
    app: llamacpp
  name: llamacpp
 spec:
  type: ClusterIP
  ports:
    # Service port 8080 forwards to the container port named "http".
    - name: http
      protocol: TCP
      port: 8080
      targetPort: http
  selector:
    app: llamacpp
@@ -0,0 +1,31 @@
 # Helm values for the AMD GPU device-plugin chart.
 # NOTE(review): key meanings are inferred from their names - confirm against
 # the chart's own values.yaml.

 # Optional sub-components, both switched off.
 labeller:
  enabled: false
 nfd:
  enabled: false

 # Device-plugin image and rollout behaviour.
 dp:
  updateStrategy:
    rollingUpdate:
      maxUnavailable: 1
    type: RollingUpdate
  image:
    tag: "1.31.0.9"
    repository: docker.io/rocm/k8s-device-plugin

 # Container hardening: no privilege escalation, all capabilities dropped.
 securityContext:
  capabilities:
    drop: [ALL]
  allowPrivilegeEscalation: false

 # Scheduling: restrict to the amd64 AI host and tolerate its workload=ai taint.
 node_selector_enabled: true
 node_selector:
  kubernetes.io/hostname: ai.tail2fe2d.ts.net
  kubernetes.io/arch: amd64
 tolerations:
  - effect: NoSchedule
    key: workload
    operator: Equal
    value: ai
@@ -13,3 +13,24 @@ helmCharts:
    namespace: gpu-system
    valuesFile: values.yaml
    includeCRDs: true
  # AMD GPU device plugin chart, rendered into gpu-system with its own values.
  - name: amd-gpu
    releaseName: amd-gpu-device-plugin
    namespace: gpu-system
    repo: 'https://rocm.github.io/k8s-device-plugin/'
    version: "0.21.0"
    includeCRDs: true
    valuesFile: amd-gpu-values.yaml
 patches:
  # JSON patch that overwrites the rendered device-plugin DaemonSet's
  # nodeSelector so it only runs on the amd64 AI host.
  - target:
      kind: DaemonSet
      name: amd-gpu-device-plugin-daemonset
      namespace: gpu-system
      group: apps
      version: v1
    patch: |-
      - op: replace
        path: /spec/template/spec/nodeSelector
        value:
          kubernetes.io/arch: amd64
          kubernetes.io/hostname: ai.tail2fe2d.ts.net
Author	SHA1	Message	Date
Ultradesu	2bd3d91595	llama.cpp fixed DNS Auto-update README / Generate README and Create MR (push) Failing after 11m42s Details Check with kubeconform / lint (push) Failing after 12m48s Details Update Kubernetes Services Wiki / Generate and Update K8s Wiki (push) Failing after 13m55s Details	2026-06-04 18:47:10 +03:00
Ultradesu	6f1f6c349d	llama.cpp fixed model name Check with kubeconform / lint (push) Failing after 11m8s Details Update Kubernetes Services Wiki / Generate and Update K8s Wiki (push) Failing after 12m15s Details Auto-update README / Generate README and Create MR (push) Failing after 15m0s Details	2026-06-04 18:37:20 +03:00
Ultradesu	a8ee4bd2b2	pinned llama.cpp to server-rocm-b9501 Update Kubernetes Services Wiki / Generate and Update K8s Wiki (push) Successful in 11s Details Auto-update README / Generate README and Create MR (push) Failing after 13m24s Details Check with kubeconform / lint (push) Failing after 14m31s Details	2026-06-04 17:24:50 +03:00
Ultradesu	0018d2b418	pinned llama.cpp to server-rocm-b9501 Check with kubeconform / lint (push) Successful in 13s Details Auto-update README / Generate README and Create MR (push) Failing after 11m59s Details Update Kubernetes Services Wiki / Generate and Update K8s Wiki (push) Failing after 14m13s Details	2026-06-04 17:19:19 +03:00
Ultradesu	82dbe84075	Added llama.cpp on ai Auto-update README / Generate README and Create MR (push) Failing after 10m17s Details Check with kubeconform / lint (push) Failing after 11m23s Details Update Kubernetes Services Wiki / Generate and Update K8s Wiki (push) Failing after 12m29s Details	2026-06-04 16:59:58 +03:00
Ultradesu	6b717f5219	Added amd-gpu controller Check with kubeconform / lint (push) Successful in 33s Details Update Kubernetes Services Wiki / Generate and Update K8s Wiki (push) Failing after 11m48s Details Auto-update README / Generate README and Create MR (push) Failing after 14m2s Details	2026-06-04 16:46:49 +03:00
ab	0c1aa7d633	Merge pull request 'Auto-update README with k8s applications' (#406 ) from auto-update-readme-20260604-130654 into main Update Kubernetes Services Wiki / Generate and Update K8s Wiki (push) Failing after 13m12s Details Reviewed-on: #406	2026-06-04 13:43:51 +00:00