Auto-update README with current k8s applications

Generated by CI/CD workflow on 2026-06-17 00:24:38. This PR updates the README.md file with the current list of applications found in the k8s/ directory structure.
2026-06-17 00:24:38 +00:00
5 changed files with 1 addition and 103 deletions
@@ -1,12 +0,0 @@
 apiVersion: v1
 kind: ConfigMap
 metadata:
  name: llamacpp-cuda-config
 data:
  LLAMA_CACHE: /models
  LLAMA_ARG_HOST: 0.0.0.0
  LLAMA_ARG_PORT: "8080"
  LLAMA_ARG_HF_REPO: "unsloth/gemma-4-12b-it-GGUF:Q6_K"
  LLAMA_ARG_CTX_SIZE: "32768"
  LLAMA_ARG_FLASH_ATTN: auto
  LLAMA_ARG_FIT: "on"
@@ -1,72 +0,0 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
  name: llamacpp-cuda
  annotations:
    reloader.stakater.com/auto: "true"
  labels:
    app: llamacpp-cuda
 spec:
  replicas: 1
  strategy:
    type: Recreate
  selector:
    matchLabels:
      app: llamacpp-cuda
  template:
    metadata:
      labels:
        app: llamacpp-cuda
    spec:
      dnsPolicy: Default
      runtimeClassName: nvidia
      nodeSelector:
        kubernetes.io/hostname: uk-desktop.tail2fe2d.ts.net
      tolerations:
        - key: workload
          operator: Equal
          value: desktop
          effect: NoSchedule
      containers:
        - name: llamacpp
          image: ghcr.io/ggml-org/llama.cpp:server-cuda-b9501
          imagePullPolicy: IfNotPresent
          envFrom:
            - configMapRef:
                name: llamacpp-cuda-config
          env:
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: llamacpp-hf-token
                  key: token
                  optional: true
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          resources:
            limits:
              nvidia.com/gpu: 1
          startupProbe:
            httpGet:
              path: /health
              port: http
            failureThreshold: 180
            periodSeconds: 10
            timeoutSeconds: 5
          readinessProbe:
            httpGet:
              path: /health
              port: http
            failureThreshold: 3
            periodSeconds: 10
            timeoutSeconds: 5
          volumeMounts:
            - name: models
              mountPath: /models
      volumes:
        - name: models
          hostPath:
            path: /data/llama.cpp/models
            type: DirectoryOrCreate
@@ -3,9 +3,6 @@ kind: Kustomization
 resources:
  - app.yaml
  - configmap-cuda.yaml
  - configmap.yaml
  - deployment-cuda.yaml
  - deployment.yaml
  - service-cuda.yaml
  - service.yaml
@@ -1,15 +0,0 @@
 apiVersion: v1
 kind: Service
 metadata:
  name: llamacpp-cuda
  labels:
    app: llamacpp-cuda
 spec:
  type: ClusterIP
  selector:
    app: llamacpp-cuda
  ports:
    - name: http
      port: 8080
      targetPort: http
      protocol: TCP
@@ -11,7 +11,7 @@ spec:
  selector:
    matchLabels:
      app: pasarguard
-  replicas: 2
+  replicas: 1
  strategy:
    type: RollingUpdate
  template: