Files
homelab/k8s/apps/llamacpp/deployment.yaml
T
Ultradesu a8ee4bd2b2
Update Kubernetes Services Wiki / Generate and Update K8s Wiki (push) Successful in 11s
Auto-update README / Generate README and Create MR (push) Failing after 13m24s
Check with kubeconform / lint (push) Failing after 14m31s
pinned llama.cpp to server-rocm-b9501
2026-06-04 17:24:50 +03:00

71 lines
1.7 KiB
YAML

---
# Deployment for the llama.cpp HTTP server (ROCm image), scheduled onto one
# specific node and given one AMD GPU.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llamacpp
  annotations:
    # Stakater Reloader opt-in annotation; value must stay a string, hence the
    # quotes. NOTE(review): presumably used so the pod restarts when
    # llamacpp-config or llamacpp-hf-token changes — confirm Reloader is
    # installed in this cluster.
    reloader.stakater.com/auto: "true"
  labels:
    app: llamacpp
spec:
  replicas: 1
  strategy:
    # Recreate terminates the existing pod before the replacement is created.
    # NOTE(review): presumably required because only one GPU is available and
    # two pods cannot hold it at once — confirm.
    type: Recreate
  selector:
    matchLabels:
      app: llamacpp
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: llamacpp
    spec:
      # Pin the pod to a single host by its hostname label.
      nodeSelector:
        kubernetes.io/hostname: ai.tail2fe2d.ts.net
      # Allow scheduling onto a node tainted workload=ai:NoSchedule.
      tolerations:
        - key: workload
          operator: Equal
          value: ai
          effect: NoSchedule
      containers:
        - name: llamacpp
          # Image is pinned to an explicit build tag rather than a moving tag.
          image: ghcr.io/ggml-org/llama.cpp:server-rocm-b9501
          imagePullPolicy: IfNotPresent
          # Every key of the ConfigMap is injected as an environment variable.
          envFrom:
            - configMapRef:
                name: llamacpp-config
          env:
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: llamacpp-hf-token
                  key: token
                  # The container still starts when the Secret or key is
                  # absent; HF_TOKEN is simply left unset.
                  optional: true
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          resources:
            limits:
              # Extended resource: one AMD GPU. No CPU/memory requests or
              # limits are declared here.
              amd.com/gpu: 1
          # Startup budget: 180 failures x 10 s period = up to 30 minutes
          # before the container is restarted.
          # NOTE(review): presumably sized for model download/load — confirm.
          startupProbe:
            httpGet:
              path: /health
              port: http
            failureThreshold: 180
            periodSeconds: 10
            timeoutSeconds: 5
          # After startup, 3 consecutive failures (about 30 s) mark the pod
          # as not ready.
          readinessProbe:
            httpGet:
              path: /health
              port: http
            failureThreshold: 3
            periodSeconds: 10
            timeoutSeconds: 5
          volumeMounts:
            - name: models
              mountPath: /models
      volumes:
        # Node-local directory backing /models; DirectoryOrCreate creates it
        # on the node if it does not exist yet.
        - name: models
          hostPath:
            path: /k8s/llamacpp/models
            type: DirectoryOrCreate