---
# Deployment for a single llama.cpp server pod (ROCm image), pinned to one
# specific node and requesting one AMD GPU.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llamacpp
  annotations:
    # Stakater Reloader opt-in: restart the rollout when a ConfigMap or Secret
    # referenced by this Deployment changes. Must stay a quoted string;
    # annotation values are strings, not booleans.
    reloader.stakater.com/auto: "true"
  labels:
    app: llamacpp
spec:
  replicas: 1
  strategy:
    # Recreate terminates the old pod before the new one is created, so two
    # pods never compete for the single GPU requested below.
    type: Recreate
  selector:
    matchLabels:
      app: llamacpp
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: llamacpp
    spec:
      # "Default" makes the pod inherit the node's DNS resolution settings
      # instead of using the cluster DNS service.
      # NOTE(review): presumably needed to resolve names via the host's
      # resolver - confirm this is intentional.
      dnsPolicy: Default
      # Pin scheduling to one node by hostname label.
      nodeSelector:
        kubernetes.io/hostname: ai.tail2fe2d.ts.net
      # Allow scheduling onto a node tainted workload=ai:NoSchedule.
      tolerations:
        - key: workload
          operator: Equal
          value: ai
          effect: NoSchedule
      containers:
        - name: llamacpp
          image: ghcr.io/ggml-org/llama.cpp:server-rocm-b9501
          imagePullPolicy: IfNotPresent
          # Every key in the llamacpp-config ConfigMap becomes an env var.
          envFrom:
            - configMapRef:
                name: llamacpp-config
          env:
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: llamacpp-hf-token
                  key: token
                  # The container still starts when the Secret or key is
                  # missing; HF_TOKEN is simply left unset.
                  optional: true
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          resources:
            limits:
              # Extended resource: exactly one AMD GPU.
              amd.com/gpu: 1
          # Startup budget: 180 failures x 10 s period = up to 30 minutes
          # before the container is restarted. The readiness probe does not
          # run until this probe has succeeded.
          startupProbe:
            httpGet:
              path: /health
              port: http
            failureThreshold: 180
            periodSeconds: 10
            timeoutSeconds: 5
          # After startup: 3 consecutive failures (about 30 s) mark the pod
          # not ready; it is not restarted by this probe.
          readinessProbe:
            httpGet:
              path: /health
              port: http
            failureThreshold: 3
            periodSeconds: 10
            timeoutSeconds: 5
          volumeMounts:
            - name: models
              mountPath: /models
      volumes:
        - name: models
          # Node-local directory; created on the node if it does not exist.
          hostPath:
            path: /k8s/llamacpp/models
            type: DirectoryOrCreate