Added llama.cpp on ai
This commit is contained in:
@@ -0,0 +1,70 @@
|
||||
---
# Deployment running a single llama.cpp server pod (ROCm image) pinned to one
# node, with model files kept on that node's local disk via a hostPath volume.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: llamacpp
  annotations:
    # Stakater Reloader annotation; per Reloader's docs this opts the workload
    # into automatic restarts when a referenced ConfigMap/Secret changes.
    # NOTE(review): only has an effect if Reloader is installed in the cluster.
    reloader.stakater.com/auto: "true"
  labels:
    app: llamacpp
spec:
  replicas: 1
  strategy:
    # Recreate terminates the old pod before the new one is created.
    # NOTE(review): presumably chosen because the old and new pod cannot both
    # hold the single requested GPU at the same time -- confirm.
    type: Recreate
  selector:
    matchLabels:
      app: llamacpp
  template:
    metadata:
      labels:
        # Must match spec.selector.matchLabels above.
        app: llamacpp
    spec:
      # Pin the pod to one specific node by hostname.
      nodeSelector:
        kubernetes.io/hostname: ai.tail2fe2d.ts.net
      # Allow scheduling onto a node tainted workload=ai:NoSchedule.
      tolerations:
        - key: workload
          operator: Equal
          value: ai
          effect: NoSchedule
      containers:
        - name: llamacpp
          # Floating tag; imagePullPolicy: Always re-pulls it on every pod
          # start, so a restart may pick up a newer build.
          image: ghcr.io/ggml-org/llama.cpp:server-rocm
          imagePullPolicy: Always
          # Every key of the llamacpp-config ConfigMap becomes an env var.
          envFrom:
            - configMapRef:
                name: llamacpp-config
          env:
            # Read from the llamacpp-hf-token Secret; optional, so the pod
            # still starts when the Secret or key is absent.
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: llamacpp-hf-token
                  key: token
                  optional: true
          ports:
            - name: http
              containerPort: 8080
              protocol: TCP
          resources:
            limits:
              # Extended resource for one AMD GPU.
              # NOTE(review): presumably advertised by the AMD GPU device
              # plugin on the target node -- confirm it is deployed there.
              amd.com/gpu: 1
          # Startup budget: 180 failures x 10 s period = up to 30 minutes
          # before the container is restarted.
          # NOTE(review): presumably sized for model download/load -- confirm.
          startupProbe:
            httpGet:
              path: /health
              port: http
            failureThreshold: 180
            periodSeconds: 10
            timeoutSeconds: 5
          # After startup: marked unready after 3 consecutive failures
          # (checked every 10 s).
          readinessProbe:
            httpGet:
              path: /health
              port: http
            failureThreshold: 3
            periodSeconds: 10
            timeoutSeconds: 5
          volumeMounts:
            - name: models
              mountPath: /models
      volumes:
        # Node-local directory, created if missing (DirectoryOrCreate).
        # Contents exist only on the node selected above.
        - name: models
          hostPath:
            path: /k8s/llamacpp/models
            type: DirectoryOrCreate
|
||||
Reference in New Issue
Block a user