Compare commits
1 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| cf1db3557e |
@@ -1,12 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: llamacpp-cuda-config
|
||||
data:
|
||||
LLAMA_CACHE: /models
|
||||
LLAMA_ARG_HOST: 0.0.0.0
|
||||
LLAMA_ARG_PORT: "8080"
|
||||
LLAMA_ARG_HF_REPO: "unsloth/gemma-4-12b-it-GGUF:Q6_K"
|
||||
LLAMA_ARG_CTX_SIZE: "32768"
|
||||
LLAMA_ARG_FLASH_ATTN: auto
|
||||
LLAMA_ARG_FIT: "on"
|
||||
@@ -1,72 +0,0 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llamacpp-cuda
|
||||
annotations:
|
||||
reloader.stakater.com/auto: "true"
|
||||
labels:
|
||||
app: llamacpp-cuda
|
||||
spec:
|
||||
replicas: 1
|
||||
strategy:
|
||||
type: Recreate
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llamacpp-cuda
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: llamacpp-cuda
|
||||
spec:
|
||||
dnsPolicy: Default
|
||||
runtimeClassName: nvidia
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: uk-desktop.tail2fe2d.ts.net
|
||||
tolerations:
|
||||
- key: workload
|
||||
operator: Equal
|
||||
value: desktop
|
||||
effect: NoSchedule
|
||||
containers:
|
||||
- name: llamacpp
|
||||
image: ghcr.io/ggml-org/llama.cpp:server-cuda-b9501
|
||||
imagePullPolicy: IfNotPresent
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: llamacpp-cuda-config
|
||||
env:
|
||||
- name: HF_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: llamacpp-hf-token
|
||||
key: token
|
||||
optional: true
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8080
|
||||
protocol: TCP
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 1
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
failureThreshold: 180
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
failureThreshold: 3
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
volumeMounts:
|
||||
- name: models
|
||||
mountPath: /models
|
||||
volumes:
|
||||
- name: models
|
||||
hostPath:
|
||||
path: /data/llama.cpp/models
|
||||
type: DirectoryOrCreate
|
||||
@@ -3,9 +3,6 @@ kind: Kustomization
|
||||
|
||||
resources:
|
||||
- app.yaml
|
||||
- configmap-cuda.yaml
|
||||
- configmap.yaml
|
||||
- deployment-cuda.yaml
|
||||
- deployment.yaml
|
||||
- service-cuda.yaml
|
||||
- service.yaml
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llamacpp-cuda
|
||||
labels:
|
||||
app: llamacpp-cuda
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llamacpp-cuda
|
||||
ports:
|
||||
- name: http
|
||||
port: 8080
|
||||
targetPort: http
|
||||
protocol: TCP
|
||||
@@ -11,7 +11,7 @@ spec:
|
||||
selector:
|
||||
matchLabels:
|
||||
app: pasarguard
|
||||
replicas: 2
|
||||
replicas: 1
|
||||
strategy:
|
||||
type: RollingUpdate
|
||||
template:
|
||||
@@ -50,10 +50,6 @@ spec:
|
||||
value: "/app/tls/tls.crt"
|
||||
- name: UVICORN_SSL_KEYFILE
|
||||
value: "/app/tls/tls.key"
|
||||
- name: UVICORN_PROXY_HEADERS
|
||||
value: "true"
|
||||
- name: FORWARDED_ALLOW_IPS
|
||||
value: "*"
|
||||
- name: CUSTOM_TEMPLATES_DIRECTORY
|
||||
value: "/code/app/templates/"
|
||||
- name: SUBSCRIPTION_PAGE_TEMPLATE
|
||||
|
||||
Reference in New Issue
Block a user