Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| cd1c24bdeb | |||
| f129977993 | |||
| cf4c70075c | |||
| 2b979b5f43 | |||
| dbecdb7069 | |||
| fb7dfbee57 | |||
| 6b5a0fc31f |
@@ -0,0 +1,12 @@
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: llamacpp-cuda-config
|
||||
data:
|
||||
LLAMA_CACHE: /models
|
||||
LLAMA_ARG_HOST: 0.0.0.0
|
||||
LLAMA_ARG_PORT: "8080"
|
||||
LLAMA_ARG_HF_REPO: "unsloth/gemma-4-12b-it-GGUF:Q6_K"
|
||||
LLAMA_ARG_CTX_SIZE: "128000"
|
||||
LLAMA_ARG_FLASH_ATTN: auto
|
||||
LLAMA_ARG_FIT: "on"
|
||||
@@ -0,0 +1,72 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: llamacpp-cuda
|
||||
annotations:
|
||||
reloader.stakater.com/auto: "true"
|
||||
labels:
|
||||
app: llamacpp-cuda
|
||||
spec:
|
||||
replicas: 1
|
||||
strategy:
|
||||
type: Recreate
|
||||
selector:
|
||||
matchLabels:
|
||||
app: llamacpp-cuda
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: llamacpp-cuda
|
||||
spec:
|
||||
dnsPolicy: Default
|
||||
runtimeClassName: nvidia
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: uk-desktop.tail2fe2d.ts.net
|
||||
tolerations:
|
||||
- key: workload
|
||||
operator: Equal
|
||||
value: desktop
|
||||
effect: NoSchedule
|
||||
containers:
|
||||
- name: llamacpp
|
||||
image: ghcr.io/ggml-org/llama.cpp:server-cuda-b9501
|
||||
imagePullPolicy: IfNotPresent
|
||||
envFrom:
|
||||
- configMapRef:
|
||||
name: llamacpp-cuda-config
|
||||
env:
|
||||
- name: HF_TOKEN
|
||||
valueFrom:
|
||||
secretKeyRef:
|
||||
name: llamacpp-hf-token
|
||||
key: token
|
||||
optional: true
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 8080
|
||||
protocol: TCP
|
||||
resources:
|
||||
limits:
|
||||
nvidia.com/gpu: 1
|
||||
startupProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
failureThreshold: 180
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
failureThreshold: 3
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
volumeMounts:
|
||||
- name: models
|
||||
mountPath: /models
|
||||
volumes:
|
||||
- name: models
|
||||
hostPath:
|
||||
path: /data/llama.cpp/models
|
||||
type: DirectoryOrCreate
|
||||
@@ -3,6 +3,9 @@ kind: Kustomization
|
||||
|
||||
resources:
|
||||
- app.yaml
|
||||
- configmap-cuda.yaml
|
||||
- configmap.yaml
|
||||
- deployment-cuda.yaml
|
||||
- deployment.yaml
|
||||
- service-cuda.yaml
|
||||
- service.yaml
|
||||
|
||||
@@ -0,0 +1,15 @@
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: llamacpp-cuda
|
||||
labels:
|
||||
app: llamacpp-cuda
|
||||
spec:
|
||||
type: ClusterIP
|
||||
selector:
|
||||
app: llamacpp-cuda
|
||||
ports:
|
||||
- name: http
|
||||
port: 8080
|
||||
targetPort: http
|
||||
protocol: TCP
|
||||
@@ -15,14 +15,14 @@ resources:
|
||||
- service.yaml
|
||||
- ingress.yaml
|
||||
|
||||
helmCharts:
|
||||
- name: yacy
|
||||
repo: https://gt.hexor.cy/api/packages/ab/helm
|
||||
version: 0.1.2
|
||||
releaseName: yacy
|
||||
namespace: n8n
|
||||
valuesFile: values-yacy.yaml
|
||||
includeCRDs: true
|
||||
# helmCharts:
|
||||
# - name: yacy
|
||||
# repo: https://gt.hexor.cy/api/packages/ab/helm
|
||||
# version: 0.1.2
|
||||
# releaseName: yacy
|
||||
# namespace: n8n
|
||||
# valuesFile: values-yacy.yaml
|
||||
# includeCRDs: true
|
||||
|
||||
commonLabels:
|
||||
app.kubernetes.io/name: n8n
|
||||
|
||||
@@ -7,7 +7,7 @@ kind: Kustomization
|
||||
helmCharts:
|
||||
- name: longhorn
|
||||
repo: https://charts.longhorn.io
|
||||
version: 1.11.2
|
||||
version: 1.12.0
|
||||
releaseName: longhorn
|
||||
namespace: longhorn
|
||||
valuesFile: values.yaml
|
||||
|
||||
Reference in New Issue
Block a user