From 23e5148814f260520f01017ccf6e78241a7a5a91 Mon Sep 17 00:00:00 2001
From: AB-UK
Date: Fri, 6 Mar 2026 23:24:40 +0000
Subject: [PATCH] feat: Add NVIDIA GPU config and update Ollama to use it

---
Review notes (text between the "---" marker and the first "diff --git"
line is commentary and is not part of the commit):

- k8s/apps/ollama/ollama-values.yaml: pins Ollama to node
  uk-desktop.tail2fe2d.ts.net, requests one NVIDIA GPU, and sets
  runtimeClassName: nvidia so the pod uses the RuntimeClass created by
  k8s/core/gpu/runtime-class.yaml.
- k8s/core/gpu/app.yaml: Argo CD Application "gpu-system" that syncs
  k8s/core/gpu with automated prune and self-heal, creating the namespace.
- k8s/core/gpu/kustomization.yaml: renders the nvidia-device-plugin Helm
  chart (0.17.0) using values.yaml, alongside app.yaml and the RuntimeClass.
- k8s/core/gpu/values.yaml: pins the device plugin to the same node, runs it
  under the "nvidia" RuntimeClass, and supplies its default plugin config.
- The diffstat and the ollama-values.yaml hunk header account for the added
  runtimeClassName lines; the "index" blob ids were not recomputed.

 k8s/apps/ollama/ollama-values.yaml | 10 +++++++++-
 k8s/core/gpu/app.yaml              | 20 ++++++++++++++++++++
 k8s/core/gpu/kustomization.yaml    | 15 +++++++++++++++
 k8s/core/gpu/runtime-class.yaml    |  5 +++++
 k8s/core/gpu/values.yaml           | 21 +++++++++++++++++++++
 5 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 k8s/core/gpu/app.yaml
 create mode 100644 k8s/core/gpu/kustomization.yaml
 create mode 100644 k8s/core/gpu/runtime-class.yaml
 create mode 100644 k8s/core/gpu/values.yaml

diff --git a/k8s/apps/ollama/ollama-values.yaml b/k8s/apps/ollama/ollama-values.yaml
index 4d370b7..57899c4 100644
--- a/k8s/apps/ollama/ollama-values.yaml
+++ b/k8s/apps/ollama/ollama-values.yaml
@@ -3,6 +3,14 @@ image:
   pullPolicy: Always
   tag: "latest"
 nodeSelector:
-  kubernetes.io/hostname: master.tail2fe2d.ts.net
+  kubernetes.io/hostname: uk-desktop.tail2fe2d.ts.net
 ingress:
   enabled: false
+# k8s/core/gpu registers the NVIDIA runtime as RuntimeClass "nvidia"; request
+# it here so the GPU is injected even if nvidia is not the default runtime.
+runtimeClassName: nvidia
+ollama:
+  gpu:
+    enabled: true
+    type: 'nvidia'
+    number: 1
diff --git a/k8s/core/gpu/app.yaml b/k8s/core/gpu/app.yaml
new file mode 100644
index 0000000..dff3718
--- /dev/null
+++ b/k8s/core/gpu/app.yaml
@@ -0,0 +1,20 @@
+apiVersion: argoproj.io/v1alpha1
+kind: Application
+metadata:
+  name: gpu-system
+  namespace: argocd
+spec:
+  project: core
+  destination:
+    namespace: gpu-system
+    server: https://kubernetes.default.svc
+  source:
+    repoURL: ssh://git@gt.hexor.cy:30022/ab/homelab.git
+    targetRevision: HEAD
+    path: k8s/core/gpu
+  syncPolicy:
+    automated:
+      selfHeal: true
+      prune: true
+    syncOptions:
+      - CreateNamespace=true
diff --git a/k8s/core/gpu/kustomization.yaml b/k8s/core/gpu/kustomization.yaml
new file mode 100644
index 0000000..45a573e
--- /dev/null
+++ b/k8s/core/gpu/kustomization.yaml
@@ -0,0 +1,15 @@
+apiVersion: kustomize.config.k8s.io/v1beta1
+kind: Kustomization
+
+resources:
+  - app.yaml
+  - runtime-class.yaml
+
+helmCharts:
+  - name: nvidia-device-plugin
+    repo: https://nvidia.github.io/k8s-device-plugin
+    version: 0.17.0
+    releaseName: nvidia-device-plugin
+    namespace: gpu-system
+    valuesFile: values.yaml
+    includeCRDs: true
diff --git a/k8s/core/gpu/runtime-class.yaml b/k8s/core/gpu/runtime-class.yaml
new file mode 100644
index 0000000..c26bd6d
--- /dev/null
+++ b/k8s/core/gpu/runtime-class.yaml
@@ -0,0 +1,5 @@
+apiVersion: node.k8s.io/v1
+kind: RuntimeClass
+metadata:
+  name: nvidia
+handler: nvidia
diff --git a/k8s/core/gpu/values.yaml b/k8s/core/gpu/values.yaml
new file mode 100644
index 0000000..35d603d
--- /dev/null
+++ b/k8s/core/gpu/values.yaml
@@ -0,0 +1,21 @@
+nodeSelector:
+  kubernetes.io/hostname: uk-desktop.tail2fe2d.ts.net
+
+runtimeClassName: nvidia
+
+setAsDefault: false
+
+config:
+  name: "nvidia-plugin-config"
+  map:
+    # use device driver mode instead of dev/nvmem
+    default: |-
+      version: v1
+      flags:
+        migStrategy: none
+        failOnInitError: true
+        nvidiaDriverRoot: "/"
+        plugin:
+          passDeviceSpecs: false
+          deviceListStrategy: envvar
+          deviceIDStrategy: uuid