Added amd-gpu controller
Check with kubeconform / lint (push) Successful in 33s
Update Kubernetes Services Wiki / Generate and Update K8s Wiki (push) Failing after 11m48s
Auto-update README / Generate README and Create MR (push) Failing after 14m2s

This commit is contained in:
Ultradesu
2026-06-04 16:46:40 +03:00
parent 0c1aa7d633
commit 6b717f5219
2 changed files with 52 additions and 0 deletions
+31
View File
@@ -0,0 +1,31 @@
# Helm values overriding chart defaults for the AMD GPU device plugin.
# NOTE(review): presumably this is the amd-gpu-values.yaml referenced from the
# kustomization's helmCharts entry - confirm the file name.

# Both the `nfd` and `labeller` toggles are switched off.
nfd:
enabled: false
labeller:
enabled: false
# `dp` image: the ROCm k8s-device-plugin image, pinned to an explicit tag.
# The tag is quoted so it is always read as a string.
dp:
image:
repository: docker.io/rocm/k8s-device-plugin
tag: "1.31.0.9"
# Rolling updates, with at most one pod unavailable at a time.
updateStrategy:
type: RollingUpdate
rollingUpdate:
maxUnavailable: 1
# Hardened container settings: no privilege escalation, all capabilities dropped.
# NOTE(review): verify the device plugin still works with every capability
# dropped on the target node.
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
# Tolerate the `workload=ai:NoSchedule` taint so the pod may be scheduled on
# nodes carrying it.
tolerations:
- key: workload
operator: Equal
value: ai
effect: NoSchedule
# Node selection is enabled and limited to amd64 nodes whose hostname label is
# ai.tail2fe2d.ts.net.
node_selector_enabled: true
node_selector:
kubernetes.io/arch: amd64
kubernetes.io/hostname: ai.tail2fe2d.ts.net
+21
View File
@@ -13,3 +13,24 @@ helmCharts:
namespace: gpu-system
valuesFile: values.yaml
includeCRDs: true
# Chart `amd-gpu` from the ROCm k8s-device-plugin chart repository, pinned to
# version 0.21.0 and rendered as release `amd-gpu-device-plugin` in the
# `gpu-system` namespace.
- name: amd-gpu
repo: https://rocm.github.io/k8s-device-plugin/
version: 0.21.0
releaseName: amd-gpu-device-plugin
namespace: gpu-system
# Value overrides for this chart are read from amd-gpu-values.yaml.
valuesFile: amd-gpu-values.yaml
# CRDs shipped with the chart are included in the rendered output.
includeCRDs: true
# Post-render patch for the DaemonSet `amd-gpu-device-plugin-daemonset` in the
# `gpu-system` namespace.
patches:
- target:
group: apps
version: v1
kind: DaemonSet
name: amd-gpu-device-plugin-daemonset
namespace: gpu-system
# JSON patch that replaces the pod template's whole nodeSelector with exactly
# two labels: kubernetes.io/arch=amd64 and
# kubernetes.io/hostname=ai.tail2fe2d.ts.net.
# `replace` requires that /spec/template/spec/nodeSelector already exists in
# the rendered DaemonSet.
# NOTE(review): the same two labels are also set via `node_selector` in
# amd-gpu-values.yaml; presumably this patch removes extra selector keys the
# chart adds by default - confirm, and keep both places in sync.
patch: |-
- op: replace
path: /spec/template/spec/nodeSelector
value:
kubernetes.io/arch: amd64
kubernetes.io/hostname: ai.tail2fe2d.ts.net