#!/bin/bash
#
# gpu-k8s.sh - install the NVIDIA driver/toolkit packages on this host and
# deploy the NVIDIA GPU operator into the cluster with Helm.
#
# Provenance: recovered from the patch "Add gpu-k8s script"
# (commit bc71dc455f79e1f9eb13892f6b79fc9561711a13, author shenlan,
# Wed, 2 Jul 2025 17:12:28 +0800), which created scripts/gpu-k8s.sh.
#
# Requires: sudo, apt-get, helm.
set -euo pipefail

#######################################
# Print an error message to stderr and abort the script.
# Arguments: $* - message text
# Returns:   never returns; exits with status 1
#######################################
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Install the packages required for GPU support on this host.
# Runs apt-get through sudo.
# NOTE(review): the name says "offline", but `apt-get update` contacts
# whatever sources are configured; presumably those point at a local
# mirror - confirm.
# Outputs:   progress message on stdout
# Returns:   non-zero (script aborts under `set -e`) if apt-get fails
#######################################
install_all_offline_packages() {
  echo "Installing GPU driver and toolkit packages"
  # Implementation assumes packages are available locally
  sudo apt-get update
  sudo apt-get install -y \
    nvidia-driver-535 \
    nvidia-headless-535 \
    nvidia-container-toolkit
}

#######################################
# Deploy (or upgrade) the NVIDIA GPU operator release with Helm.
# Installs chart nvidia/gpu-operator as release "gpu-operator" in the
# "gpu-operator" namespace, creating the namespace if needed.
# NOTE(review): driver.enabled/toolkit.enabled are left "true" even though
# install_all_offline_packages also installs a host driver and toolkit;
# confirm that running both is intended.
# Outputs:   warning on stderr if the chart repository could not be added
# Returns:   non-zero if helm is missing or the release fails to deploy
#######################################
deploy_plugin() {
  command -v helm >/dev/null 2>&1 || die "helm is required but was not found in PATH"

  # Adding the repository stays best-effort (it may already be configured),
  # but a failure is reported instead of being silently discarded.
  if ! helm repo add nvidia https://helm.ngc.nvidia.com/nvidia; then
    printf 'warning: "helm repo add nvidia" failed; using existing repo config\n' >&2
  fi

  # The dot in the label key must be escaped, otherwise Helm treats
  # "kubernetes" and "io/gpu" as nested keys. --set-string keeps the value
  # a string rather than a YAML boolean.
  helm upgrade --install gpu-operator nvidia/gpu-operator \
    --namespace gpu-operator \
    --create-namespace \
    --set-string 'nodeSelector.kubernetes\.io/gpu=true' \
    --set driver.enabled=true \
    --set toolkit.enabled=true \
    --set devicePlugin.enabled=true \
    --set operator.runtimeClass="nvidia-container-runtime" \
    --set operator.defaultRuntime=containerd \
    --set containerRuntime.socketPath=/var/run/containerd/containerd.sock
}

#######################################
# Entry point: install host packages, then deploy the operator.
#######################################
main() {
  install_all_offline_packages
  deploy_plugin
}

# Run only when executed directly, so the functions can be sourced safely.
if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
  main "$@"
fi