From f0bd7bf4053d9db33b82d9d0b831070d6073a0ef Mon Sep 17 00:00:00 2001
From: shenlan
Date: Tue, 24 Jun 2025 11:25:41 +0800
Subject: [PATCH] Improve gpu-k8s role variable handling

Replace the scalar master_ip/node_ip role variables with the lists
master_ips/node_ips and pass them to `sealos run` as comma-separated
values. The first entry of master_ips is the host that runs the cluster
bootstrap and the GPU validation tasks.

The --nodes flag is emitted only when node_ips is non-empty: the role
default is an empty list, and a bare `--nodes` would otherwise swallow
the following `--env` argument as its value.

---
Reviewer notes (ignored by `git am`):
- This patch was restored from a copy whose line breaks were collapsed.
  The indentation of the YAML context lines is presumed from common
  Ansible layout -- TODO confirm with `git apply --check` before applying.
- The post-image blob hashes on the `index` lines predate the wording and
  `--nodes` changes and are informational only.

 docs/gpu-k8s-role.md                         | 20 +++++++++++++++++--
 playbooks/demo_gpu_k8s.yml                   |  5 +++++
 .../roles/vhosts/gpu-k8s/defaults/main.yml   |  4 ++--
 .../vhosts/gpu-k8s/tasks/install_cluster.yml |  6 +++---
 .../roles/vhosts/gpu-k8s/tasks/run_test.yml  |  4 ++--
 5 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/docs/gpu-k8s-role.md b/docs/gpu-k8s-role.md
index e8d4eb6..45c9eea 100644
--- a/docs/gpu-k8s-role.md
+++ b/docs/gpu-k8s-role.md
@@ -10,7 +10,7 @@ The role performs three main tasks:
 2. **Install NVIDIA drivers and container runtime** on the target hosts so that Kubernetes can access GPU resources.
 3. **Verify GPU access** by deploying the official NVIDIA device plugin and running a small CUDA workload.
 
-The following command is used to create the cluster:
+The following command is used to create the cluster (example with one master and one worker):
 
 ```bash
 sealos run \
@@ -35,7 +35,23 @@ Add the role to your playbook:
     - gpu-k8s
 ```
 
-Run the playbook with your inventory that contains the master and node IP addresses.
+Example playbook snippet defining the IP lists:
+
+```yaml
+- hosts: all
+  vars:
+    master_ips:
+      - "172.16.11.120"
+    node_ips:
+      - "172.16.11.152"
+  roles:
+    - gpu-k8s
+```
+
+The role expects `master_ips` and `node_ips` variables, which are lists of IP addresses. Up to
+three masters can be specified; `node_ips` may be empty for a master-only cluster.
+
+Run the playbook with an inventory whose host names are these IP addresses.
 
 ```bash
 ansible-playbook -i inventory/hosts/all playbooks/demo_gpu_k8s.yml
diff --git a/playbooks/demo_gpu_k8s.yml b/playbooks/demo_gpu_k8s.yml
index 270765a..980bab6 100644
--- a/playbooks/demo_gpu_k8s.yml
+++ b/playbooks/demo_gpu_k8s.yml
@@ -1,4 +1,9 @@
 - hosts: all
   become: true
+  vars:
+    master_ips:
+      - "172.16.11.120"
+    node_ips:
+      - "172.16.11.152"
   roles:
     - gpu-k8s
diff --git a/playbooks/roles/vhosts/gpu-k8s/defaults/main.yml b/playbooks/roles/vhosts/gpu-k8s/defaults/main.yml
index 92c84eb..dfe4eaf 100644
--- a/playbooks/roles/vhosts/gpu-k8s/defaults/main.yml
+++ b/playbooks/roles/vhosts/gpu-k8s/defaults/main.yml
@@ -2,7 +2,7 @@
 sealos_version: v1.29.9
 cilium_version: v1.13.4
 helm_version: v3.9.4
-master_ip: "172.16.11.120"
-node_ip: "172.16.11.152"
+master_ips: []  # List of up to three master node IPs; the first one bootstraps the cluster
+node_ips: []  # List of worker node IPs (may stay empty)
 sealos_cmd_env: '{}'
 kubeadm_init_cmd: "kubeadm init --skip-phases=addon/kube-proxy"
diff --git a/playbooks/roles/vhosts/gpu-k8s/tasks/install_cluster.yml b/playbooks/roles/vhosts/gpu-k8s/tasks/install_cluster.yml
index caa7eb9..e001dd4 100644
--- a/playbooks/roles/vhosts/gpu-k8s/tasks/install_cluster.yml
+++ b/playbooks/roles/vhosts/gpu-k8s/tasks/install_cluster.yml
@@ -4,10 +4,10 @@
       registry.cn-shanghai.aliyuncs.com/labring/kubernetes:{{ sealos_version }} \
       registry.cn-shanghai.aliyuncs.com/labring/cilium:{{ cilium_version }} \
       registry.cn-shanghai.aliyuncs.com/labring/helm:{{ helm_version }} \
-      --masters {{ master_ip }} \
-      --nodes {{ node_ip }} \
+      --masters {{ master_ips | join(',') }} \
+      {{ '--nodes ' ~ (node_ips | join(',')) if node_ips else '' }} \
       --env '{{ sealos_cmd_env }}' \
       --cmd "{{ kubeadm_init_cmd }}"
   args:
     executable: /bin/bash
-  when: inventory_hostname == master_ip
+  when: inventory_hostname == (master_ips | first)
diff --git a/playbooks/roles/vhosts/gpu-k8s/tasks/run_test.yml b/playbooks/roles/vhosts/gpu-k8s/tasks/run_test.yml
index c0fd468..9ff04d2 100644
--- a/playbooks/roles/vhosts/gpu-k8s/tasks/run_test.yml
+++ b/playbooks/roles/vhosts/gpu-k8s/tasks/run_test.yml
@@ -2,7 +2,7 @@
   shell: kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.5/nvidia-device-plugin.yml
   args:
     executable: /bin/bash
-  when: inventory_hostname == master_ip
+  when: inventory_hostname == (master_ips | first)
 
 - name: Run CUDA validation pod
   shell: |
@@ -10,4 +10,4 @@
     kubectl delete pod gpu-test --wait
   args:
     executable: /bin/bash
-  when: inventory_hostname == master_ip
+  when: inventory_hostname == (master_ips | first)