gpu-k8s: accept hostnames (masters/nodes) as an alternative to IP lists

Summary of this patch:
  docs      document the `masters` / `nodes` variables.
  defaults  add empty `masters` / `nodes` lists.
  files     mark get_labring_registry.sh executable.
  tasks     derive master_ips / node_ips from each host's inventory
            `ansible_host` when only hostnames are given, validate the
            result before anything is installed, and select the ops host
            from `masters` first, falling back to `master_ips`.

Review notes:
  Jinja filters bind tighter than comparisons, so the length test is
  parenthesised before it is piped into `ternary`.
  `masters` is always defined (role default `[]`), so the fallback uses
  `default(master_ips, true)` to also trigger on an empty list.
  The post-image blob ids on the `index` lines were not regenerated after
  these review edits.

diff --git a/docs/gpu-k8s-role.md b/docs/gpu-k8s-role.md
index 119a87e..be9ac4c 100644
--- a/docs/gpu-k8s-role.md
+++ b/docs/gpu-k8s-role.md
@@ -59,8 +59,27 @@ Example playbook snippet defining the IP lists:
     - gpu-k8s
 ```
 
-The playbook expects `master_ips` and `node_ips` variables which are lists of IP addresses. Up to
-three masters can be specified.
+You can also specify hostnames and let the role look up the IPs:
+
+```yaml
+- hosts: all
+  vars:
+    masters:
+      - "k8s-1"
+    nodes:
+      - "k8s-2"
+      - "k8s-3"
+  roles:
+    - ssh-trust
+    - gpu-k8s
+```
+
+The playbook expects at least one master and one node. You can provide the
+addresses directly via `master_ips` and `node_ips`, or give hostnames in the
+`masters` and `nodes` variables. When hostnames are used, the role will look up
+their `ansible_host` values from the inventory to obtain the IPs. If both forms
+are set, the explicit IP lists take precedence. Up to three masters can be
+specified.
 
 Run the playbook with your inventory that contains the master and node IP addresses.
 
diff --git a/playbooks/roles/vhosts/gpu-k8s/defaults/main.yml b/playbooks/roles/vhosts/gpu-k8s/defaults/main.yml
index dbd9fc2..7eb77f6 100644
--- a/playbooks/roles/vhosts/gpu-k8s/defaults/main.yml
+++ b/playbooks/roles/vhosts/gpu-k8s/defaults/main.yml
@@ -3,6 +3,8 @@ sealos_version: latest
 kubernetes_version: v1.25.16
 cilium_version: v1.13.4
 helm_version: v3.9.4
+masters: [] # List of master hostnames
+nodes: [] # List of worker hostnames
 master_ips: [] # List of up to three master node IPs
 node_ips: [] # List of worker node IPs
 sealos_cmd_env: '{}'
diff --git a/playbooks/roles/vhosts/gpu-k8s/files/get_labring_registry.sh b/playbooks/roles/vhosts/gpu-k8s/files/get_labring_registry.sh
old mode 100644
new mode 100755
diff --git a/playbooks/roles/vhosts/gpu-k8s/tasks/install_cluster.yml b/playbooks/roles/vhosts/gpu-k8s/tasks/install_cluster.yml
index f948555..33d8cb7 100644
--- a/playbooks/roles/vhosts/gpu-k8s/tasks/install_cluster.yml
+++ b/playbooks/roles/vhosts/gpu-k8s/tasks/install_cluster.yml
@@ -10,6 +10,27 @@
     sealos_version: "{{ (sealos_latest.content | from_json).tag_name }}"
   when: sealos_version is not defined or sealos_version == 'latest'
 
+# Explicit master_ips/node_ips win; otherwise derive them from the inventory
+# `ansible_host` of each name listed in masters/nodes.
+- name: Resolve master and node IPs from hostnames when needed
+  set_fact:
+    master_ips: >-
+      {{ ((master_ips | default([])) | length > 0)
+         | ternary(master_ips,
+                   masters | default([]) | map('extract', hostvars, 'ansible_host') | list) }}
+    node_ips: >-
+      {{ ((node_ips | default([])) | length > 0)
+         | ternary(node_ips,
+                   nodes | default([]) | map('extract', hostvars, 'ansible_host') | list) }}
+
+# Runs unconditionally, before any install step, so empty lists fail here.
+- name: Validate master_ips and node_ips
+  assert:
+    that:
+      - master_ips | length > 0
+      - node_ips | length > 0
+    fail_msg: "Provide masters/nodes hostnames or master_ips/node_ips with at least one entry"
+
 - name: Install sealos CLI
   shell: |
     VERSION={{ sealos_version }}
@@ -19,7 +40,7 @@
     mv sealos /usr/bin/sealos
   args:
     executable: /bin/bash
-  when: inventory_hostname == (ops_host | default(master_ips | first))
+  when: inventory_hostname == (ops_host | default(masters | default(master_ips, true) | first))
 
 - name: Install Helm
   shell: |
@@ -29,7 +50,7 @@
     rm -f get_helm.sh
   args:
     executable: /bin/bash
-  when: inventory_hostname == (ops_host | default(master_ips | first)) and (ansible_facts['distribution'] == 'Ubuntu' or ansible_facts['distribution'] == 'Debian')
+  when: inventory_hostname == (ops_host | default(masters | default(master_ips, true) | first)) and (ansible_facts['distribution'] == 'Ubuntu' or ansible_facts['distribution'] == 'Debian')
 
 - name: Install nerdctl
   shell: |
@@ -37,7 +58,7 @@
     tar -C /usr/local -xzf nerdctl-2.0.2-linux-amd64.tar.gz
   args:
     executable: /bin/bash
-  when: inventory_hostname == (ops_host | default(master_ips | first))
+  when: inventory_hostname == (ops_host | default(masters | default(master_ips, true) | first))
 
 - name: Determine LabRing registry prefix
   shell: "{{ role_path }}/files/get_labring_registry.sh"
@@ -46,7 +67,7 @@
   delegate_to: localhost
   run_once: true
   become: false
-  when: inventory_hostname == (ops_host | default(master_ips | first))
+  when: inventory_hostname == (ops_host | default(masters | default(master_ips, true) | first))
 
 - name: Run sealos to create Kubernetes cluster
   shell: |
@@ -60,13 +81,13 @@
       --cmd "{{ kubeadm_init_cmd }}"
   args:
     executable: /bin/bash
-  when: inventory_hostname == (ops_host | default(master_ips | first))
+  when: inventory_hostname == (ops_host | default(masters | default(master_ips, true) | first))
 
 - name: Show sealos CLI version
   command: sealos version
   register: sealos_cli_version
   changed_when: false
-  when: inventory_hostname == (ops_host | default(master_ips | first))
+  when: inventory_hostname == (ops_host | default(masters | default(master_ips, true) | first))
 
 - name: Display Kubernetes cluster status
   shell: kubectl get nodes -o wide
@@ -74,9 +95,9 @@
     executable: /bin/bash
   register: k8s_status
   changed_when: false
-  when: inventory_hostname == (ops_host | default(master_ips | first))
+  when: inventory_hostname == (ops_host | default(masters | default(master_ips, true) | first))
 
 - name: Print cluster nodes
   debug:
     msg: "{{ k8s_status.stdout }}"
-  when: k8s_status is defined and inventory_hostname == (ops_host | default(master_ips | first))
+  when: k8s_status is defined and inventory_hostname == (ops_host | default(masters | default(master_ips, true) | first))
diff --git a/playbooks/roles/vhosts/gpu-k8s/tasks/run_test.yml b/playbooks/roles/vhosts/gpu-k8s/tasks/run_test.yml
index 6366def..76255f4 100644
--- a/playbooks/roles/vhosts/gpu-k8s/tasks/run_test.yml
+++ b/playbooks/roles/vhosts/gpu-k8s/tasks/run_test.yml
@@ -2,7 +2,7 @@
   shell: kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.14.5/nvidia-device-plugin.yml
   args:
     executable: /bin/bash
-  when: master_ips|length > 0 and inventory_hostname == (master_ips | first)
+  when: master_ips|length > 0 and inventory_hostname == (ops_host | default(masters | default(master_ips, true) | first))
 
 - name: Run CUDA validation pod
   shell: |
@@ -10,5 +10,5 @@
     kubectl delete pod gpu-test --wait
   args:
     executable: /bin/bash
-  when: master_ips|length > 0 and inventory_hostname == (master_ips | first)
+  when: master_ips|length > 0 and inventory_hostname == (ops_host | default(masters | default(master_ips, true) | first))
 