Add roles for GPU operator and monitoring charts

This commit is contained in:
shenlan 2025-07-01 11:47:11 +08:00
parent 9d6d787c93
commit ad0960d036
9 changed files with 61 additions and 0 deletions

View File

@ -0,0 +1,11 @@
#!/bin/bash
# Install (or upgrade in place) the NVIDIA GPU Operator Helm release
# "gpu-operator" from the nvidia/gpu-operator chart into the gpu-operator
# namespace, creating that namespace if it does not exist yet.
#
# Takes no arguments. Requires `helm` on PATH and the `nvidia` chart repo
# to be registered already.
set -euo pipefail

# The label key contains dots, which Helm's --set syntax treats as path
# separators unless escaped; unescaped, "kubernetes.io/gpu" would be split
# into nested maps. --set-string keeps the value as the string "true",
# since Kubernetes node selector values must be strings, not booleans.
# NOTE(review): confirm the chart actually consumes a top-level
# `nodeSelector` value; that cannot be verified from this script.
helm upgrade --install gpu-operator nvidia/gpu-operator \
  --namespace gpu-operator \
  --create-namespace \
  --set-string 'nodeSelector.kubernetes\.io/gpu=true' \
  --set driver.enabled=true \
  --set toolkit.enabled=true \
  --set devicePlugin.enabled=true \
  --set operator.runtimeClass="nvidia-container-runtime" \
  --set operator.defaultRuntime=containerd \
  --set containerRuntime.socketPath=/var/snap/microk8s/common/run/containerd.sock

View File

@ -0,0 +1,3 @@
---
# Runs this role's files/setup.sh on the target host.
# Executed only where `is_primary` evaluates to true (variable defined
# outside this file).
- name: Install GPU Operator
  ansible.builtin.script:
    cmd: files/setup.sh
  when: is_primary | bool

View File

@ -0,0 +1,9 @@
---
# Registers the Helm chart repositories used by the install scripts and
# refreshes the local chart index.
# Each `helm repo add` is followed by `|| true`, so a failing add does not
# stop the script; `ignore_errors` additionally lets the play continue if
# the task as a whole fails (e.g. on `helm repo update`).
# Executed only where `is_primary` evaluates to true (variable defined
# outside this file).
- name: Enable community plugins and third-party helm charts
  ansible.builtin.shell:
    cmd: |
      helm repo add kubernetes-dashboard https://kubernetes.github.io/dashboard/ || true
      helm repo add nvidia https://helm.ngc.nvidia.com/nvidia || true
      helm repo add prometheus-community https://prometheus-community.github.io/helm-charts || true
      helm repo add metrics-server https://kubernetes-sigs.github.io/metrics-server/ || true
      helm repo update
  ignore_errors: true
  when: is_primary | bool

View File

@ -0,0 +1,11 @@
#!/bin/bash
# Install (or upgrade in place) the Kubernetes Dashboard Helm release into
# the kubernetes-dashboard namespace, pinning every listed component to a
# single node through a kubernetes.io/hostname node selector.
#
# Usage: setup.sh <node-hostname>
#   <node-hostname>  value for the kubernetes.io/hostname node selector
#
# Exits non-zero with a usage message when the hostname is missing or empty,
# instead of installing the chart with an empty selector value.
set -euo pipefail

node="${1:?usage: $0 <node-hostname>}"

# Dots inside the label key are escaped so Helm treats
# "kubernetes.io/hostname" as one key rather than a nested path.
selector='nodeSelector.kubernetes\.io/hostname'

# --set-string keeps the hostname a string even if it looks numeric or
# boolean; node selector values must be strings.
helm upgrade --install kubernetes-dashboard kubernetes-dashboard/kubernetes-dashboard \
  --create-namespace \
  --namespace kubernetes-dashboard \
  --set-string "app.scheduling.${selector}=${node}" \
  --set-string "auth.${selector}=${node}" \
  --set-string "api.${selector}=${node}" \
  --set-string "web.${selector}=${node}" \
  --set-string "metricsScraper.${selector}=${node}" \
  --set-string "kong.${selector}=${node}" \
  --set persistence.enabled=false

View File

@ -0,0 +1,3 @@
---
# Runs this role's files/setup.sh, passing the host's inventory name as the
# script's single argument.
# Executed only where `is_primary` evaluates to true (variable defined
# outside this file).
- name: Install kubernetes dashboard
  ansible.builtin.script:
    cmd: "files/setup.sh {{ inventory_hostname }}"
  when: is_primary | bool

View File

@ -0,0 +1,5 @@
#!/bin/bash
# Install (or upgrade in place) the metrics-server Helm release into the
# kube-system namespace, pinned to a single node through a
# kubernetes.io/hostname node selector.
#
# Usage: setup.sh <node-hostname>
#   <node-hostname>  value for the kubernetes.io/hostname node selector
#
# Exits non-zero with a usage message when the hostname is missing or empty,
# instead of installing the chart with an empty selector value.
set -euo pipefail

node="${1:?usage: $0 <node-hostname>}"

# The dots in the label key are escaped so Helm treats it as one key;
# --set-string keeps the hostname a string, as node selectors require.
helm upgrade --install metrics-server metrics-server/metrics-server \
  --namespace kube-system \
  --set-string "nodeSelector.kubernetes\.io/hostname=${node}" \
  --set persistence.enabled=false

View File

@ -0,0 +1,3 @@
---
# Runs this role's files/setup.sh, passing the host's inventory name as the
# script's single argument.
# Executed only where `is_primary` evaluates to true (variable defined
# outside this file).
- name: Install metrics server
  ansible.builtin.script:
    cmd: "files/setup.sh {{ inventory_hostname }}"
  when: is_primary | bool

View File

@ -0,0 +1,13 @@
#!/bin/bash
# Install (or upgrade in place) the prometheus-community/prometheus Helm
# release "prometheus" into the chutes namespace (created if missing), with
# persistent storage disabled and the listed components pinned to a single
# node through a kubernetes.io/hostname node selector.
#
# Usage: setup.sh <node-hostname>
#   <node-hostname>  value for the kubernetes.io/hostname node selector
#
# Exits non-zero with a usage message when the hostname is missing or empty,
# instead of installing the chart with an empty selector value.
set -euo pipefail

node="${1:?usage: $0 <node-hostname>}"

# Dots inside the label key are escaped so Helm treats
# "kubernetes.io/hostname" as one key rather than a nested path.
selector='nodeSelector.kubernetes\.io/hostname'

# Subchart values are addressed by subchart name. The pushgateway is
# referred to as `prometheus-pushgateway` (matching the persistence
# override below) and kube-state-metrics as `kube-state-metrics`.
# NOTE(review): `prometheus-server.persistentVolume.enabled` is kept from
# the original; confirm the chart reads it, as the server is otherwise
# configured under `server.` here.
# --set-string keeps the hostname a string, as node selectors require.
helm upgrade --install prometheus prometheus-community/prometheus \
  --namespace chutes \
  --create-namespace \
  --set server.persistentVolume.enabled=false \
  --set alertmanager.persistentVolume.enabled=false \
  --set prometheus-pushgateway.persistentVolume.enabled=false \
  --set prometheus-server.persistentVolume.enabled=false \
  --set alertmanager.persistence.enabled=false \
  --set-string "server.${selector}=${node}" \
  --set-string "alertmanager.${selector}=${node}" \
  --set-string "prometheus-pushgateway.${selector}=${node}" \
  --set-string "kube-state-metrics.${selector}=${node}"

View File

@ -0,0 +1,3 @@
---
# Runs this role's files/setup.sh, passing the host's inventory name as the
# script's single argument.
# Executed only where `is_primary` evaluates to true (variable defined
# outside this file).
- name: Install Prometheus
  ansible.builtin.script:
    cmd: "files/setup.sh {{ inventory_hostname }}"
  when: is_primary | bool