feat(gpu_inference): add comprehensive GPU inference infrastructure with Sealos, Ray, and vLLM
This commit is contained in:
parent
413d46995b
commit
515ba95c75
8
gpu_inference_01_prepare.yml
Normal file
8
gpu_inference_01_prepare.yml
Normal file
@ -0,0 +1,8 @@
|
||||
---
# Stage 01: host preparation. Targets every inventory host and runs the
# listed roles with privilege escalation.
- name: Prepare Host Environment
  become: true
  hosts: all
  roles:
    - role: roles/vhosts/common
    - role: roles/vhosts/kernel_tuning
    - role: roles/docker/container_runtime
|
||||
7
gpu_inference_02_sealos.yml
Normal file
7
gpu_inference_02_sealos.yml
Normal file
@ -0,0 +1,7 @@
|
||||
---
# Stage 02: Kubernetes bootstrap. Targets the `masters` group; the roles are
# applied in the order listed.
- name: Install Kubernetes via Sealos
  become: true
  hosts: masters
  roles:
    - role: roles/vhosts/sealos_cluster
    - role: roles/vhosts/cni_cilium
|
||||
6
gpu_inference_03_gpu_operator.yml
Normal file
6
gpu_inference_03_gpu_operator.yml
Normal file
@ -0,0 +1,6 @@
|
||||
---
# Stage 03: NVIDIA GPU Operator. Runs only on the first host of `masters`.
- name: Install NVIDIA GPU Operator
  become: true
  hosts: masters[0]
  roles:
    - role: roles/charts/nvidia_gpu_operator
|
||||
7
gpu_inference_04_ray.yml
Normal file
7
gpu_inference_04_ray.yml
Normal file
@ -0,0 +1,7 @@
|
||||
---
# Stage 04: Ray. Runs only on the first host of `masters`; the cluster role
# is applied before the service role.
- name: Deploy Ray Cluster
  become: true
  hosts: masters[0]
  roles:
    - role: roles/charts/ray_cluster
    - role: roles/charts/ray_service
|
||||
7
gpu_inference_05_vllm.yml
Normal file
7
gpu_inference_05_vllm.yml
Normal file
@ -0,0 +1,7 @@
|
||||
---
# Stage 05: vLLM. Runs only on the first host of `masters`; the runtime role
# is applied before the service role.
- name: Deploy vLLM Inference Service
  become: true
  hosts: masters[0]
  roles:
    - role: roles/charts/vllm_runtime
    - role: roles/charts/vllm_service
|
||||
6
gpu_inference_site.yml
Normal file
6
gpu_inference_site.yml
Normal file
@ -0,0 +1,6 @@
|
||||
---
# Umbrella playbook: imports the five numbered stage playbooks in order, so
# the whole stack can be run with a single invocation.
- ansible.builtin.import_playbook: gpu_inference_01_prepare.yml
- ansible.builtin.import_playbook: gpu_inference_02_sealos.yml
- ansible.builtin.import_playbook: gpu_inference_03_gpu_operator.yml
- ansible.builtin.import_playbook: gpu_inference_04_ray.yml
- ansible.builtin.import_playbook: gpu_inference_05_vllm.yml
|
||||
27
inventory/group_vars/all.yml
Normal file
27
inventory/group_vars/all.yml
Normal file
@ -0,0 +1,27 @@
|
||||
---
# Inventory-wide variables. Values set here take precedence over role
# defaults of the same name.

# Global versions and images
kubernetes_version: "v1.28.9"
sealos_version: "5.0.0"
cilium_version: "1.15.5"
gpu_operator_version: "v24.3.0"
kuberay_version: "1.1.0"
ray_version: "2.9.0"
vllm_image: "vllm/vllm-openai:v0.4.2"

# Network configuration
pod_cidr: "10.244.0.0/16"
service_cidr: "10.96.0.0/12"
nccl_socket_ifname: "eth0"
gloo_socket_ifname: "eth0"

# Model and inference configuration
vllm_model: "/models/Llama-3-70B-Instruct"
vllm_tensor_parallel_size: 2
vllm_pipeline_parallel_size: 1

# GPU driver policy
driver_enabled: true
driver_version: "535.129.03"
dcgm_exporter_enabled: true

# Remote login user for every host
ansible_user: "root"
|
||||
1
inventory/group_vars/gpu_workers.yml
Normal file
1
inventory/group_vars/gpu_workers.yml
Normal file
@ -0,0 +1 @@
|
||||
---
|
||||
1
inventory/group_vars/masters.yml
Normal file
1
inventory/group_vars/masters.yml
Normal file
@ -0,0 +1 @@
|
||||
---
|
||||
1
inventory/group_vars/ray_workers.yml
Normal file
1
inventory/group_vars/ray_workers.yml
Normal file
@ -0,0 +1 @@
|
||||
---
|
||||
13
inventory/hosts.ini
Normal file
13
inventory/hosts.ini
Normal file
@ -0,0 +1,13 @@
|
||||
# Control-plane hosts. The first entry is the host the roles address as
# groups['masters'][0], so ordering in this group matters.
[masters]
k8s-master-01  ansible_host=10.0.0.10

# GPU worker hosts. `accelerator` is a per-host inventory variable.
[gpu_workers]
k8s-gpu-01     ansible_host=10.0.0.21  accelerator=nvidia-h100
k8s-gpu-02     ansible_host=10.0.0.22  accelerator=nvidia-h100

# Ray workers are exactly the GPU workers.
[ray_workers:children]
gpu_workers

# Every host that is part of the Kubernetes cluster.
[k8s_cluster:children]
masters
gpu_workers
|
||||
15
roles/charts/nvidia_gpu_operator/defaults/main.yml
Normal file
15
roles/charts/nvidia_gpu_operator/defaults/main.yml
Normal file
@ -0,0 +1,15 @@
|
||||
---
# Defaults for the nvidia_gpu_operator role.

# Helm release coordinates.
gpu_operator_namespace: "gpu-operator"
gpu_operator_release_name: "gpu-operator"
# Follow the inventory-wide `gpu_operator_version` when it is defined so the
# version is pinned in one place; otherwise keep the previous pinned value.
gpu_operator_chart_version: "{{ gpu_operator_version | default('v24.3.0') }}"

# Air-gapped / private registry support
gpu_operator_repository: "https://helm.ngc.nvidia.com/nvidia"
image_pull_secrets: []

# Operator settings (rendered into the chart values by values.yaml.j2)
driver_enabled: true
driver_version: "535.129.03"
toolkit_enabled: true
mig_strategy: "single"  # none, single, mixed
dcgm_exporter_enabled: true
|
||||
1
roles/charts/nvidia_gpu_operator/handlers/main.yml
Normal file
1
roles/charts/nvidia_gpu_operator/handlers/main.yml
Normal file
@ -0,0 +1 @@
|
||||
---
|
||||
28
roles/charts/nvidia_gpu_operator/tasks/main.yml
Normal file
28
roles/charts/nvidia_gpu_operator/tasks/main.yml
Normal file
@ -0,0 +1,28 @@
|
||||
---
# Installs the GPU Operator chart and then validates the result. Every step
# is restricted to the first host of the `masters` group; the guard is written
# once on the enclosing block instead of on each task.
- name: GPU Operator install and validation (first master only)
  when: inventory_hostname == groups['masters'][0]
  block:
    - name: Create GPU Operator namespace
      kubernetes.core.k8s:
        state: present
        kind: Namespace
        api_version: v1
        name: "{{ gpu_operator_namespace }}"

    - name: Add NVIDIA helm repo
      kubernetes.core.helm_repository:
        repo_url: "{{ gpu_operator_repository }}"
        name: nvidia

    # Chart values come from the role's values.yaml.j2 template, parsed into
    # a dictionary before being handed to the helm module.
    - name: Deploy GPU Operator
      kubernetes.core.helm:
        chart_ref: nvidia/gpu-operator
        version: "{{ gpu_operator_chart_version }}"
        name: "{{ gpu_operator_release_name }}"
        release_namespace: "{{ gpu_operator_namespace }}"
        values: "{{ lookup('template', 'values.yaml.j2') | from_yaml }}"
        wait: true

    - name: Include validation tasks
      ansible.builtin.include_tasks: validate.yml
|
||||
15
roles/charts/nvidia_gpu_operator/tasks/validate.yml
Normal file
15
roles/charts/nvidia_gpu_operator/tasks/validate.yml
Normal file
@ -0,0 +1,15 @@
|
||||
---
# Post-install checks for the GPU Operator. Both commands only read cluster
# state, so they never report "changed". They are run through the command
# module with an explicit argv: no shell features are needed, and argv keeps
# the templated namespace and the jsonpath braces away from shell parsing.

# Blocks for up to 300s until the device-plugin daemonset has rolled out.
- name: Wait for NVIDIA Device Plugin daemonset to be ready
  ansible.builtin.command:
    argv:
      - kubectl
      - rollout
      - status
      - daemonset/nvidia-device-plugin-daemonset
      - -n
      - "{{ gpu_operator_namespace }}"
      - --timeout=300s
  register: ds_status
  changed_when: false

# Polls (30 tries, 20s apart) until the allocatable resources of the nodes
# labelled nvidia.com/gpu.present=true mention nvidia.com/gpu.
- name: Validate GPU resources are allocatable
  ansible.builtin.command:
    argv:
      - kubectl
      - get
      - nodes
      - -l
      - nvidia.com/gpu.present=true
      - -o
      - "jsonpath={.items[*].status.allocatable}"
  register: gpu_allocatable
  until: "'nvidia.com/gpu' in gpu_allocatable.stdout"
  retries: 30
  delay: 20
  changed_when: false
|
||||
15
roles/charts/nvidia_gpu_operator/templates/values.yaml.j2
Normal file
15
roles/charts/nvidia_gpu_operator/templates/values.yaml.j2
Normal file
@ -0,0 +1,15 @@
|
||||
{# Helm values for the GPU Operator chart. Rendered by the role's "Deploy GPU Operator" task and parsed with from_yaml. #}
driver:
  enabled: {{ driver_enabled }}
  version: "{{ driver_version }}"
toolkit:
  enabled: {{ toolkit_enabled }}
devicePlugin:
  enabled: true
mig:
  strategy: "{{ mig_strategy }}"
dcgmExporter:
  enabled: {{ dcgm_exporter_enabled }}
{# The pull-secret list is emitted only when at least one secret is configured. #}
{% if image_pull_secrets | count > 0 %}
imagePullSecrets:
{{ image_pull_secrets | to_nice_yaml(indent=2) | indent(2, true) }}
{% endif %}
|
||||
36
roles/charts/ray_cluster/defaults/main.yml
Normal file
36
roles/charts/ray_cluster/defaults/main.yml
Normal file
@ -0,0 +1,36 @@
|
||||
---
# Defaults for the ray_cluster role.

ray_namespace: "ray-system"
ray_cluster_name: "ray-cluster"
ray_version: "2.9.0"
# The image tag is derived from ray_version so that overriding the version
# cannot leave spec.rayVersion and the container image out of step. Override
# ray_image directly to use a different repository or tag.
ray_image: "rayproject/ray:{{ ray_version }}"

ray_dashboard_enabled: true

# Resources for the head container (rendered under `resources:`).
ray_head_resources:
  requests:
    cpu: "2"
    memory: "8Gi"
  limits:
    cpu: "4"
    memory: "16Gi"

# One entry per worker group. The template reads groupName, replicas,
# minReplicas, maxReplicas and resources from every entry; nodeSelector,
# tolerations and volumeMounts are optional.
ray_worker_groups:
  - groupName: gpu-workers
    replicas: 2
    minReplicas: 1
    maxReplicas: 4
    resources:
      requests:
        cpu: "4"
        memory: "32Gi"
        nvidia.com/gpu: "1"
      limits:
        cpu: "8"
        memory: "64Gi"
        nvidia.com/gpu: "1"
    nodeSelector:
      accelerator: "nvidia-h100"
    tolerations: []
    # A mount named `dshm` makes the template add a memory-backed emptyDir
    # volume of the same name.
    volumeMounts:
      - mountPath: /dev/shm
        name: dshm
|
||||
1
roles/charts/ray_cluster/handlers/main.yml
Normal file
1
roles/charts/ray_cluster/handlers/main.yml
Normal file
@ -0,0 +1 @@
|
||||
---
|
||||
24
roles/charts/ray_cluster/tasks/main.yml
Normal file
24
roles/charts/ray_cluster/tasks/main.yml
Normal file
@ -0,0 +1,24 @@
|
||||
---
# Creates the Ray namespace, applies the RayCluster custom resource and waits
# for the head pod. All steps run only on the first host of `masters`.

- name: Create Ray namespace
  kubernetes.core.k8s:
    name: "{{ ray_namespace }}"
    api_version: v1
    kind: Namespace
    state: present
  when: inventory_hostname == groups['masters'][0]

# The template renders a RayCluster object (an instance of the resource type),
# not the CustomResourceDefinition itself.
- name: Apply RayCluster resource
  kubernetes.core.k8s:
    state: present
    definition: "{{ lookup('template', 'raycluster.yaml.j2') | from_yaml }}"
  when: inventory_hostname == groups['masters'][0]

# Polls (30 tries, 10s apart) for the phase of this cluster's head pod. The
# selector includes the cluster label so a head pod belonging to a different
# RayCluster in the same namespace cannot satisfy the check. While no pod
# exists yet kubectl exits non-zero; `until` keeps retrying in that case.
- name: Wait for Ray head node to be ready
  ansible.builtin.command:
    argv:
      - kubectl
      - get
      - pod
      - -n
      - "{{ ray_namespace }}"
      - -l
      - "ray.io/node-type=head,ray.io/cluster={{ ray_cluster_name }}"
      - -o
      - "jsonpath={.items[0].status.phase}"
  register: head_status
  until: head_status.stdout == "Running"
  retries: 30
  delay: 10
  changed_when: false
  when: inventory_hostname == groups['masters'][0]
|
||||
53
roles/charts/ray_cluster/templates/raycluster.yaml.j2
Normal file
53
roles/charts/ray_cluster/templates/raycluster.yaml.j2
Normal file
@ -0,0 +1,53 @@
|
||||
{# RayCluster manifest for the ray_cluster role: one head group plus one worker group per entry of ray_worker_groups. #}
{# Jinja comments and the to_nice_yaml lines start at column 0 on purpose; the indent() filter supplies the YAML indentation. #}
apiVersion: ray.io/v1
kind: RayCluster
metadata:
  name: {{ ray_cluster_name }}
  namespace: {{ ray_namespace }}
spec:
  rayVersion: '{{ ray_version }}'
  headGroupSpec:
    rayStartParams:
      dashboard-host: '0.0.0.0'
{% if not ray_dashboard_enabled %}
{# rayStartParams become `ray start` flags; the dashboard switch there is --include-dashboard. #}
      include-dashboard: 'false'
{% endif %}
    template:
      spec:
        containers:
        - name: ray-head
          image: {{ ray_image }}
          resources:
{{ ray_head_resources | to_nice_yaml(indent=4) | indent(12, true) }}
  workerGroupSpecs:
{% for group in ray_worker_groups %}
  - groupName: {{ group.groupName }}
    replicas: {{ group.replicas }}
    minReplicas: {{ group.minReplicas }}
    maxReplicas: {{ group.maxReplicas }}
    rayStartParams: {}
    template:
      spec:
{% if group.nodeSelector is defined %}
        nodeSelector:
{{ group.nodeSelector | to_nice_yaml(indent=2) | indent(10, true) }}
{% endif %}
{% if group.tolerations is defined and group.tolerations | length > 0 %}
        tolerations:
{{ group.tolerations | to_nice_yaml(indent=2) | indent(10, true) }}
{% endif %}
        containers:
        - name: ray-worker
          image: {{ ray_image }}
          resources:
{{ group.resources | to_nice_yaml(indent=4) | indent(12, true) }}
{% if group.volumeMounts is defined and group.volumeMounts | length > 0 %}
          volumeMounts:
{{ group.volumeMounts | to_nice_yaml(indent=2) | indent(10, true) }}
{% endif %}
{# A memory-backed emptyDir is added only when the group mounts a volume named dshm. #}
{% if group.volumeMounts is defined and group.volumeMounts | selectattr('name', 'equalto', 'dshm') | list | length > 0 %}
        volumes:
        - name: dshm
          emptyDir:
            medium: Memory
{% endif %}
{% endfor %}
|
||||
1
roles/charts/ray_service/tasks/main.yml
Normal file
1
roles/charts/ray_service/tasks/main.yml
Normal file
@ -0,0 +1 @@
|
||||
---
|
||||
1
roles/charts/vllm_runtime/tasks/main.yml
Normal file
1
roles/charts/vllm_runtime/tasks/main.yml
Normal file
@ -0,0 +1 @@
|
||||
---
|
||||
31
roles/charts/vllm_service/defaults/main.yml
Normal file
31
roles/charts/vllm_service/defaults/main.yml
Normal file
@ -0,0 +1,31 @@
|
||||
---
# Defaults for the vllm_service role.

vllm_namespace: "vllm-system"
vllm_service_name: "vllm-api"
vllm_image: "vllm/vllm-openai:v0.4.2"
vllm_model: "/models/Llama-3-70B-Instruct"

# Engine arguments passed to the API server by deployment.yaml.j2.
vllm_tensor_parallel_size: 2
vllm_pipeline_parallel_size: 1
vllm_gpu_memory_utilization: 0.90
vllm_max_model_len: 4096
vllm_max_num_seqs: 256

# Service exposure.
vllm_port: 8000
vllm_service_type: "ClusterIP"
vllm_ingress_enabled: false
vllm_ingress_host: "vllm.example.com"

# Ray Integration
# Follows ray_cluster_name / ray_namespace when they are defined (for example
# in the inventory); otherwise resolves to the same address as before.
ray_address: "ray://{{ ray_cluster_name | default('ray-cluster') }}-head-svc.{{ ray_namespace | default('ray-system') }}.svc.cluster.local:10001"

# Environment Variables (exported into the vLLM container)
nccl_socket_ifname: "eth0"
gloo_socket_ifname: "eth0"
nccl_ib_disable: "1"
vllm_logging_level: "INFO"
torch_distributed_init_timeout: "300"
huggingface_token: ""

# Model Mount: host directory and the path it is mounted at in the container.
model_host_path: "/data/models"
model_mount_path: "/models"
|
||||
1
roles/charts/vllm_service/handlers/main.yml
Normal file
1
roles/charts/vllm_service/handlers/main.yml
Normal file
@ -0,0 +1 @@
|
||||
---
|
||||
36
roles/charts/vllm_service/tasks/main.yml
Normal file
36
roles/charts/vllm_service/tasks/main.yml
Normal file
@ -0,0 +1,36 @@
|
||||
---
# Deploys the vLLM namespace, Deployment, Service and (optionally) Ingress,
# then waits for the Deployment to report a ready replica. All steps run only
# on the first host of `masters`.

- name: Create vLLM namespace
  kubernetes.core.k8s:
    name: "{{ vllm_namespace }}"
    api_version: v1
    kind: Namespace
    state: present
  when: inventory_hostname == groups['masters'][0]

- name: Deploy vLLM Deployment
  kubernetes.core.k8s:
    state: present
    definition: "{{ lookup('template', 'deployment.yaml.j2') | from_yaml }}"
  when: inventory_hostname == groups['masters'][0]

- name: Deploy vLLM Service
  kubernetes.core.k8s:
    state: present
    definition: "{{ lookup('template', 'service.yaml.j2') | from_yaml }}"
  when: inventory_hostname == groups['masters'][0]

# Only applied when vllm_ingress_enabled is true.
- name: Deploy vLLM Ingress
  kubernetes.core.k8s:
    state: present
    definition: "{{ lookup('template', 'ingress.yaml.j2') | from_yaml }}"
  when: inventory_hostname == groups['masters'][0] and vllm_ingress_enabled

# Polls (40 tries, 15s apart) until the Deployment reports one ready replica,
# matching the single replica set in deployment.yaml.j2. The read-only kubectl
# call goes through the command module with argv because it needs no shell
# features; argv also keeps the jsonpath braces away from shell parsing.
- name: Wait for vLLM API to be ready
  ansible.builtin.command:
    argv:
      - kubectl
      - get
      - deploy
      - "{{ vllm_service_name }}"
      - -n
      - "{{ vllm_namespace }}"
      - -o
      - "jsonpath={.status.readyReplicas}"
  register: vllm_ready
  until: vllm_ready.stdout == "1"
  retries: 40
  delay: 15
  changed_when: false
  when: inventory_hostname == groups['masters'][0]
|
||||
65
roles/charts/vllm_service/templates/deployment.yaml.j2
Normal file
65
roles/charts/vllm_service/templates/deployment.yaml.j2
Normal file
@ -0,0 +1,65 @@
|
||||
{# Deployment running the vLLM OpenAI-compatible API server with Ray as the distributed backend. #}
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ vllm_service_name }}
  namespace: {{ vllm_namespace }}
spec:
  replicas: 1
  selector:
    matchLabels:
      app: {{ vllm_service_name }}
  template:
    metadata:
      labels:
        app: {{ vllm_service_name }}
    spec:
      containers:
        - name: vllm
          image: {{ vllm_image }}
          command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
          args:
            - "--model={{ vllm_model }}"
            - "--tensor-parallel-size={{ vllm_tensor_parallel_size }}"
            - "--pipeline-parallel-size={{ vllm_pipeline_parallel_size }}"
            - "--gpu-memory-utilization={{ vllm_gpu_memory_utilization }}"
            - "--max-model-len={{ vllm_max_model_len }}"
            - "--max-num-seqs={{ vllm_max_num_seqs }}"
            - "--port={{ vllm_port }}"
{# Only --worker-use-ray is passed. It already selects the Ray backend, and the --distributed-executor-backend flag #}
{# is newer than the v0.4.2 image pinned in the role defaults. NOTE(review): revisit when bumping vllm_image. #}
            - "--worker-use-ray"
          env:
            - name: RAY_ADDRESS
              value: "{{ ray_address }}"
            - name: NCCL_SOCKET_IFNAME
              value: "{{ nccl_socket_ifname }}"
            - name: GLOO_SOCKET_IFNAME
              value: "{{ gloo_socket_ifname }}"
            - name: NCCL_IB_DISABLE
              value: "{{ nccl_ib_disable }}"
            - name: VLLM_LOGGING_LEVEL
              value: "{{ vllm_logging_level }}"
            - name: TORCH_DISTRIBUTED_INIT_TIMEOUT
              value: "{{ torch_distributed_init_timeout }}"
{# The token variable is exported only when a token is configured, so an empty default is never passed on as a credential. #}
{% if huggingface_token | default('') | length > 0 %}
            - name: HUGGING_FACE_HUB_TOKEN
              value: "{{ huggingface_token }}"
{% endif %}
          ports:
            - containerPort: {{ vllm_port }}
          readinessProbe:
            httpGet:
              path: /health
              port: {{ vllm_port }}
            initialDelaySeconds: 30
            periodSeconds: 10
          volumeMounts:
            - name: dshm
              mountPath: /dev/shm
            - name: models
              mountPath: {{ model_mount_path }}
      volumes:
        - name: dshm
          emptyDir:
            medium: Memory
        - name: models
          hostPath:
            path: {{ model_host_path }}
            type: DirectoryOrCreate
|
||||
17
roles/charts/vllm_service/templates/ingress.yaml.j2
Normal file
17
roles/charts/vllm_service/templates/ingress.yaml.j2
Normal file
@ -0,0 +1,17 @@
|
||||
{# Ingress routing every path of vllm_ingress_host to the vLLM Service port. Applied only when vllm_ingress_enabled is true. #}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  namespace: "{{ vllm_namespace }}"
  name: "{{ vllm_service_name }}-ingress"
spec:
  rules:
    - host: "{{ vllm_ingress_host }}"
      http:
        paths:
          - pathType: Prefix
            path: /
            backend:
              service:
                name: "{{ vllm_service_name }}"
                port:
                  number: {{ vllm_port }}
|
||||
51
roles/charts/vllm_service/templates/rayservice_vllm.yaml.j2
Normal file
51
roles/charts/vllm_service/templates/rayservice_vllm.yaml.j2
Normal file
@ -0,0 +1,51 @@
|
||||
{# RayService manifest variant for vLLM. #}
{# worker_replicas, max_replicas and gpus_per_worker have no role default and must be supplied by the caller. #}
{# service_name, model_name_or_path and tensor_parallel_size fall back to the matching vllm_* role defaults. #}
{# NOTE(review): import_path names vllm.engine.arg_utils:AsyncEngineArgs; confirm it is a deployable Serve application. #}
apiVersion: ray.io/v1
kind: RayService
metadata:
  name: {{ service_name | default(vllm_service_name) }}
  namespace: {{ vllm_namespace }}
spec:
  serveConfigV2: |
    applications:
      - name: vllm_app
        import_path: "vllm.engine.arg_utils:AsyncEngineArgs"
        route_prefix: /
  rayClusterConfig:
{# rayVersion and the head image tag are Ray versions, so they use ray_version rather than kuberay_version. #}
    rayVersion: '{{ ray_version }}'
    headGroupSpec:
      rayStartParams:
        dashboard-host: '0.0.0.0'
      template:
        spec:
          containers:
            - name: ray-head
              image: rayproject/ray:{{ ray_version }}
    workerGroupSpecs:
      - replicas: {{ worker_replicas }}
        minReplicas: 1
        maxReplicas: {{ max_replicas }}
        groupName: gpu-group
        rayStartParams: {}
        template:
          spec:
            containers:
              - name: vllm-node
                image: {{ vllm_image }}
                resources:
                  limits:
                    nvidia.com/gpu: {{ gpus_per_worker }}
                env:
                  - name: HUGGING_FACE_HUB_TOKEN
                    value: "{{ huggingface_token }}"
                command: ["python3", "-m", "vllm.entrypoints.openai.api_server"]
                args:
                  - "--model"
                  - "{{ model_name_or_path | default(vllm_model) }}"
                  - "--tensor-parallel-size"
                  - "{{ tensor_parallel_size | default(vllm_tensor_parallel_size) }}"
                volumeMounts:
                  - name: dshm
                    mountPath: /dev/shm
            volumes:
              - name: dshm
                emptyDir:
                  medium: Memory
|
||||
14
roles/charts/vllm_service/templates/service.yaml.j2
Normal file
14
roles/charts/vllm_service/templates/service.yaml.j2
Normal file
@ -0,0 +1,14 @@
|
||||
{# Service in front of the vLLM pods: selects pods by the `app` label and forwards vllm_port to the same container port. #}
apiVersion: v1
kind: Service
metadata:
  namespace: "{{ vllm_namespace }}"
  name: "{{ vllm_service_name }}"
spec:
  type: "{{ vllm_service_type }}"
  selector:
    app: "{{ vllm_service_name }}"
  ports:
    - name: http
      protocol: TCP
      port: {{ vllm_port }}
      targetPort: {{ vllm_port }}
|
||||
1
roles/docker/container_runtime/tasks/main.yml
Normal file
1
roles/docker/container_runtime/tasks/main.yml
Normal file
@ -0,0 +1 @@
|
||||
---
|
||||
1
roles/vhosts/cni_cilium/tasks/main.yml
Normal file
1
roles/vhosts/cni_cilium/tasks/main.yml
Normal file
@ -0,0 +1 @@
|
||||
---
|
||||
12
roles/vhosts/kernel_tuning/tasks/main.yml
Normal file
12
roles/vhosts/kernel_tuning/tasks/main.yml
Normal file
@ -0,0 +1,12 @@
|
||||
---
# Kernel settings applied during host preparation.

# Sets vm.max_map_count and reloads sysctl so the value is active immediately.
- name: Set max_map_count for vLLM/Ray
  ansible.posix.sysctl:
    name: vm.max_map_count
    value: "262144"
    reload: true
    state: present

# Loads the ip_vs kernel module on the running system.
- name: Load ip_vs module
  community.general.modprobe:
    state: present
    name: ip_vs
|
||||
10
roles/vhosts/sealos_cluster/defaults/main.yml
Normal file
10
roles/vhosts/sealos_cluster/defaults/main.yml
Normal file
@ -0,0 +1,10 @@
|
||||
---
# Defaults for the sealos_cluster role.

# Version pins.
sealos_version: "5.0.0"
kubernetes_version: "v1.28.9"

# Cluster networking.
pod_cidr: "10.244.0.0/16"
service_cidr: "10.96.0.0/12"

# Where the fetched admin kubeconfig is stored on the control machine.
kubeconfig_local_path: "./admin.conf"
cluster_name: "default"

# Host lists taken from the inventory groups; empty when a group is absent.
masters: "{{ groups['masters'] | default([]) }}"
workers: "{{ groups['gpu_workers'] | default([]) }}"

# SSH user written into the generated Clusterfile.
ansible_ssh_user: "root"
|
||||
1
roles/vhosts/sealos_cluster/handlers/main.yml
Normal file
1
roles/vhosts/sealos_cluster/handlers/main.yml
Normal file
@ -0,0 +1 @@
|
||||
---
|
||||
40
roles/vhosts/sealos_cluster/tasks/main.yml
Normal file
40
roles/vhosts/sealos_cluster/tasks/main.yml
Normal file
@ -0,0 +1,40 @@
|
||||
---
# README:
# Prerequisites: the nodes run a compatible Linux OS, can reach each other
# over the network, and have passwordless SSH login configured.
# Execution order: runs only on the first host of the masters group;
# `sealos apply` then syncs the configuration to the remote nodes.

- name: Create sealos workspace
  ansible.builtin.file:
    path: /etc/sealos
    state: directory
    mode: "0755"
  when: inventory_hostname == masters[0]

# The Clusterfile carries SSH connection settings, so it is written with an
# explicit owner-only mode instead of inheriting whatever the umask yields.
- name: Generate Clusterfile
  ansible.builtin.template:
    src: clusterfile.yaml.j2
    dest: /etc/sealos/Clusterfile
    mode: "0600"
  when: inventory_hostname == masters[0]

# Reported as changed only when sealos prints "Applied".
- name: Apply Sealos Cluster (Idempotent)
  ansible.builtin.command: "sealos apply -f /etc/sealos/Clusterfile"
  register: sealos_apply
  changed_when: "'Applied' in sealos_apply.stdout"
  failed_when: sealos_apply.rc != 0
  when: inventory_hostname == masters[0]

# Counts node lines that do not contain "NotReady" and polls (30 tries, 10s
# apart) until the count equals the number of masters plus workers.
# NOTE(review): gpu_inference_02_sealos.yml runs the cni_cilium role after
# this one; confirm nodes can report Ready before the CNI is installed.
- name: Wait for nodes to be ready
  ansible.builtin.shell: "kubectl get nodes --no-headers | grep -v 'NotReady' | wc -l"
  register: ready_nodes
  until: ready_nodes.stdout | int == (masters | length + workers | length)
  retries: 30
  delay: 10
  changed_when: false
  when: inventory_hostname == masters[0]

# Copies the admin kubeconfig to the control machine; `flat` stores it at the
# exact destination path.
- name: Fetch kubeconfig to local
  ansible.builtin.fetch:
    src: /etc/kubernetes/admin.conf
    dest: "{{ kubeconfig_local_path }}"
    flat: true
  when: inventory_hostname == masters[0]
|
||||
15
roles/vhosts/sealos_cluster/templates/Clusterfile.j2
Normal file
15
roles/vhosts/sealos_cluster/templates/Clusterfile.j2
Normal file
@ -0,0 +1,15 @@
|
||||
{# Clusterfile variant that reads hosts straight from the inventory groups. #}
{# NOTE(review): the role's tasks/main.yml renders clusterfile.yaml.j2; confirm whether this file is still needed. #}
apiVersion: cluster.sealos.io/v1
kind: Cluster
metadata:
  name: {{ cluster_name }}
spec:
  hosts:
    - roles: [master]
      ips: {{ groups['masters'] | map('extract', hostvars, ['ansible_host']) | list | to_json }}
    - roles: [node]
      ips: {{ groups['gpu_workers'] | map('extract', hostvars, ['ansible_host']) | list | to_json }}
  ssh:
    user: {{ ansible_user | default('root') }}
  network:
    podCIDR: {{ pod_cidr }}
    svcCIDR: {{ service_cidr }}
|
||||
15
roles/vhosts/sealos_cluster/templates/clusterfile.yaml.j2
Normal file
15
roles/vhosts/sealos_cluster/templates/clusterfile.yaml.j2
Normal file
@ -0,0 +1,15 @@
|
||||
{# Clusterfile rendered to /etc/sealos/Clusterfile and consumed by `sealos apply -f`. #}
{# Document 1 is the Sealos Cluster object: hosts, the cluster image to install, and SSH settings. #}
{# Document 2 is a kubeadm ClusterConfiguration that carries the pod and service CIDRs. #}
apiVersion: apps.sealos.io/v1beta1
kind: Cluster
metadata:
  name: {{ cluster_name }}
spec:
  hosts:
    - ips: {{ masters | map('extract', hostvars, ['ansible_host']) | list | to_json }}
      roles: [master]
{# The node entry is skipped when there are no workers, so no host entry with an empty IP list is emitted. #}
{% if workers | length > 0 %}
    - ips: {{ workers | map('extract', hostvars, ['ansible_host']) | list | to_json }}
      roles: [node]
{% endif %}
{# The Kubernetes cluster image is selected by the role's kubernetes_version. #}
  image:
    - labring/kubernetes:{{ kubernetes_version }}
  ssh:
{# Prefer the inventory's ansible_user; fall back to the role default ansible_ssh_user. #}
    user: {{ ansible_user | default(ansible_ssh_user) }}
---
apiVersion: kubeadm.k8s.io/v1beta3
kind: ClusterConfiguration
networking:
  podSubnet: {{ pod_cidr }}
  serviceSubnet: {{ service_cidr }}
|
||||
1
roles/vhosts/validation/tasks/main.yml
Normal file
1
roles/vhosts/validation/tasks/main.yml
Normal file
@ -0,0 +1 @@
|
||||
---
|
||||
Loading…
Reference in New Issue
Block a user